In [None]:
!pip install google-generativeai




In [None]:
import os
import time
import google.generativeai as genai

from google.colab import userdata

# Authenticate the SDK. The key is read from the Colab `userdata` store under
# the name 'GOOGLE_API_KEY', so it never appears in the notebook source.
genai.configure(api_key=userdata.get('GOOGLE_API_KEY'))

def upload_to_gemini(path, mime_type=None):
  """Sends a local file to Gemini and reports where it was stored.

  Args:
    path: Path of the file to upload.
    mime_type: Optional MIME type, forwarded unchanged to `genai.upload_file`.

  Returns:
    The file object returned by `genai.upload_file`.

  See https://ai.google.dev/gemini-api/docs/prompting_with_media
  """
  uploaded = genai.upload_file(path, mime_type=mime_type)
  # Echo the display name and URI so the upload is traceable in the cell output.
  display_name, uri = uploaded.display_name, uploaded.uri
  print(f"Uploaded file '{display_name}' as: {uri}")
  return uploaded

In [None]:
def wait_for_files_active(files, poll_interval=10):
  """Waits for all of the given files to be active.

  Some files uploaded to the Gemini API need to be processed before they can be
  used as prompt inputs. The status can be seen by querying the file's "state"
  field.

  This implementation uses a simple blocking polling loop. Production code
  should probably employ a more sophisticated approach.

  Args:
    files: Iterable of uploaded file objects; only each object's `name` is
      read, the current state is always re-fetched with `genai.get_file`.
    poll_interval: Seconds to sleep between two status checks of a file that
      is still "PROCESSING". Defaults to 10.

  Returns:
    A list holding the refreshed file object of every input file, in input
    order. Empty if `files` is empty.

  Raises:
    Exception: If a file ends in any state other than "ACTIVE".
  """
  print("Waiting for file processing...")
  active_files = []
  for name in (file.name for file in files):
    file = genai.get_file(name)
    while file.state.name == "PROCESSING":
      print(".", end="", flush=True)
      time.sleep(poll_interval)
      file = genai.get_file(name)
    if file.state.name != "ACTIVE":
      raise Exception(f"File {file.name} failed to process")
    # Keep going: returning here would leave the remaining files unchecked.
    active_files.append(file)
  print("...all files ready")
  print()
  return active_files

In [None]:
# Generation settings that are handed to the GenerativeModel when it is built.
generation_config = dict(
  temperature=0,
  top_p=0.95,
  top_k=64,
  max_output_tokens=8192,
  response_mime_type="text/plain",
)

In [None]:
# Build the model from the generation settings defined earlier.
# safety_settings may also be passed here; see
# https://ai.google.dev/gemini-api/docs/safety-settings
model = genai.GenerativeModel(
  generation_config=generation_config,
  model_name="gemini-1.5-flash",
)

# TODO Make these files available on the local file system
# You may need to update the file paths
files = [
  upload_to_gemini(pdf_path, mime_type="application/pdf")
  for pdf_path in ("2407.01449v2.pdf",)
]

# Uploaded files can have a processing delay; block until they are usable.
wait_for_files_active(files)

# Start a chat whose history is seeded with the uploaded PDF plus a first user
# question, so later messages are asked in the context of the paper.
# NOTE(review): the seeded history holds only this user turn (no model reply).
chat_session = model.start_chat(
  history=[
    dict(
      role="user",
      parts=[files[0], "What is the title of this paper?\n\n"],
    ),
  ]
)



Uploaded file '2407.01449v2.pdf' as: https://generativelanguage.googleapis.com/v1beta/files/p50159e4zcc8
Waiting for file processing...
...all files ready



In [None]:
# Follow-up question in the running chat: print the answer, then the token
# usage reported for this request.
response = chat_session.send_message("How many Figures are in the paper?")
print(response.text, response.usage_metadata, sep="\n")


The paper contains 8 Figures. 

prompt_token_count: 30421
candidates_token_count: 7
total_token_count: 30428



In [None]:
# Next question in the same chat session: answer first, token usage second.
response = chat_session.send_message("Who are the authors?")
print(response.text, response.usage_metadata, sep="\n")


The authors of the paper are:

* Manuel Faysse
* Hugues Sibille
* Tony Wu
* Bilel Omrani
* Gautier Viaud
* Céline Hudelot
* Pierre Colombo 

prompt_token_count: 30437
candidates_token_count: 44
total_token_count: 30481



In [None]:
# Third question in the chat session: answer first, token usage second.
response = chat_session.send_message(
  "What are the major contributions of the paper accordig to the authors?"
)
print(response.text, response.usage_metadata, sep="\n")


The authors highlight two major contributions of their paper:

1. **ViDoRe:** A comprehensive benchmark for evaluating document retrieval systems that go beyond text-only systems and consider visual elements. 
2. **ColPali:** A novel document retrieval model architecture that leverages the capabilities of Vision Language Models (VLMs) to produce high-quality contextualized embeddings solely from images of document pages. 

The authors argue that ColPali outperforms modern document retrieval pipelines while being drastically faster and end-to-end trainable. 

prompt_token_count: 30499
candidates_token_count: 109
total_token_count: 30608



### Adding Context Cache

In [None]:
# Upload the paper once more and rebuild the `files` list from scratch.
# NOTE(review): the caching cells visible below work on `pdf_file`, not on
# `files` -- confirm whether this second upload is still needed.
files = [upload_to_gemini(path="2407.01449v2.pdf", mime_type="application/pdf")]

In [None]:
path_to_pdf_file = '2403.06634.pdf'

# Upload the PDF using the Files API
pdf_file = genai.upload_file(path=path_to_pdf_file)

# Poll until the service has finished processing the upload
while pdf_file.state.name == 'PROCESSING':
  print('Waiting for PDF to be processed.')
  time.sleep(2)
  pdf_file = genai.get_file(pdf_file.name)

# Leaving the loop only means processing has stopped, not that it succeeded:
# fail loudly instead of caching a file that never became usable.
if pdf_file.state.name != 'ACTIVE':
  raise Exception(f'File {pdf_file.name} failed to process')

print(f'PDF processing complete: {pdf_file.uri}')

Video processing complete: https://generativelanguage.googleapis.com/v1beta/files/a1lfs7l5tzwy


In [None]:
import os
import google.generativeai as genai
from google.generativeai import caching
import datetime
import time

In [None]:
# Create a cache with a 15 minute TTL.
# The cache stores the system instruction together with the uploaded PDF
# (`pdf_file`) for the given model version.
cache = caching.CachedContent.create(
    model='models/gemini-1.5-flash-001',
    display_name='PDF-file', # used to identify the cache
    system_instruction=(
        'You are an expert PDF file analyzer, and your job is to answer '
        'the user\'s query based on the PDF file you have access to.'
    ),
    contents=[pdf_file],
    # Lifetime of the cache entry, counted from creation.
    ttl=datetime.timedelta(minutes=15),
)


In [None]:
# Construct a GenerativeModel which uses the created cache.
model = genai.GenerativeModel.from_cached_content(cached_content=cache)

# Query the model.
# The prompt is assembled from adjacent string literals, which Python joins
# without any separator -- every fragment therefore ends with an explicit
# space so the questions do not run into each other.
response = model.generate_content([(
    'What is the title of the paper? '
    'Who are the authors? '
    'What are the major contributions of the paper according to the authors?'
)])

# Token accounting first (shows how much of the prompt came from the cache),
# then the answer itself.
print(response.usage_metadata)

print(response.text)

prompt_token_count: 33300
candidates_token_count: 261
total_token_count: 33561
cached_content_token_count: 33264

The paper is titled "Stealing Part of a Production Language Model". 

The authors are:
- Nicholas Carlini
- Daniel Paleka
- Krishnamurthy (Dj) Dvijotham
- Thomas Steinke
- Jonathan Hayase
- A. Feder Cooper
- Katherine Lee
- Matthew Jagielski
- Milad Nasr
- Arthur Conmy
- Eric Wallace
- David Rolnick
- Florian Tramèr

The paper introduces the first model-stealing attack that can be applied to black-box language models. This attack allows the authors to recover the complete embedding projection layer of a transformer language model. 

The paper's major contributions are:
- The attack is the first to extract precise, nontrivial information from black-box production language models.
- The attack recovers the embedding projection layer of a transformer model given typical API access. 
- The attack confirms for the first time that OpenAI's Ada and Babbage language models have a h

In [None]:
# Construct a GenerativeModel which uses the created cache.
model = genai.GenerativeModel.from_cached_content(cached_content=cache)

# Query the model, this time asking for the authors as a list.
# The prompt is assembled from adjacent string literals, which Python joins
# without any separator -- every fragment therefore ends with an explicit
# space so the questions do not run into each other.
response = model.generate_content([(
    'What is the title of the paper? '
    'Who are the authors? Provide a list. '
    'What are the major contributions of the paper according to the authors?'
)])

# Token accounting first (shows how much of the prompt came from the cache),
# then the answer itself.
print(response.usage_metadata)

print(response.text)

prompt_token_count: 33303
candidates_token_count: 247
total_token_count: 33550
cached_content_token_count: 33264

The title of the paper is "Stealing Part of a Production Language Model".

The authors of the paper are:

1. Nicholas Carlini
2. Daniel Paleka
3. Krishnamurthy (Dj) Dvijotham
4. Thomas Steinke
5. Jonathan Hayase
6. A. Feder Cooper
7. Katherine Lee
8. Matthew Jagielski
9. Milad Nasr
10. Arthur Conmy
11. Eric Wallace
12. David Rolnick
13. Florian Tramèr

The paper's major contributions are:

- The first model-stealing attack that extracts precise, nontrivial information from black-box production language models, such as OpenAI's ChatGPT or Google's PaLM-2.
- The attack recovers the embedding projection layer of a transformer model, given typical API access. 
- The authors confirm, for the first time, that these black-box models have a hidden dimension of 1024 and 2048, respectively.
- The authors discuss potential defenses and mitigations.
- The authors discuss the implicatio

In [None]:
# Print every cached-content entry returned by the caching API.
for cache_entry in caching.CachedContent.list():
  print(cache_entry)

CachedContent(
    name='cachedContents/zqk8uilw4ek4',
    model='models/gemini-1.5-flash-001',
    display_name='PDF-file',
    usage_metadata={
        'total_token_count': 33264,
    },
    create_time=2024-08-11 06:52:05.138716+00:00,
    update_time=2024-08-11 06:52:05.138716+00:00,
    expire_time=2024-08-11 07:07:04.854054+00:00
)
CachedContent(
    name='cachedContents/gmdhiut161iv',
    model='models/gemini-1.5-flash-001',
    display_name='sherlock jr movie',
    usage_metadata={
        'total_token_count': 33264,
    },
    create_time=2024-08-11 06:46:08.530272+00:00,
    update_time=2024-08-11 06:46:08.530272+00:00,
    expire_time=2024-08-11 07:01:07.812105+00:00
)


In [None]:
# Ask one more question through the model built from the cache and print the
# answer text.
response = model.generate_content("What is the main theme of the paper?")
print(response.text)

The paper focuses on a model-stealing attack that extracts precise, non-trivial information from black-box production language models like OpenAI's ChatGPT or Google's PaLM-2. Specifically, the attack recovers the embedding projection layer (up to symmetries) of a transformer model, given typical API access.  The authors present a number of attacks, starting with an attack that only recovers the hidden dimension size of the model, then expanding to an attack that recovers the entire projection matrix. They also discuss potential defenses and mitigations for this type of attack.


In [None]:
# Token usage reported for the most recent `response`.
print(response.usage_metadata)


prompt_token_count: 33274
candidates_token_count: 114
total_token_count: 33388
cached_content_token_count: 33264

