In [None]:
import pandas
import numpy as np
import dotenv
# Load variables from a .env file into the process environment so that the
# API keys read through os.environ in later cells can be found.
dotenv.load_dotenv()

In [None]:
import os
ANTHROPIC_API_KEY = os.environ.get('ANTHROPIC_API_KEY')
# Report only whether the key was found: printing the secret itself would store
# it in the saved notebook output and leak it to anyone who can read the file.
print(f"ANTHROPIC_API_KEY set: {bool(ANTHROPIC_API_KEY)}")

In [None]:
# import anthropic
# import requests
# import bs4
# from rich import print
# import time

# client = anthropic.Client(ANTHROPIC_API_KEY)

# response = client.beta.prompt_caching.messages.create(
#  model="claude-3-haiku-2024-03-07",
#  max_tokens=1024,
#  system=[
#   {
#    type="text",
#    text="Act as our Operating System and DBMS teacher and answer the following questions:",
#   }
#  ])
## No point in using this: the cache lifetime is only 5 minutes when it goes unused.

## Using Gemini

In [None]:
import vertexai
#from vertexai.generative_models import GenerationConfig, GenerativeModel
from vertexai.preview.generative_models import GenerativeModel
from vertexai.preview import caching

from vertexai.generative_models import Part
from vertexai.preview import caching
import datetime

from IPython.display import display, Markdown

# Point the Vertex AI SDK at the project and region used by the cells below.
project_id = "sascha-playground-doit"
vertexai.init(
    project=project_id,
    location="us-central1",
)

In [None]:
import time
from contextlib import contextmanager

@contextmanager
def measure_time():
    """Context manager that prints the wall-clock time spent in its ``with`` body.

    Timing uses ``time.perf_counter``. The elapsed time is reported from a
    ``finally`` clause, so it is still printed when the body raises; the
    exception then propagates to the caller unchanged.

    Yields:
        None. No value is bound by the ``with`` statement.
    """
    start_time = time.perf_counter()
    try:
        yield
    finally:
        # Runs on normal exit and on exception alike, so failed calls are timed too.
        elapsed_time = time.perf_counter() - start_time
        print(f"Elapsed time: {elapsed_time:.4f} seconds")

## Without using the cache

In [None]:
system_instruction = """
You are an expert book reader, and you answer user's query based on the book you have access to.
"""

# The book is a local file, so its bytes are sent inline with the request.
# Part.from_uri only references remote content (e.g. gs:// URIs); it does not
# read "./books/OS.pdf" from disk.
# NOTE(review): inline payloads are size-limited. If this PDF is too large,
# upload it to Cloud Storage and use Part.from_uri with its gs:// URI instead.
with open("./books/OS.pdf", "rb") as pdf_file:
    # The name `video` is kept because the next cell passes it to the model.
    video = Part.from_data(data=pdf_file.read(), mime_type="application/pdf")

model = GenerativeModel(
    "gemini-1.5-pro-001",
    system_instruction=[system_instruction]
  )

In [None]:
# Baseline request: the whole document is sent with the question (no cache),
# and the call is timed for comparison.
uncached_question = """What is the difference between a process and a thread?"""
with measure_time():
    response = model.generate_content([video, uncached_question])
    print(response.usage_metadata)

## Create Cache

In [None]:
# System prompt stored in the cache together with the video.
system_instruction = (
    "\n"
    "You are an expert video analyzer, and you answer user's query based on the video file you have access to.\n"
)

# The single piece of content to cache: a video referenced by its gs:// URI.
video_uri = "gs://doit-ml-demo/gemini/caching/video/Getting started with Gemini on Vertex AI.mp4"
contents = [Part.from_uri(mime_type="video/mp4", uri=video_uri)]

# Create the cache entry with a 60-minute time-to-live.
# Alternative model_name left for reference: "gemini-1.5-flash-001"
cache_ttl = datetime.timedelta(minutes=60)
cached_content = caching.CachedContent.create(
    model_name="gemini-1.5-pro-001",
    system_instruction=system_instruction,
    contents=contents,
    ttl=cache_ttl,
)

# Keep the cache's name so a later cell can look the cache up again.
cache_name = cached_content.name
print(cache_name)

In [None]:
# Re-create the cache handle from the name saved in the previous cell,
# then show it as the cell's output.
cached_content = caching.CachedContent(
    cached_content_name=cache_name,
)
cached_content

In [None]:
%%time
model = GenerativeModel.from_cached_content(cached_content=cached_content)
response = model.generate_content("provide a summary for the video")
print(response.usage_metadata)

In [None]:
# Wrap the response text in a Markdown display object and show it as the cell output.
summary_markdown = Markdown(response.text)
summary_markdown

## Making the query best suited

In [65]:
import google.generativeai as genai
import dotenv

In [66]:
def genai_connect(model_name='gemini-1.5-flash'):
    """Configure the google.generativeai SDK and return a model handle.

    Loads variables from a .env file, passes the value of the
    ``GENAI_API_KEY`` environment variable to ``genai.configure``, and builds
    a ``GenerativeModel``.

    Args:
        model_name: Name of the model to instantiate. Defaults to
            ``'gemini-1.5-flash'``, the value that was previously hard-coded.

    Returns:
        The object returned by ``genai.GenerativeModel(model_name)``.
    """
    # Imported locally: this notebook section imports genai and dotenv but not
    # os, so the function would otherwise depend on an unrelated earlier cell.
    import os

    dotenv.load_dotenv()
    genai.configure(api_key=os.environ.get("GENAI_API_KEY"))
    return genai.GenerativeModel(model_name)

In [68]:
# Ask the question through the google.generativeai model and print the answer text.
plain_text_prompt = "What is the difference between a process and a thread? Reply with text and not markdown."
model = genai_connect()
response = model.generate_content(plain_text_prompt)
print(response.text)

A process is a separate instance of a running program, while a thread is a unit of execution within a process. 

Here's a breakdown:

**Process:**

* A process has its own memory space, resources, and address space.
* Processes are independent entities that cannot directly access each other's data.
* Communication between processes requires explicit mechanisms like inter-process communication (IPC).
* Launching a process is a heavyweight operation that consumes more system resources than creating a thread.
* Example: Opening a new application on your computer creates a separate process.

**Thread:**

* A thread shares the same memory space and resources with other threads within the same process.
* Threads can communicate and share data directly within the process.
* Creating and managing threads is a lightweight operation compared to processes.
* Multiple threads within a process can run concurrently, providing a form of parallelism.
* Example: A web server might use multiple threads 

## Using the Anthropic API for Computer Networks (CN) and OOP

In [None]:
import anthropic
# Create the Anthropic API client, authenticated with the key read from the environment.
client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)


In [None]:
def ask_claude(query, model="claude-3-5-sonnet-20240620", max_tokens=1024):
    """Send one multiple-choice question to Claude and return the raw response.

    Args:
        query: Question text (including its answer options), sent as a single
            user message.
        model: Model identifier passed to the API. Defaults to the value that
            was previously hard-coded.
        max_tokens: Value passed as ``max_tokens`` to the API. Defaults to the
            previously hard-coded 1024.

    Returns:
        The object returned by ``client.messages.create``. The notebook reads
        the answer from it via ``response.content[0].text``.
    """
    # Sent verbatim as the system prompt; do not reformat this string.
    system_prompt = "You are an expert in computer networks and Object Oriented Programming, and you answer user's multiple choice questions based on your knowledge. Keep your answers concise mentioning only the option that you think is the correct answer , don't mention the reason and answer with at most 10 words."
    return client.messages.create(
        model=model,
        max_tokens=max_tokens,
        messages=[{"role": "user", "content": query}],
        system=system_prompt,
    )

In [None]:
# Multiple-choice question passed to ask_claude; the options are part of the query text.
proxy_question = " A proxy server is used as the computer? the options are  external access acting as a backup performing file handling accessing user permissions"
response = ask_claude(proxy_question)

In [None]:
# Print the text of the first content block in the response.
first_block = response.content[0]
print(first_block.text)