In [None]:
# Authenticate with Google Cloud application-default credentials, requesting
# the cloud-platform and generative-language.tuning scopes.
import os
# The COLAB_RELEASE_TAG environment variable is used here to detect Colab.
if 'COLAB_RELEASE_TAG' in os.environ:
  from google.colab import userdata
  import pathlib
  # Write the OAuth client file from the Colab secret named CLIENT_SECRET.
  pathlib.Path('client_secret.json').write_text(userdata.get('CLIENT_SECRET'))

  # Colab has no local browser, so run the login flow with `--no-browser`.
  !gcloud auth application-default login --no-browser --client-id-file client_secret.json --scopes='https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/generative-language.tuning'
else:
  # NOTE(review): this branch does not create client_secret.json; it presumably
  # must already exist in the working directory — confirm.
  !gcloud auth application-default login --client-id-file client_secret.json --scopes='https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/generative-language.tuning'

In [None]:
!pip install -q google-generativeai

### Import libraries

In [None]:
import google.generativeai as genai

In [None]:
# Preview the names of at most five tuned models returned by the API.
tuned_models = genai.list_tuned_models()
for _, tuned in zip(range(5), tuned_models):
  print(tuned.name)

tunedModels/generate-num-5306
tunedModels/categoryclassifier31072024


## Dataset Formatting

In [None]:
import csv

filename = "Training Dataset.csv"

# Decode explicitly instead of relying on the platform default encoding.
# "utf-8-sig" reads plain UTF-8 and also drops a leading BOM, which would
# otherwise become part of the first header name and break the 'Query' lookup.
with open(filename, mode='r', newline='', encoding='utf-8-sig') as file:
    # One tuning example per CSV row: the query text and its stripped category.
    training_data = [
        {'text_input': row['Query'], 'output': row['Category'].strip()}
        for row in csv.DictReader(file)
    ]

# Report the size and show a small sample rather than dumping every example.
print(f"Loaded {len(training_data)} training examples")
training_data[:3]


[{'text_input': 'Can you help me write a message to my friend?', 'output': 'Communication'}, {'text_input': "I'm feeling really stressed, can you give me some tips on how to manage my anxiety?", 'output': 'Communication'}, {'text_input': 'What are some good chatbot platforms for customer service?', 'output': 'Communication'}, {'text_input': "I'm looking for a virtual assistant that can help me with scheduling and email management.", 'output': 'Communication'}, {'text_input': 'How can I improve my communication skills in a professional setting?', 'output': 'Communication'}, {'text_input': 'I need to talk to someone about my mental health, where can I find resources?', 'output': 'Communication'}, {'text_input': 'What are the benefits of using a chatbot for customer support?', 'output': 'Communication'}, {'text_input': 'How can I make my conversations more engaging?', 'output': 'Communication'}, {'text_input': 'I am depressed after failing in my exam and uncertain about my future.', 'outp

In [None]:
import csv

filename = "Validation Dataset.csv"

# Decode explicitly instead of relying on the platform default encoding.
# "utf-8-sig" reads plain UTF-8 and also drops a leading BOM, which would
# otherwise become part of the first header name and break the 'Query' lookup.
with open(filename, mode='r', newline='', encoding='utf-8-sig') as file:
    # One example per CSV row: the query text and its stripped category.
    validation_data = [
        {'text_input': row['Query'], 'output': row['Category'].strip()}
        for row in csv.DictReader(file)
    ]

# Report the size and show a small sample rather than dumping every example.
print(f"Loaded {len(validation_data)} validation examples")
validation_data[:3]


[{'text_input': 'Can you help me find a chatbot that specializes in mental health?', 'output': 'Communication'}, {'text_input': "I need to have a conversation with someone about my anxiety, but I'm too nervous to talk to a real person.", 'output': 'Communication'}, {'text_input': 'Is there a virtual assistant that can help me schedule therapy appointments?', 'output': 'Communication'}, {'text_input': "I'm looking for a chatbot that can help me manage my stress.", 'output': 'Communication'}, {'text_input': 'What are some good resources for online therapy or virtual mental health support?', 'output': 'Communication'}, {'text_input': 'Generate a photorealistic image of a cat riding a unicorn in a field of sunflowers.', 'output': 'Visual Art'}, {'text_input': 'Create a short animated video of a penguin dancing on a beach.', 'output': 'Visual Art'}, {'text_input': 'Can you make a surreal video of a melting clock on a staircase?', 'output': 'Visual Art'}, {'text_input': 'I need a photo of a 

## Create tuned model

In [None]:
# Pick the first listed model that advertises tuning support. `next` stops
# iterating as soon as a match is found, and the explicit check replaces the
# opaque IndexError that `[...][0]` raised when no model matched.
base_model = next(
    (m for m in genai.list_models()
     if "createTunedModel" in m.supported_generation_methods),
    None,
)
if base_model is None:
    raise RuntimeError("No available model supports createTunedModel.")
base_model

Model(name='models/gemini-1.0-pro-001',
      base_model_id='',
      version='001',
      display_name='Gemini 1.0 Pro 001 (Tuning)',
      description=('The best model for scaling across a wide range of tasks. This is a stable '
                   'model that supports tuning.'),
      input_token_limit=30720,
      output_token_limit=2048,
      supported_generation_methods=['generateContent', 'countTokens', 'createTunedModel'],
      temperature=0.9,
      max_temperature=None,
      top_p=1.0,
      top_k=None)

In [None]:
# Identifier under which the tuned model is created (tunedModels/<name>).
name = "categoryclassifier31072024"

# Tuning hyperparameters, grouped so they are easy to spot and adjust.
hyperparameters = {
    "epoch_count": 5,
    "batch_size": 5,
    "learning_rate": 0.001,
}

# Start the tuning job; `operation` is the handle used to track its progress.
operation = genai.create_tuned_model(
    id=name,
    source_model=base_model.name,
    display_name="categoryclassifier-new",
    training_data=training_data,
    **hyperparameters,
)

In [None]:
# Fetch the tuned model's record by its full resource name and display it.
name = "categoryclassifier31072024"
tuned_model_path = 'tunedModels/' + name
model = genai.get_tuned_model(tuned_model_path)

model

TunedModel(name='tunedModels/categoryclassifier31072024',
           source_model='models/gemini-1.0-pro-001',
           base_model='models/gemini-1.0-pro-001',
           display_name='categoryclassifier',
           description='',
           temperature=0.9,
           top_p=1.0,
           top_k=0,
           state=<State.ACTIVE: 2>,
           create_time=datetime.datetime(2024, 7, 30, 19, 9, 58, 497261, tzinfo=datetime.timezone.utc),
           update_time=datetime.datetime(2024, 7, 30, 19, 30, 56, 768210, tzinfo=datetime.timezone.utc),
           tuning_task=TuningTask(start_time=datetime.datetime(2024, 7, 30, 19, 10, 0, 372279, tzinfo=datetime.timezone.utc),
                                  complete_time=None,
                                  snapshots=[...],
                                  hyperparameters=Hyperparameters(epoch_count=100,
                                                                  batch_size=4,
                                                        

In [None]:
# Show the state of the tuned model record (the recorded run displayed State.ACTIVE).
model.state

<State.ACTIVE: 2>

### Check tuning progress

In [None]:
# Summarise tuning progress instead of printing the whole metadata message,
# whose per-step snapshot list runs to hundreds of lines.
metadata = operation.metadata
if metadata is None:
    # The operation may not have reported any metadata yet.
    print("No tuning metadata available yet.")
else:
    print(
        f"completed {metadata.completed_steps}/{metadata.total_steps} steps "
        f"({metadata.completed_percent:.1f}%)"
    )
    if metadata.snapshots:
        # The most recent snapshot carries the latest training loss.
        latest = metadata.snapshots[-1]
        print(f"latest snapshot: step {latest.step}, mean_loss {latest.mean_loss:.4f}")

total_steps: 300
completed_steps: 79
completed_percent: 26.33333396911621
snapshots {
  step: 1
  mean_loss: 25.305644989013672
  compute_time {
    seconds: 1722432759
    nanos: 825793075
  }
}
snapshots {
  step: 2
  mean_loss: 26.37003517150879
  compute_time {
    seconds: 1722432805
    nanos: 4139188
  }
}
snapshots {
  step: 3
  mean_loss: 22.38521385192871
  compute_time {
    seconds: 1722432849
    nanos: 499304249
  }
}
snapshots {
  step: 4
  mean_loss: 22.98593521118164
  compute_time {
    seconds: 1722432865
    nanos: 995988001
  }
}
snapshots {
  step: 5
  mean_loss: 23.081951141357422
  compute_time {
    seconds: 1722432875
    nanos: 990036536
  }
}
snapshots {
  step: 6
  mean_loss: 21.281742095947266
  compute_time {
    seconds: 1722432883
    nanos: 995764545
  }
}
snapshots {
  step: 7
  mean_loss: 19.93014907836914
  compute_time {
    seconds: 1722432897
    nanos: 312994626
  }
}
snapshots {
  step: 8
  mean_loss: 12.081938743591309
  compute_time {
    sec

In [None]:
# Optional: uncomment the lines below to stay in this cell while tuning runs,
# sleeping 30 seconds between iterations.
# NOTE(review): presumably `operation.wait_bar()` yields status updates and
# renders a progress bar until the job completes — confirm against the SDK docs.
# import time

# for status in operation.wait_bar():
#   time.sleep(30)

## Evaluate model

In [None]:
# Wrap the tuned model in a GenerativeModel client for inference.
model = genai.GenerativeModel(
    model_name='tunedModels/categoryclassifier31072024',
)

In [None]:
# Ask the tuned classifier for the category of a sample query.
prompt = 'Write a short summary of the latest developments in the war in Ukraine.'
result = model.generate_content(prompt)
result.text

'News'

In [None]:
# Ask the tuned classifier for the category of a sample query.
prompt = 'How to convert text to speech for my audio book?'
result = model.generate_content(prompt)
result.text

'Music and Audio'

In [None]:
# Ask the tuned classifier for the category of a sample query.
prompt = 'I need help integrating the Stripe API into my Node.js application.'
result = model.generate_content(prompt)
result.text

'Programming and Development'

In [None]:
# Ask the tuned classifier for the category of a sample query.
prompt = 'Can you help me generate a lease agreement for my apartment?'
result = model.generate_content(prompt)
result.text

'Legal and Professional Services'

In [None]:
# Ask the tuned classifier for the category of a sample query.
prompt = 'Can you create a news story about the latest advancements in artificial intelligence?'
result = model.generate_content(prompt)
result.text


'Media and Journalism'

In [None]:
# Ask the tuned classifier for the category of a sample query.
prompt = 'Generate an email to potential investors outlining our business plan.'
result = model.generate_content(prompt)
result.text


'Business and Productivity'

# Evaluation with LangSmith

In [None]:
!pip install -U langsmith

Collecting langsmith
  Downloading langsmith-0.1.94-py3-none-any.whl.metadata (13 kB)
Collecting orjson<4.0.0,>=3.9.14 (from langsmith)
  Downloading orjson-3.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
Downloading langsmith-0.1.94-py3-none-any.whl (139 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m139.9/139.9 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading orjson-3.10.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (141 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m141.1/141.1 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: orjson, langsmith
Successfully installed langsmith-0.1.94 orjson-3.10.6


In [None]:
import getpass
import os

# Keep an API key that is already present in the environment; the previous
# unconditional assignment of "" silently replaced it with an empty value.
# When no key is set, prompt for it so the secret is never stored in the notebook.
if not os.environ.get("LANGCHAIN_API_KEY"):
    os.environ["LANGCHAIN_API_KEY"] = getpass.getpass("LangSmith API key: ")

# Tracing settings read by the LangSmith client.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_PROJECT"] = "evaluation-CategoryClassifier"

In [None]:
from langsmith.schemas import Example, Run

def correct_label(root_run: Run, example: Example) -> dict:
    """Score a run 1 when its predicted category equals the reference label.

    Args:
        root_run: Run whose ``outputs["output"]`` holds the predicted category.
        example: Dataset example whose ``outputs["label"]`` holds the reference.

    Returns:
        ``{"score": 0 or 1, "key": "correct_label"}``.
    """
    # `outputs` is None when the target function raised (for example on a 429
    # quota error), so fall back to an empty mapping instead of crashing.
    predicted = (root_run.outputs or {}).get("output")
    expected = (example.outputs or {}).get("label")

    # Compare labels without surrounding whitespace so a trailing newline in
    # the model's reply does not turn a correct answer into a miss.
    if isinstance(predicted, str):
        predicted = predicted.strip()
    if isinstance(expected, str):
        expected = expected.strip()

    # A missing prediction must never count as correct, even when the label is
    # missing too (None == None would otherwise score 1).
    score = predicted is not None and predicted == expected
    return {"score": int(score), "key": "correct_label"}

In [None]:
from langsmith import traceable
import time
@traceable(
    run_type="llm",
    name="CategoryClassifier",
    project_name="evaluation-CategoryClassifier"
)
def call_gemini(message, *, delay_seconds=20,
                model_name='tunedModels/categoryclassifier31072024'):
    """Classify ``message`` with the tuned model and return the reply text.

    Args:
        message: Prompt text sent to the model.
        delay_seconds: Pause before the request, used as simple client-side
            throttling. Defaults to the previously hard-coded 20 seconds;
            a value of 0 or less skips the pause.
        model_name: Resource name of the tuned model to call. Defaults to the
            previously hard-coded model.

    Returns:
        The ``text`` of the model response.
    """
    # Guard the sleep: time.sleep rejects negative values.
    if delay_seconds > 0:
        time.sleep(delay_seconds)
    model = genai.GenerativeModel(model_name=model_name)
    response = model.generate_content(message)
    return response.text

In [None]:
from langsmith.evaluation import evaluate

dataset_name = "validation-small"

# Run the examples one at a time. With the default concurrency the target is
# invoked for several examples in parallel, so the pause inside `call_gemini`
# does not space the requests out and the API answers with
# "429 Resource has been exhausted".
results = evaluate(
    lambda inputs: call_gemini(inputs["input"]),
    data=dataset_name,
    evaluators=[correct_label],
    experiment_prefix="Category Classifier 1",
    description="Testing the baseline system.",
    max_concurrency=1,
)

View the evaluation results for experiment: 'Category Classifier 1-0ea0a59d' at:
https://smith.langchain.com/o/1853d09d-a4bc-5061-ba35-467b757dc359/datasets/567aa36d-17f8-46ce-812f-208e33911acc/compare?selectedSessions=edade47c-b728-42a5-b4a9-6788f1e33c4f




0it [00:00, ?it/s]

ERROR:langsmith.evaluation._runner:Error running target function: 429 Resource has been exhausted (e.g. check quota).
ERROR:langsmith.evaluation._runner:Error running target function: 429 Resource has been exhausted (e.g. check quota).
