# Creating our tuned sentiment analysis model

We start my loading in a dataset of nearly 500 text pieces classified as one of `[negative, positive, neutral]`. 

We'll also split it into train and test sets so that we can evaluate the performance of our fine-tuned model against the base Gemini model. 

In [1]:
import pandas as pd

df = pd.read_csv('sentiment_analysis.csv')

# only keep the 'text' and 'sentiment' columns
df = df[['text', 'sentiment']]

df.head()

Unnamed: 0,text,sentiment
0,What a great day!!! Looks like dream.,positive
1,"I feel sorry, I miss you here in the sea beach",positive
2,Don't angry me,negative
3,We attend in the class just for listening teac...,negative
4,"Those who want to go, let them go",negative


In [14]:
# Split into training and test sets
from sklearn.model_selection import train_test_split

# Keep a consistent random state for reproducibility
RANDOM_STATE = 12345

X_train, X_test, y_train, y_test = train_test_split(df['text'], df['sentiment'], test_size=0.2, random_state=RANDOM_STATE)

# Tune a Gemini base model on our training data

In [35]:
import google.generativeai as genai

# Get our base model to tune
base_model = [
    m for m in genai.list_models()
    if "createTunedModel" in m.supported_generation_methods][0]
base_model

Model(name='models/gemini-1.0-pro-001',
      base_model_id='',
      version='001',
      display_name='Gemini 1.0 Pro 001 (Tuning)',
      description=('The best model for scaling across a wide range of tasks. This is a stable '
                   'model that supports tuning.'),
      input_token_limit=30720,
      output_token_limit=2048,
      supported_generation_methods=['generateContent', 'countTokens', 'createTunedModel'],
      temperature=0.9,
      top_p=1.0,
      top_k=1)

In [19]:
# Prepare our training data
training_data = [
    {"text_input": text, "output": sentiment}
    for text, sentiment in zip(X_train, y_train)
]

In [20]:
# Tune the model
name = f'classify-sentiment-v1'
# operation = genai.create_tuned_model(
#     source_model=base_model.name,
#     training_data=training_data,
#     id = name,
#     epoch_count = 100,
#     batch_size=4,
#     learning_rate=0.001,
# )

In [27]:
model = genai.get_tuned_model(f'tunedModels/{name}')

model.state

<State.CREATING: 1>

In [28]:
import time

for status in operation.wait_bar():
  time.sleep(30)

 85%|████████▍ | 8446/9975 [2:27:44<26:44,  1.05s/it]  


KeyboardInterrupt: 

# Evaluate our new tuned model

In [30]:
# Setup the process to test the base model and tuned model
test_data = [
    {"text_input": text, "output": sentiment}
    for text, sentiment in zip(X_test, y_test)
]

# This function is used to get a response for our base model
def get_sentiment(text, model):
    """Return the sentiment of the given text as 'positive', 'negative', or 'neutral'."""
    prompt = "Classify the sentiment of the following text as 'positive', 'negative', or 'neutral':\n\n" + text
    response = model.generate_content(prompt)
    result = response.text.strip().lower()
    return result

# Setup our safety settings
safety_settings = {
    'HARASSMENT': 'MEDIUM',

In [None]:
from google.generativeai.types import HarmCategory, HarmBlockThreshold

# Setup our safety settings
safety_settings = {
        HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
        'HARASSMENT': 'block_none',
        'HATE_SPEECH': 'block_none',
        'INAPPROPRIATE_CONTENT': 'block_none'

In [40]:
# Get the accuracy of the model on the test data
normal_model = genai.GenerativeModel('gemini-pro')
correct = 0
total = len(test_data)
results_base = []
for data in test_data:
    text = data['text_input']
    sentiment = data['output']
    print(f"Text: {text}, Sentiment: {sentiment}")
    predicted_sentiment = get_sentiment(text, normal_model)
    print(f"Predicted: {predicted_sentiment}, Actual: {sentiment}")
    print("##############################################")
    results_base.append({'text': text, 'predicted': predicted_sentiment, 'actual': sentiment, 'correct': predicted_sentiment == sentiment})
    if predicted_sentiment == sentiment:
        correct += 1

base_model_accuracy = correct / total

Text:  i know!!, Sentiment: neutral
Predicted: positive, Actual: neutral
##############################################
Text:  If only we could ever actually be allowed to stay here and do that, Sentiment: neutral
Predicted: positive, Actual: neutral
##############################################
Text:  laughs I`m glad that you have self confidence - it`s a wonderful trait to have  I`ll applaud extra loud for it, okay?, Sentiment: positive
Predicted: positive, Actual: positive
##############################################
Text:  Not going to dwell on it. It happened, it`s passed. Just a shame as he was so supportive! Such is life!  x, Sentiment: negative
Predicted: neutral, Actual: negative
##############################################
Text: I'm depressed, I'm thinking about suicide, what I need to do now?, Sentiment: negative


ValueError: The `response.parts` quick accessor only works for a single candidate, but none were returned. Check the `response.prompt_feedback` to see if the prompt was blocked.

In [None]:
# Get the accuracy of the tuned model
tuned_model = genai.get_tuned_model(f'tunedModels/{name}')
correct = 0
total = len(test_data)
results_tuned = []
for data in test_data:
    text = data['text_input']
    sentiment = data['output']
    predicted_sentiment = tuned_model.generate_content(text)
    results_tuned.append({'text': text, 'predicted': predicted_sentiment, 'actual': sentiment, 'correct': predicted_sentiment == sentiment})
    if predicted_sentiment == sentiment:
        correct += 1

tuned_model_accuracy = correct / total

In [2]:
import pprint
from load_creds import load_creds

creds = load_creds()

genai.configure(credentials=creds)

print()
print('Available base models:', [m.name for m in genai.list_tuned_models()])
print('My tuned models:', [m.name for m in genai.list_tuned_models()])

  from .autonotebook import tqdm as notebook_tqdm



Available base models: []
My tuned models: []


In [None]:
base_model = [
    m for m in genai.list_models()
    if "createTunedModel" in m.supported_generation_methods][0]
base_model

Model(name='models/gemini-1.0-pro-001',
      base_model_id='',
      version='001',
      display_name='Gemini 1.0 Pro 001 (Tuning)',
      description=('The best model for scaling across a wide range of tasks. This is a stable '
                   'model that supports tuning.'),
      input_token_limit=30720,
      output_token_limit=2048,
      supported_generation_methods=['generateContent', 'countTokens', 'createTunedModel'],
      temperature=0.9,
      top_p=1.0,
      top_k=1)

In [33]:
model = genai.GenerativeModel('gemini-pro')
response = model.generate_content('Classify the following as positive or negative sentiment: thats all the help I needed, thanks')

response.text

'Positive'