# Creating our tuned sentiment analysis model

We start my loading in a dataset of nearly 500 text pieces classified as one of `[negative, positive, neutral]`. 

We'll also split it into train and test sets so that we can evaluate the performance of our fine-tuned model against the base Gemini model. 

In [3]:
import pandas as pd

df = pd.read_csv('sentiment_analysis.csv')

# only keep the 'text' and 'sentiment' columns
df = df[['text', 'sentiment']]

df.head()

Unnamed: 0,text,sentiment
0,What a great day!!! Looks like dream.,positive
1,"I feel sorry, I miss you here in the sea beach",positive
2,Don't angry me,negative
3,We attend in the class just for listening teac...,negative
4,"Those who want to go, let them go",negative


In [4]:
# Split into training and test sets
from sklearn.model_selection import train_test_split

# Keep a consistent random state for reproducibility
RANDOM_STATE = 12345

X_train, X_test, y_train, y_test = train_test_split(df['text'], df['sentiment'], test_size=0.2, random_state=RANDOM_STATE)

# Tune a Gemini base model on our training data

In [7]:
import pprint
from load_creds import load_creds
import google.generativeai as genai

creds = load_creds()

genai.configure(credentials=creds)

print()
print('Available base models:', [m.name for m in genai.list_tuned_models()])
print('My tuned models:', [m.name for m in genai.list_tuned_models()])


Available base models: ['tunedModels/classify-sentiment-v1']
My tuned models: ['tunedModels/classify-sentiment-v1']


In [8]:
# Get our base model to tune
base_model = [
    m for m in genai.list_models()
    if "createTunedModel" in m.supported_generation_methods][0]
base_model

Model(name='models/gemini-1.0-pro-001',
      base_model_id='',
      version='001',
      display_name='Gemini 1.0 Pro 001 (Tuning)',
      description=('The best model for scaling across a wide range of tasks. This is a stable '
                   'model that supports tuning.'),
      input_token_limit=30720,
      output_token_limit=2048,
      supported_generation_methods=['generateContent', 'countTokens', 'createTunedModel'],
      temperature=0.9,
      top_p=1.0,
      top_k=1)

In [9]:
# Prepare our training data
training_data = [
    {"text_input": text, "output": sentiment}
    for text, sentiment in zip(X_train, y_train)
]

In [10]:
# Tune the model
name = f'classify-sentiment-v1'
# operation = genai.create_tuned_model(
#     source_model=base_model.name,
#     training_data=training_data,
#     id = name,
#     epoch_count = 100,
#     batch_size=4,
#     learning_rate=0.001,
# )

In [11]:
model = genai.get_tuned_model(f'tunedModels/{name}')

model.state

<State.ACTIVE: 2>

In [None]:
import pandas as pd
import seaborn as sns

model = operation.result()

snapshots = pd.DataFrame(model.tuning_task.snapshots)

sns.lineplot(data=snapshots, x = 'epoch', y='mean_loss')

# Evaluate our new tuned model

In [23]:
# Setup our general settings
config = {
            "max_output_tokens": 2048, 
            "temperature": 0, 
            "top_p": 1, 
            "top_k": 32
        }

# Setup our safety settings
safety_config = [
    {
        "category": "HARM_CATEGORY_DANGEROUS",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_HARASSMENT",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_HATE_SPEECH",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
        "threshold": "BLOCK_NONE",
    },
    {
        "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
        "threshold": "BLOCK_NONE",
    },
]

In [15]:
# Setup the process to test the base model and tuned model
test_data = [
    {"text_input": text, "output": sentiment}
    for text, sentiment in zip(X_test, y_test)
]

# This function is used to get a response for our base model
def get_sentiment(text, model):
    """Return the sentiment of the given text as 'positive', 'negative', or 'neutral'."""
    prompt = "Classify the sentiment of the following text as 'positive', 'negative', or 'neutral':\n\n" + text
    response = model.generate_content(prompt, safety_settings=safety_config)
    result = response.text.strip().lower()
    return result

In [25]:
results = []
for data in test_data:
    text = data['text_input']
    sentiment = data['output']
    results.append({'text': text, 'actual': sentiment})

In [26]:
# Get the accuracy of the model on the test data
normal_model = genai.GenerativeModel('gemini-pro')
tuned_model = genai.GenerativeModel(model_name="tunedModels/classify-sentiment-v1", safety_settings=safety_config)
for data in results:
    if 'base_predicted' not in data:
        data['base_predicted'] = get_sentiment(data['text'], normal_model)
    if 'tuned_predicted' not in data:
        data['tuned_predicted'] = tuned_model.generate_content(text).text.strip().lower()

In [27]:
# Calculate the percent of test points that have been predicted for each model
base_predicted = [data['base_predicted'] for data in results]
tuned_predicted = [data['tuned_predicted'] for data in results]
actual = [data['actual'] for data in results]

print('Base model # predictions:', len(base_predicted))
print('Tuned model # predictions:', len(tuned_predicted))
print('Actual # predictions:', len(actual))

Base model # predictions: 100
Tuned model # predictions: 100
Actual # predictions: 100


In [28]:
# Calculate accuracy for both models
normal_correct = 0
tuned_correct = 0
for data in results:
    if data['actual'] == data['base_predicted']:
        normal_correct += 1
    if data['actual'] == data['tuned_predicted']:
        tuned_correct += 1

normal_accuracy = normal_correct / len(results)
tuned_accuracy = tuned_correct / len(results)

print(f'Base model accuracy: {normal_accuracy:.2%}')
print(f'Tuned model accuracy: {tuned_accuracy:.2%}')

Base model accuracy: 68.00%
Tuned model accuracy: 34.00%


It turns out our tuned model is not more accurate than the base model. We will stick with the base model for this project. 