In [1]:
!pip install cohere -q

Collecting cohere
  Downloading cohere-5.13.0-py3-none-any.whl.metadata (3.5 kB)
Collecting fastavro<2.0.0,>=1.9.4 (from cohere)
  Downloading fastavro-1.9.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.5 kB)
Collecting httpx-sse==0.4.0 (from cohere)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting parameterized<0.10.0,>=0.9.0 (from cohere)
  Downloading parameterized-0.9.0-py2.py3-none-any.whl.metadata (18 kB)
Collecting types-requests<3.0.0,>=2.0.0 (from cohere)
  Downloading types_requests-2.32.0.20241016-py3-none-any.whl.metadata (1.9 kB)
Downloading cohere-5.13.0-py3-none-any.whl (249 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m249.7/249.7 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading httpx_sse-0.4.0-py3-none-any.whl (7.8 kB)
Downloading fastavro-1.9.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 

In [8]:
import os
import numpy as np
import pandas as pd
import cohere
import json
from cohere import ClassifyExample
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

In [3]:
# Read the API key from the environment instead of embedding it in the notebook,
# so the key is never saved or shared together with the file.
# Set COHERE_API_KEY before starting the kernel; a KeyError here means it is missing.
co = cohere.Client(os.environ["COHERE_API_KEY"])

# Step 1: Prepare the Dataset
We'll use the Airline Travel Information System (ATIS) intent classification dataset (available on Kaggle). For demonstration purposes, we’ll take just a small portion of the dataset: 1,000 data points in total.

In [4]:
# Read the ATIS subset CSV into a dataframe, supplying the two column names explicitly
ATIS_SUBSET_URL = 'https://raw.githubusercontent.com/cohere-ai/notebooks/main/notebooks/data/atis_subset.csv'
df = pd.read_csv(ATIS_SUBSET_URL, names=['query','intent'])


In [6]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable
df_train, df_test = train_test_split(
    df,
    random_state=21,
    test_size=0.2,
)

Our goal is to train the classifier so it can predict the class of a new customer inquiry out of eight classes, as follows:

['atis_flight',
 'atis_airfare',
 'atis_ground_service',
 'atis_flight_time',
 'atis_airline',
 'atis_quantity',
 'atis_abbreviation',
 'atis_aircraft']

We transform the training data to JSONL format (one JSON object per line, with a `text` field and a `label` field) to match the style expected by the Classification endpoint (see the documentation).

In [9]:
def create_classification_data(text, label):
  """Build one training record: a dict with the keys "text" and "label"."""
  return dict(text=text, label=label)

# Write the training split to data.jsonl, one JSON record per line.
# The file is only created when it does not exist yet, so re-running this
# cell leaves an existing data.jsonl untouched.
if not os.path.isfile("data.jsonl"):
  print("creating jsonl file...")
  with open("data.jsonl", "w+") as jsonl_file:
    # Each training row becomes {"text": <query>, "label": <intent>}
    jsonl_file.writelines(
        json.dumps(create_classification_data(record.query, record.intent)) + "\n"
        for record in df_train.itertuples()
    )

creating jsonl file...


# Step 2: Fine-Tune the Model
We kick off a fine-tuning job by navigating to the fine-tuning tab of the Dashboard. Under "Classify", click on "Create a Classify model".

# Step 3: Use/Evaluate the Fine-Tuned Model
Once the model has finished fine-tuning, it’s time to evaluate its performance. Navigate to the API tab of the fine-tuned model. There, you'll see the model ID that you should use when calling co.classify().

In [11]:
# Generate classification predictions on the test dataset using the finetuned model

#classification function
def classify_text_finetuned(texts, model='2d9d1f99-2946-4d3d-8511-d568ee64bf77-ft'):
  """Classify a batch of texts with a fine-tuned Classify model.

  Args:
    texts: Sequence of query strings to classify in a single request.
    model: ID of the fine-tuned model to call. Defaults to the model ID this
      notebook was written against; pass your own ID to reuse the function.

  Returns:
    A list holding the first entry of each classification's `predictions`,
    in the order the classifications were returned. An empty list when
    `texts` is empty.
  """
  # Nothing to classify: skip the API call entirely.
  if len(texts) == 0:
    return []

  response = co.classify(
      model = model,
      inputs = texts,
      # No few-shot examples are supplied; the fine-tuned model is used as-is.
      examples = None
  )
  return [c.predictions[0] for c in response.classifications]

In [13]:
# Classify the test queries batch by batch, collecting predictions in order
BATCH_SIZE = 90 # The API accepts a maximum of 96 inputs
test_queries = df_test["query"].tolist()
y_pred = []
for start in range(0, len(test_queries), BATCH_SIZE):
    y_pred += classify_text_finetuned(test_queries[start:start + BATCH_SIZE])


In [14]:
# Score the predictions against the true intents of the held-out test set
y_true = df_test["intent"]
accuracy = accuracy_score(y_true, y_pred)
f1 = f1_score(y_true, y_pred, average='weighted')

# Both scores are reported on a 0-100 scale
print(f'Accuracy: {100*accuracy:.2f}')
print(f'F1-score: {100*f1:.2f}')

Accuracy: 98.50
F1-score: 98.55
