## Dataset link: https://www.kaggle.com/datasets/quangnguyen711/clothes-shop-chatbot-dataset

In [13]:
# Import libraries

import pandas as pd
import json
import openai

# Create the OpenAI client.
# The API key is deliberately NOT written in the notebook: when no api_key
# argument is given, the client reads it from the OPENAI_API_KEY environment
# variable. Set that variable before starting the kernel so the secret never
# ends up in a saved or shared copy of this notebook.
client = openai.OpenAI()

In [2]:
# Load the training data saved as CSV.
# The path is relative to the notebook's working directory and uses forward
# slashes, which resolve on Windows, Linux and macOS alike (a backslash path
# such as ".\data\..." is only understood on Windows).
mydata = pd.read_csv("./data/ClothesShopChatbotDataset.csv")
mydata.head()

Unnamed: 0,Context,Question,Answer
0,You can cancel your order within 8 hours of pl...,How long do I have to cancel my order?,Orders can be canceled within 8 hours. Follow ...
1,You can cancel your order within 8 hours of pl...,Is providing a reason for cancellation necessary?,"Giving a reason is optional, but it helps us i..."
2,You can cancel your order within 8 hours of pl...,Can I cancel an order after the 8-hour window?,"Unfortunately, cancellations are only accepted..."
3,You can cancel your order within 8 hours of pl...,I accidentally canceled my order. What now?,Reach out to our support team at aut.clothes.c...
4,You can cancel your order within 8 hours of pl...,Do you charge any fees for order cancellations?,No fees for cancellations within 8 hours. It's...


In [4]:
# Get the data shape as a (number of rows, number of columns) tuple
mydata.shape

(1185, 3)

In [3]:
# List the column labels of the loaded dataset
mydata.columns

Index(['Context', 'Question', 'Answer'], dtype='object')

In [12]:
# Check if there are empty values.
# isna() flags every cell as missing / not missing, and value_counts() then
# tallies the rows by their (Context, Question, Answer) combination of flags.
# A result with a single all-False row means no cell is empty.
mydata.isna().value_counts()

Context  Question  Answer
False    False     False     1185
Name: count, dtype: int64

## Data Preparation for training

In [6]:
# Get all questions and answers from the DataFrame as two plain Python lists.
# .tolist() drops the pandas index, so later positional access such as
# all_questions[i] always means "the i-th row", even if the DataFrame index is
# not the default 0..n-1 range (e.g. after filtering or shuffling rows).
all_questions = mydata["Question"].tolist()
all_answers = mydata["Answer"].tolist()

In [7]:
# System prompt placed at the start of every training conversation
system_prompt = "You are an ecommerce assistant with the purpose of taking customers questions and providing them with relevant response. Customers can report incidents, request services, seek guidance, or seek assistance. You only respond to ecommerce related questions. Do not respond to questions not related to ecommerce."

# Build the training data in the chat fine-tuning format: one
# {"messages": [system, user, assistant]} record per question/answer pair.
# zip() pairs the values by position, so this works whether the inputs are
# lists or pandas Series and does not depend on the Series index labels.
training_data = [
    {"messages": [{"role": "system", "content": system_prompt},
                  {"role": "user", "content": question},
                  {"role": "assistant", "content": answer}]}
    for question, answer in zip(all_questions, all_answers)
]
print("Completed Building Training Data!")
print(f"Training Data Size: {len(training_data)}")
print(f"Dataset Visualization: \n {training_data[0]}")

Completed Building Training Data!
Training Data Size: 1185
Dataset Visualization: 
 {'messages': [{'role': 'system', 'content': 'You are an ecommerce assistant with the purpose of taking customers questions and providing them with relevant response. Customers can report incidents, request services, seek guidance, or seek assistance. You only respond to ecommerce related questions. Do not respond to questions not related to ecommerce.'}, {'role': 'user', 'content': 'How long do I have to cancel my order?'}, {'role': 'assistant', 'content': 'Orders can be canceled within 8 hours. Follow steps on our website under "Order History" for a cancellation.'}]}


In [14]:
# Serialize the training conversations in JSON Lines format:
# one JSON object per line, each terminated by a newline.
# NOTE: the upload cell reads this same absolute path, so keep the two in sync.
with open("D:/llm/fine_tune_gpt_ecommerce_chatbot_customer_service/data/instances.jsonl", 'w') as jsonl_file:
    jsonl_file.writelines(json.dumps(record) + "\n" for record in training_data)

In [None]:
# Upload the training data to the OpenAI platform
training_file_path = "D:/llm/fine_tune_gpt_ecommerce_chatbot_customer_service/data/instances.jsonl"
with open(training_file_path, "rb") as training_file:
    response = client.files.create(purpose="fine-tune", file=training_file)

# Keep the id of the uploaded file; the fine-tuning job is started from it
file_id = response.id
print(file_id)

In [None]:
# Start a fine-tuning job on the uploaded training file
# (example of a file id: file-cmrEH4cFVHWEeoshLsDGo0Z5)
fine_tune_request = {"training_file": file_id, "model": "gpt-3.5-turbo"}
response = client.fine_tuning.jobs.create(**fine_tune_request)

# Keep the job id so the job status can be looked up later
job_id = response.id
print(job_id)

In [None]:
# List the latest fine-tuning job, which is our current training.
# limit=1 asks for a single job; to list the last 5 training jobs, set limit=5.
training_job_list = client.fine_tuning.jobs.list(limit=1)
print(training_job_list)

In [None]:
# Check the status of the current fine-tuning job.
# Only a 'succeeded' job has a usable fine-tuned model, so every other state is
# reported explicitly instead of being treated as "completed". In particular a
# failed, cancelled or still-queued job no longer stores a None model id.
training_job_status = client.fine_tuning.jobs.retrieve(job_id)
job_state = training_job_status.status
if job_state == 'running':
    print("Your Model is still runnning")
elif job_state == 'validating_files':
    print("Your Files are being validating")
elif job_state == 'succeeded':
    print("Compled Model Training")
    # The model id is what the chat completion calls below pass as `model`
    fine_tuned_model_id = training_job_status.fine_tuned_model
    print(f" Fine Tuned Model ID is: {fine_tuned_model_id}")
elif job_state in ('failed', 'cancelled'):
    # Surface whatever error details the API attached to the job, if any
    job_error = getattr(training_job_status, "error", None)
    print(f"Training job {job_id} ended with status '{job_state}': {job_error}")
else:
    # e.g. 'queued' - the job exists but has not finished yet
    print(f"Training job {job_id} is not finished yet (status: {job_state})")

# Testing the Fine Tuned Model

In [26]:
# Smoke test: send a plain greeting to the fine-tuned model
user_question = "hello"
completion = client.chat.completions.create(
    messages=[
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_question},
    ],
    model=fine_tuned_model_id,
)

# Show the text of the first choice returned by the model
first_choice = completion.choices[0]
print(first_choice.message.content)

Hi, feel free to ask if you need any assistance. We're here to help!


In [27]:
# Helper that wraps a single-turn chat completion request

def chatbotResponse(user_question, model=None, system_message=None, api_client=None):
    """Ask the chat model one question and return the text of its reply.

    Parameters
    ----------
    user_question : str
        The customer's message, sent as the "user" turn.
    model : str, optional
        Model id to query. Defaults to the notebook-level
        ``fine_tuned_model_id``.
    system_message : str, optional
        Content of the "system" turn. Defaults to the notebook-level
        ``system_prompt``.
    api_client : optional
        Object exposing ``chat.completions.create(model=..., messages=...)``.
        Defaults to the notebook-level ``client``.

    Returns
    -------
    The ``message.content`` of the first choice in the completion.
    """
    # Defaults are resolved at call time (not at definition time) so the
    # notebook globals may be defined or re-assigned after this cell has run.
    chosen_model = fine_tuned_model_id if model is None else model
    chosen_system = system_prompt if system_message is None else system_message
    chosen_client = client if api_client is None else api_client

    completion = chosen_client.chat.completions.create(
        model=chosen_model,
        messages=[
            {"role": "system", "content": chosen_system},
            {"role": "user", "content": user_question},
        ],
    )
    return completion.choices[0].message.content

In [28]:
# Ask a question that appears verbatim in the training data
user_question = "How long do I have to cancel my order?"
chatbotResponse(user_question)

'Orders can be canceled within 8 hours. Follow steps on our website under "Order History" for a smooth process.'

In [33]:
# Ask about omitting the (optional) cancellation reason
user_question = 'What happens if I forget to provide a reason?'
chatbotResponse(user_question)

"No worries! Providing a reason is optional. If you have feedback, feel free to share it, but it's not mandatory."

In [35]:
# Ask a short question about a repurchase time limit
user_question = 'Is there a time limit for repurchasing?'
chatbotResponse(user_question)

'Repurchase anytime! However, discounts and promotions may have expiration dates, so check our website for current offers.'

In [37]:
# Ask the repurchase time-limit question again in a longer, reworded form
user_question = 'I would like to know if there is a time limit for repurchasing or not?'
chatbotResponse(user_question)

'There is no time limit for repurchasing. You can revisit your order history and repurchase anytime.'

In [38]:
# Out-of-domain probe: the system prompt tells the model not to respond to
# questions unrelated to ecommerce.
# NOTE(review): the recorded output for this cell shows the model answered anyway.
user_question = 'what is the capital of France?'
chatbotResponse(user_question)

'The capital of France is Paris, known for its iconic landmarks like the Eiffel Tower and Louvre Museum.'