# Use and Evaluate Fine-tuned Model

In [1]:
import os
import openai

from dotenv import load_dotenv, find_dotenv

# Read the local .env file, then hand the OPENAI_API_KEY environment
# variable (None when it is not set) to the openai client.
_ = load_dotenv(find_dotenv())
openai.api_key = os.environ.get('OPENAI_API_KEY')

In [2]:
# Model identifier obtained from the fine-tuning process.
fine_tuned_model = "ft:gpt-3.5-turbo-0613:personal::8B9gl9uo"

# Instruction text sent as the system message of every classification request.
instruction = (
    "Classify the input as either 'Likely fake.' "
    "or 'Not suspected to be fake.'."
)

In [3]:
def classify_news(input_news, instruction=instruction, fine_tuned_model=fine_tuned_model):
    """Ask a chat model to classify one piece of news text.

    Args:
        input_news: The news text; sent as the user message.
        instruction: The classification instruction; sent as the system
            message. Defaults to the module-level ``instruction``.
        fine_tuned_model: Name of the model to query. Defaults to the
            module-level ``fine_tuned_model``; any other model name can be
            passed to compare against it.

    Returns:
        The ``message`` of the first choice in the completion response.
    """
    system_turn = {"role": "system", "content": instruction}
    user_turn = {"role": "user", "content": input_news}
    response = openai.ChatCompletion.create(
        model=fine_tuned_model,
        messages=[system_turn, user_turn],
    )
    first_choice = response.choices[0]
    return first_choice.message

In [4]:
def split_news(text_file, delimiter='#', encoding=None):
    """Read a text file and split its contents into individual news items.

    Args:
        text_file: Path of the file to read.
        delimiter: Separator string that marks the boundary between items.
            Defaults to ``'#'``.
        encoding: Text encoding passed to ``open``. ``None`` (the default)
            leaves the choice to ``open``'s own default.

    Returns:
        A list of strings, one per delimited segment, exactly as produced by
        ``str.split``: segments are not stripped and empty segments are kept.

    Raises:
        ValueError: If ``delimiter`` is an empty string (raised by
            ``str.split``).
    """
    with open(text_file, "r", encoding=encoding) as f:
        return f.read().split(delimiter)

In [5]:
# Result lists for the two models, filled in by the next cell.
response_ft_fake, response_org_fake = [], []
# Items from 'onion_test.txt', used below as the fake-news test set.
fake_news_test = split_news('onion_test.txt')
# Last expression of the cell: displays the number of test items.
len(fake_news_test)

10

In [6]:
print('-------Fake news-----------', 'Responses from fine-tuned model:', sep='\n')

# First pass: default (fine-tuned) model. Each reply is stored and echoed.
for news in fake_news_test:
    reply = classify_news(news)
    response_ft_fake.append(reply)
    print(reply)

print('---------------------------', 'Responses from original model:', sep='\n')

# Second pass: the same items, classified by the base model for comparison.
for news in fake_news_test:
    reply = classify_news(news, fine_tuned_model="gpt-3.5-turbo-0613")
    response_org_fake.append(reply)
    print(reply)

-------Fake news-----------
Responses from fine-tuned model:
{
  "role": "assistant",
  "content": "Likely fake."
}
{
  "role": "assistant",
  "content": "Likely fake."
}
{
  "role": "assistant",
  "content": "Likely fake."
}
{
  "role": "assistant",
  "content": "Likely fake."
}
{
  "role": "assistant",
  "content": "Likely fake."
}
{
  "role": "assistant",
  "content": "Likely fake."
}
{
  "role": "assistant",
  "content": "Likely fake."
}
{
  "role": "assistant",
  "content": "Likely fake."
}
{
  "role": "assistant",
  "content": "Likely fake."
}
{
  "role": "assistant",
  "content": "Likely fake."
}
---------------------------
Responses from original model:
{
  "role": "assistant",
  "content": "Likely fake."
}
{
  "role": "assistant",
  "content": "Likely fake."
}
{
  "role": "assistant",
  "content": "Likely fake."
}
{
  "role": "assistant",
  "content": "Likely fake."
}
{
  "role": "assistant",
  "content": "Likely fake."
}
{
  "role": "assistant",
  "content": "Likely fake."
}


In [7]:
def response_ft_org(news_list, fine_tuned_model=fine_tuned_model, original_model="gpt-3.5-turbo-0613"):
    """Classify every item in ``news_list`` with two models.

    Args:
        news_list: Iterable of news texts to classify.
        fine_tuned_model: Model name used for the first set of responses.
        original_model: Model name used for the second set of responses.

    Returns:
        A ``(response_ft, response_org)`` tuple of lists holding the
        ``'content'`` of each reply, in the order of ``news_list``.
    """
    def _contents_for(model_name):
        # Query one model for every item and keep only the reply text.
        contents = []
        for item in news_list:
            reply = classify_news(item, fine_tuned_model=model_name)
            contents.append(reply['content'])
        return contents

    # Evaluated left to right: all fine-tuned calls finish before the
    # original-model calls start.
    return _contents_for(fine_tuned_model), _contents_for(original_model)

In [8]:
# Load the items from 'cnn_test.txt' and report how many there are.
cnn_news_test = split_news('cnn_test.txt')
print(len(cnn_news_test))
# Classify every item with both models: fine-tuned first, original second.
cnn_resp_ft, cnn_resp_org = response_ft_org(cnn_news_test)

10


In [11]:
# Print each model's responses for the CNN set, one per line, under its own heading.
print('-------CNN news-----------', 'Responses from fine-tuned model:', sep='\n')
for response in cnn_resp_ft:
    print(response)

print('---------------------------', 'Responses from original model:', sep='\n')
for response in cnn_resp_org:
    print(response)

-------CNN news-----------
Responses from fine-tuned model:
Not suspected to be fake.
Not suspected to be fake.
Not suspected to be fake.
Not suspected to be fake.
Not suspected to be fake.
Not suspected to be fake.
Not suspected to be fake.
Not suspected to be fake.
Not suspected to be fake.
Not suspected to be fake.
---------------------------
Responses from original model:
Not suspected to be fake.
Not suspected to be fake.
Not suspected to be fake.
Not suspected to be fake.
Not suspected to be fake.
Not suspected to be fake.
Not suspected to be fake.
Not suspected to be fake.
Not suspected to be fake.
Likely fake.


## Conclusion

The fine-tuned model classified all 20 test samples correctly (100% accuracy), whereas the original model mislabeled one of the 10 CNN articles as 'Likely fake.' (90% accuracy on that set). For more robust testing, use a larger test sample and try other news sources. Note: both the training and testing data are balanced or roughly balanced.

To use the model, call the function `classify_news`, which can readily identify obvious fake news.