# GPT-3.5 Classifier 

## Arousal (with examples)

In [1]:
import pandas as pd

In [77]:
# Work from the classifier folder: later cells use relative paths such as cache/ and predictions/.
%cd "/Users/ardenspehar/Desktop/research-proj/research-proj/classifier" 

/Users/ardenspehar/Desktop/research-proj/research-proj/classifier


In [78]:
df=pd.read_csv('/Users/ardenspehar/Desktop/research-proj/research-proj/classifier/smaller_df1.csv') # load the data with pandas
df.head(10) # peek at the first 10 rows; the `value` column is the text that gets rated below

Unnamed: 0.1,Unnamed: 0,cond,SID,value,article_number
0,1,self,s001,It means you don't have to try very hard to co...,8
1,2,self,s002,not so good i feel about this one,62
2,3,social,s003,Maybe it could do more than just make people r...,72
3,4,social,s004,"Maybe dancing won't just keep the heart young,...",14
4,5,social,s005,CBD is it a waste of money? Or can it help us?...,88
5,6,social,s008,This is about the attempt to remove the endang...,89
6,7,self,s009,Healthy and nutritional value of banana over c...,68
7,8,social,s010,CBD can help people with their pain,88
8,9,social,s012,Kids skipped school to protest demand on clima...,109
9,10,self,s014,I'm interested in alternative energy.,1


In [79]:
# Read the few-shot examples into `examples`, using the first CSV column as the index, and peek at them.
# generate_prompt reads the first three rows (`value` and `rating`) by position.
examples=pd.read_csv('/Users/ardenspehar/Desktop/research-proj/research-proj/classifier/arousal-examples2.csv', index_col=0)
examples.head(10)

Unnamed: 0_level_0,value,rating
SID,Unnamed: 1_level_1,Unnamed: 2_level_1
s005,5 Takeaways from Climate TECH recent conferenc...,1
s230,"Being a dog lover, I have found that having a ...",3
s337,"This is primarily an attack on my home state, ...",5


In [80]:
inputs = df.value.values # array of the texts to rate; the classification loop iterates over it and generate_prompt indexes into it

In [81]:
# Print the first 5 texts as a sanity check that the right column was picked up
print(inputs[:5])

["It means you don't have to try very hard to count calories as long as you cut out universally unhealthy things like sugar."
 'not so good i feel about this one'
 'Maybe it could do more than just make people run faster. Maybe do other things faster.'
 "Maybe dancing won't just keep the heart young, but your brain too."
 'CBD is it a waste of money? Or can it help us? Many unknowns. Might be fake and a waste.']


In [5]:
#load other dependencies 
import argparse 
import json
import os
import numpy as np
import openai
import pandas as pd
import random
import time
from tqdm.notebook import tqdm
from dotenv import load_dotenv
import time

In [83]:
# Run configuration for the arousal classifier. These names are read as globals
# by the prompt builder, the cache-filename helper and the classification loop.
title='BBPRIME'
subtitle='Arousal'
stim_set=title+'-'+subtitle  # used in the cache/ and predictions/ folder names and in output file names
seed=1  # seeds Python's `random` below and is part of every cache/prediction/output file name
temperature=0.1  # sampling temperature sent with each chat request; kept low to limit run-to-run variability
engine='gpt-3.5-turbo' #change this to use different models available to you via OpenAI
n_context=1  # 0 selects the zero-shot prompt in generate_prompt; any other value uses the few-shot prompt
cache = True  # switch for the on-disk cache of API responses
resume=False  # False resets `visited`, `predicted_ratings` and `request_count` before classifying
# MIDI='freq' #or 'name', 'number', 'freq'
audience='People'
item='Text'

load_dotenv()

# 'api_key' is the name of the environment variable (for example a line in your
# .env file) that holds the OpenAI key; change it to whatever you call yours.
apiKey = os.environ.get('api_key')
if not apiKey:
    # Fail fast: with no key every request fails, and the classification loop
    # retries failed requests without limit, so the notebook would hang.
    raise RuntimeError("OpenAI API key not found: set 'api_key' in your .env file or environment.")

openai.api_key = apiKey #initialize the key so we can access the API

if cache:
    random.seed(seed)

In [None]:
def generate_prompt(current):
    """Build the arousal-rating prompt for the text at position `current` in `inputs`.

    Reads the notebook globals `inputs`, `examples` and `n_context`.

    Args:
        current: Integer position in `inputs` of the text to rate.

    Returns:
        The prompt string. When `n_context == 0` it is a one-line zero-shot
        instruction; otherwise it is the task description followed by the first
        three rows of `examples` (text and rating) and then the target text.
    """
    if n_context==0:
        # Zero-shot: return before touching `examples`, so this path also works
        # when no examples are loaded. The upper anchor is 5 (it used to say 3),
        # which matches the 1-5 answer range requested in the same sentence.
        return f"""Label the level of arousal in this short text. Answer only with a number between 1 and 5: 1 if low arousal 5 if high arousal. Here is the text:\n{inputs[current]}"""

    prompt = f"""Your task is to label the level of arousal of a short text. 
    Low arousal text is dull, unengaging, unexciting, evoking little to no emotional or cognitive reactions. 
    High arousal text is exceptionally engaging, evoking intense emotional and cognitive reactions.
    You will be given text to rate. 
    Answer with only a whole number between 1 and 5, with 1 being low arousal and 5 being high arousal

Text: {examples.value.iloc[0]} 
Rating: {examples.rating.iloc[0]}
    
Text: {examples.value.iloc[1]}
Rating: {examples.rating.iloc[1]}

Text: {examples.value.iloc[2]}
Rating: {examples.rating.iloc[2]}

Text: {inputs[current]}
Rating:
"""
    return prompt

# One cache directory per stimulus set (relative to the current working directory);
# create it up front so the cache file can be written later.
cache_folder = f'cache/{stim_set}'
os.makedirs(cache_folder, exist_ok=True)

def create_cache_filename():
    """Return the path of the JSON response cache for the current run settings.

    The file name is stim_set, n_context, engine, temperature and seed joined
    with hyphens, plus a `.json` suffix, inside `cache_folder`.
    """
    run_settings = (stim_set, n_context, engine, temperature, seed)
    run_tag = '-'.join(str(setting) for setting in run_settings)
    return os.path.join(cache_folder, f'{run_tag}.json')

# Start from a clean slate unless resuming; when `resume` is True the three
# names below must already exist in the kernel from the earlier attempt.
if not resume:
    visited = []
    predicted_ratings = np.zeros((len(inputs)))  # 0 marks "no rating stored yet"; requested ratings are 1-5
    request_count = 0

# The response cache gets its own name. Previously `cache` (the boolean switch
# from the config cell) was rebound to an empty dict here; an empty dict is
# falsy, so the saved cache file was never loaded.
response_cache = {}
if cache and os.path.exists(create_cache_filename()):
    with open(create_cache_filename()) as cache_file:
        response_cache = json.load(cache_file)

for idx1, bname1 in enumerate(tqdm(inputs)):
    current = idx1
    if current in visited and predicted_ratings[current] != 0:
        print(f'Already visited {current}')
        continue

    visited.append(current)

    key = f'{current}'
    prompt = generate_prompt(current)
    cached_entry = response_cache.get(key)
    # Cache keys are only row numbers, so a cached answer is reused only when it
    # was produced by exactly this prompt (same text, same examples, same wording).
    if cached_entry is not None and cached_entry.get('prompt') == prompt:
        choices = cached_entry['choices']
        print('Using cached choices for key', key)
    else:
        response = False
        i = 0
        # Retries until a request succeeds; there is no upper bound on attempts.
        while not response:
            i += 1
            try:
                # NOTE(review): in the pre-1.0 openai client the per-request HTTP
                # timeout is `request_timeout`; confirm `timeout` does what is intended.
                response = openai.ChatCompletion.create(
                    model=engine,
                    messages=[{'role': 'user', 'content': prompt}],
                    temperature=temperature,
                    timeout=10
                )
            except Exception as e:
                print(f'Attempt {i} failed\n{e}\n')

                time.sleep(7) #7 second delay
        choices = [dict(choice.items()) for choice in response.choices]

        response_cache[key] = {
            'prompt': prompt,
            'choices': choices,
            'created': response.created
        }

        request_count += 1
        if cache and request_count % 5 == 0:
            # Periodic checkpoint so an interrupted run keeps most of its responses.
            with open(create_cache_filename(), 'w') as cache_file:
                json.dump(response_cache, cache_file)

    try:
        answer = choices[0]['message']['content'].replace('\n', '').strip()
        predicted_ratings[idx1] = int(answer)
    except Exception as e:
        # The reply was not a bare integer; the stored rating keeps its previous value.
        print(f'Error: {e}\n\n{response_cache.get(key)}')

# Final flush: without it, responses received since the last multiple-of-5
# checkpoint would be missing from the cache file.
if cache:
    with open(create_cache_filename(), 'w') as cache_file:
        json.dump(response_cache, cache_file)

os.makedirs(f'predictions/{stim_set}', exist_ok=True)
np.save(f'predictions/{stim_set}/{stim_set}-{n_context}-{engine}-{temperature}-{seed}.npy', predicted_ratings)


In [43]:
df=pd.read_csv("/Users/ardenspehar/Desktop/research-proj/research-proj/classifier/smaller_df1.csv") #read in the original dataframe again
# The ratings were saved from a plain numeric array, so they load without pickle
# support; leaving allow_pickle at its default (False) avoids unpickling file contents.
predicted_ratings=np.load(f'predictions/{stim_set}/{stim_set}-{n_context}-{engine}-{temperature}-{seed}.npy')
df.head()

FileNotFoundError: [Errno 2] No such file or directory: 'predictions/BBPRIME-Tone/BBPRIME-Tone-1-gpt-3.5-turbo-0.1-1.npy'

In [None]:
# A bare expression is only displayed when it is the last line of a cell, so
# print a short preview of the ratings instead of naming the array.
print(predicted_ratings[:10])
df.columns

In [30]:
# Assemble the results table in one step: the model's rating, the rated text and the SID for each row.
arousal_df_gpt35_250 = pd.DataFrame({
    'gpt_arousal_ratings': predicted_ratings,
    'text': df.value.values,  # update to whatever the text column is called
    'SID': df.SID.values,  # update to the SID column
})

In [31]:
# Preview the first rows of the assembled results table
arousal_df_gpt35_250.head()

Unnamed: 0,gpt_arousal_ratings,text,SID
0,2.0,"If you skip breakfast before you workout, it ...",s001
1,2.0,"If you skip breakfast before you workout, it ...",s001
2,1.0,This matters to me personally because I am try...,s052
3,1.0,This matters to me personally because I am try...,s052
4,2.0,A study apparently shows breakfast is not the ...,s119


In [60]:
# Save the results table as a CSV named after the run settings
output_directory = '/Users/ardenspehar/Desktop/research-proj/research-proj/gpt-and-raters/arousal_results/final/' #update to where you want to save the work
os.makedirs(output_directory, exist_ok=True)

results_filename = f'{stim_set}-{n_context}-{engine}-{temperature}-{seed}.csv'
results_path = os.path.join(output_directory, results_filename)
arousal_df_gpt35_250.to_csv(results_path, index=False)  # index=False leaves the row index out of the file

## Emotional Tone (with examples)

In [1]:
import pandas as pd
    # set directory to get .env file

In [2]:
# Work from the project folder: load_dotenv() and the relative cache/ and predictions/ paths are used from here.
%cd "/Users/stevenmesquiti/Desktop/Ardens-Project/mindcore-research" 

/Users/stevenmesquiti/Desktop/Ardens-Project/mindcore-research


In [58]:
#df=pd.read_csv('/Users/stevenmesquiti/Desktop/Ardens-Project/mindcore-research/data/smaller_df1.csv') # load in the data using pandas and take a peek at the first 10 rows

#df=pd.read_csv('/Users/stevenmesquiti/Desktop/Ardens-Project/mindcore-research/data/smaller_df2.csv') # load in the data using pandas and take a peek at the first 10 rows

#df=pd.read_csv('/Users/stevenmesquiti/Desktop/Ardens-Project/mindcore-research/data/smaller_df3.csv') # load in the data using pandas and take a peek at the first 10 rows

#df=pd.read_csv('/Users/stevenmesquiti/Desktop/Ardens-Project/mindcore-research/data/smaller_df4.csv') # load in the data using pandas and take a peek at the first 10 rows

df=pd.read_csv('/Users/stevenmesquiti/Desktop/Ardens-Project/mindcore-research/data/smaller_df5.csv') # load the data with pandas (swap in one of the commented-out files above to rate another subset)

df.head(10) # peek at the first 10 rows; the `value` column is the text that gets rated below

Unnamed: 0.1,Unnamed: 0,cond,SID,value,article_number
0,1,self,s001,Climate change is making current wildfires all...,7
1,2,self,s002,Banana rich in potassiums and good than anything,68
2,3,social,s003,Because its fighting the misinformation so pre...,77
3,4,social,s004,Keeping active seems like it is good for every...,18
4,5,social,s005,The BOKS program for children and exercise bef...,86
5,6,social,s008,It seems that CBD oil is not as beneficial as ...,88
6,7,self,s009,The article covers an inspiring story on advoc...,65
7,8,social,s010,they can take the changes and apply them to th...,84
8,9,social,s012,Local governments are suing oil and gas compan...,105
9,10,self,s014,An athlete is working on environmental preserv...,5


In [51]:
# Tone examples: generate_prompt reads the `text` and `rating` columns of the first 8 rows by position.
# The path previously began with a doubled slash ('//Users/...'), which was a typo.
examples=pd.read_csv('/Users/stevenmesquiti/Desktop/Ardens-Project/mindcore-research/examples/tone-rating-ex.csv') #update with the relevant CSV path
examples.head(10)

Unnamed: 0,text number,text,rating
0,1,Consuming less is beneficial for earth conserv...,1
1,2,I am a young person who is attempting to becom...,1
2,3,​​Runners are entering races that are above th...,1
3,4,Spending less time working out for great resul...,1
4,5,I used to drink a lot of carbonated beverages ...,0
5,6,It is warning people that climate change is re...,0
6,7,Disaster funds are very slow to reach the peop...,0
7,8,"If you take statins, beware of risk of staph s...",0


In [59]:
inputs = df.value.values # array of the texts to rate; the classification loop iterates over it and generate_prompt indexes into it
print(inputs[:3]) # sanity check: show the first 3 texts

['Climate change is making current wildfires all over the world even worse'
 'Banana rich in potassiums and good than anything'
 'Because its fighting the misinformation so prevelant in media these days.']


In [47]:
#load other dependencies 
import argparse
import json
import os
import numpy as np
import openai
import pandas as pd
import random
import time
from tqdm.notebook import tqdm
from dotenv import load_dotenv

In [48]:
# Run configuration for the emotional-tone classifier. These names are read as
# globals by the prompt builder, the cache-filename helper and the classification loop.
title='BBPRIME'
subtitle='Tone'
stim_set=title+'-'+subtitle  # used in the cache/ and predictions/ folder names and in output file names
seed=1  # seeds Python's `random` below and is part of every cache/prediction/output file name
temperature=0.1  # sampling temperature sent with each chat request; kept low to limit run-to-run variability
engine='gpt-3.5-turbo' #change this to use different models available to you via OpenAI
n_context=1  # 0 selects the zero-shot prompt in generate_prompt; any other value uses the few-shot prompt
cache = True  # switch for the on-disk cache of API responses
resume=False  # False resets `visited`, `predicted_ratings` and `request_count` before classifying
# MIDI='freq' #or 'name', 'number', 'freq'
audience='People'
item='Text'


load_dotenv()

# 'steven_key' is the name of the environment variable (for example a line in
# your .env file) that holds the OpenAI key; change it to whatever you call yours.
apiKey = os.environ.get('steven_key')
if not apiKey:
    # Fail fast: with no key every request fails, and the classification loop
    # retries failed requests without limit, so the notebook would hang.
    raise RuntimeError("OpenAI API key not found: set 'steven_key' in your .env file or environment.")

openai.api_key = apiKey #initialize the key so we can access the API

if cache:
    random.seed(seed)

In [61]:
def generate_prompt(current):
    """Build the emotional-tone prompt for the text at position `current` in `inputs`.

    Reads the notebook globals `inputs`, `examples` and `n_context`.

    Args:
        current: Integer position in `inputs` of the text to rate.

    Returns:
        The prompt string. When `n_context == 0` it is a one-line zero-shot
        instruction; otherwise it is the task description followed by the first
        eight rows of `examples` (text and rating) and then the target text.
    """
    if n_context==0:
        # Zero-shot: return before touching `examples`, so this path also works
        # when no examples (or fewer than eight) are loaded.
        return f"""Label the emotional tone of this text. Answer with only a number, with 0 being negative tone and 1 being positive tone. Here is the text:\n{inputs[current]}"""

    prompt = f"""Your task is to label the emotional tone of a short text.
    Emotional tone is the degree of positivity or negativity of a text. 
    Positive tone has a more upbeat style and can include words like “good, well, new, love” but is not limited to these words. 
    Negative tone reveals anxiety, sadness, and hostility and can include words like “bad, wrong, hate, too much” but is not limited to these words. 
    You will be provided with 8 examples of text and associated ratings. Then you will be given text to rate. 
    Answer with only a whole number, with 0  being negative tone and 1 being positive tone



Text: {examples.text.iloc[0]} 
Rating: {examples.rating.iloc[0]}
    
Text: {examples.text.iloc[1]}
Rating: {examples.rating.iloc[1]}

Text: {examples.text.iloc[2]}
Rating: {examples.rating.iloc[2]}

Text: {examples.text.iloc[3]}
Rating: {examples.rating.iloc[3]}

Text: {examples.text.iloc[4]}
Rating: {examples.rating.iloc[4]}

Text: {examples.text.iloc[5]}
Rating: {examples.rating.iloc[5]}

Text: {examples.text.iloc[6]}
Rating: {examples.rating.iloc[6]}

Text: {examples.text.iloc[7]}
Rating: {examples.rating.iloc[7]}

Text: {inputs[current]}
Rating:
"""
    return prompt

# One cache directory per stimulus set (relative to the current working directory);
# create it up front so the cache file can be written later.
cache_folder = f'cache/{stim_set}'
os.makedirs(cache_folder, exist_ok=True)

def create_cache_filename():
    """Return the path of the JSON response cache for the current run settings.

    The file name is stim_set, n_context, engine, temperature and seed joined
    with hyphens, plus a `.json` suffix, inside `cache_folder`.
    """
    run_settings = (stim_set, n_context, engine, temperature, seed)
    run_tag = '-'.join(str(setting) for setting in run_settings)
    return os.path.join(cache_folder, f'{run_tag}.json')

# Start from a clean slate unless resuming; when `resume` is True the three
# names below must already exist in the kernel from the earlier attempt.
if not resume:
    visited = []
    # NaN marks "no rating stored yet". Zero cannot be used as the marker here
    # because 0 is a valid tone rating (negative tone); with a zero-filled array
    # an unparseable reply was indistinguishable from a negative-tone rating.
    predicted_ratings = np.full(len(inputs), np.nan)
    request_count = 0

# The response cache gets its own name. Previously `cache` (the boolean switch
# from the config cell) was rebound to an empty dict here; an empty dict is
# falsy, so the saved cache file was never loaded.
response_cache = {}
if cache and os.path.exists(create_cache_filename()):
    with open(create_cache_filename()) as cache_file:
        response_cache = json.load(cache_file)

for idx1, bname1 in enumerate(tqdm(inputs)):
    current = idx1
    if current in visited and not np.isnan(predicted_ratings[current]):
        print(f'Already visited {current}')
        continue

    visited.append(current)

    key = f'{current}'
    prompt = generate_prompt(current)
    cached_entry = response_cache.get(key)
    # Cache keys are only row numbers and the cache file name does not include
    # the input file, so a cached answer is reused only when it was produced by
    # exactly this prompt (same text, same examples, same wording).
    if cached_entry is not None and cached_entry.get('prompt') == prompt:
        choices = cached_entry['choices']
        print('Using cached choices for key', key)
    else:
        response = False
        i = 0
        # Retries until a request succeeds; there is no upper bound on attempts.
        while not response:
            i += 1
            try:
                # NOTE(review): in the pre-1.0 openai client the per-request HTTP
                # timeout is `request_timeout`; confirm `timeout` does what is intended.
                response = openai.ChatCompletion.create(
                    model=engine,
                    messages=[{'role': 'user', 'content': prompt}],
                    temperature=temperature,
                    timeout=10
                )
            except Exception as e:
                print(f'Attempt {i} failed\n{e}\n')

                time.sleep(7) #7 second delay
        choices = [dict(choice.items()) for choice in response.choices]

        response_cache[key] = {
            'prompt': prompt,
            'choices': choices,
            'created': response.created
        }

        request_count += 1
        if cache and request_count % 5 == 0:
            # Periodic checkpoint so an interrupted run keeps most of its responses.
            with open(create_cache_filename(), 'w') as cache_file:
                json.dump(response_cache, cache_file)

    try:
        answer = choices[0]['message']['content'].replace('\n', '').strip()
        predicted_ratings[idx1] = int(answer)
    except Exception as e:
        # The reply was not a bare integer; the stored rating keeps its previous value.
        print(f'Error: {e}\n\n{response_cache.get(key)}')

# Final flush: without it, responses received since the last multiple-of-5
# checkpoint would be missing from the cache file.
if cache:
    with open(create_cache_filename(), 'w') as cache_file:
        json.dump(response_cache, cache_file)

os.makedirs(f'predictions/{stim_set}', exist_ok=True)
np.save(f'predictions/{stim_set}/{stim_set}-{n_context}-{engine}-{temperature}-{seed}.npy', predicted_ratings)


  0%|          | 0/794 [00:00<?, ?it/s]

Attempt 1 failed
The server is overloaded or not ready yet.

Attempt 1 failed
The server is overloaded or not ready yet.

Attempt 1 failed
The server is overloaded or not ready yet.

Attempt 1 failed
The server is overloaded or not ready yet.

Attempt 1 failed
The server is overloaded or not ready yet.

Attempt 1 failed
The server is overloaded or not ready yet.

Attempt 1 failed
The server is overloaded or not ready yet.

Attempt 1 failed
The server is overloaded or not ready yet.

Attempt 1 failed
The server is overloaded or not ready yet.

Attempt 1 failed
The server is overloaded or not ready yet.



In [62]:
#df=pd.read_csv('/Users/stevenmesquiti/Desktop/Ardens-Project/mindcore-research/data/smaller_df1.csv') # load in the data using pandas and take a peek at the first 10 rows

#df=pd.read_csv('/Users/stevenmesquiti/Desktop/Ardens-Project/mindcore-research/data/smaller_df2.csv') # load in the data using pandas and take a peek at the first 10 rows

#df=pd.read_csv('/Users/stevenmesquiti/Desktop/Ardens-Project/mindcore-research/data/smaller_df3.csv') # load in the data using pandas and take a peek at the first 10 rows

#df=pd.read_csv('/Users/stevenmesquiti/Desktop/Ardens-Project/mindcore-research/data/smaller_df4.csv') # load in the data using pandas and take a peek at the first 10 rows

#df=pd.read_csv('/Users/stevenmesquiti/Desktop/Ardens-Project/mindcore-research/data/smaller_df5.csv') # load in the data using pandas and take a peek at the first 10 row#s

# Reload the ratings written by the classification cell. This call had been fused
# onto the end of a comment line, so it never ran. `df` is the frame already in
# memory, since every read_csv line above is commented out.
predicted_ratings=np.load(f'predictions/{stim_set}/{stim_set}-{n_context}-{engine}-{temperature}-{seed}.npy')
df.head()

Unnamed: 0.1,Unnamed: 0,cond,SID,value,article_number
0,1,self,s001,Climate change is making current wildfires all...,7
1,2,self,s002,Banana rich in potassiums and good than anything,68
2,3,social,s003,Because its fighting the misinformation so pre...,77
3,4,social,s004,Keeping active seems like it is good for every...,18
4,5,social,s005,The BOKS program for children and exercise bef...,86


In [63]:
# A bare expression is only displayed when it is the last line of a cell, so
# print a short preview of the ratings instead of naming the array.
print(predicted_ratings[:10])
df.columns

Index(['Unnamed: 0', 'cond', 'SID', 'value', 'article_number'], dtype='object')

In [64]:
# Assemble the results table in one step: the model's rating, the rated text and the SID for each row.
tone_df_gpt35 = pd.DataFrame({
    'tone_ratings': predicted_ratings,
    'value': df.value.values,  # update with the text column
    'SID': df.SID.values,  # update with the indexing variable
})

In [65]:
# Preview the first rows of the assembled results table
tone_df_gpt35.head()

Unnamed: 0,tone_ratings,value,SID
0,0.0,Climate change is making current wildfires all...,s001
1,1.0,Banana rich in potassiums and good than anything,s002
2,1.0,Because its fighting the misinformation so pre...,s003
3,1.0,Keeping active seems like it is good for every...,s004
4,1.0,The BOKS program for children and exercise bef...,s005


In [66]:
# Save the results table as a CSV named after the run settings
output_directory = '/Users/stevenmesquiti/Desktop/Ardens-Project/mindcore-research/tone_results/final/' #update to your own directory 
os.makedirs(output_directory, exist_ok=True)
results_filename = f'{stim_set}-{n_context}-{engine}-{temperature}-{seed}.csv'
results_path = os.path.join(output_directory, results_filename)
tone_df_gpt35.to_csv(results_path, index=False)  # index=False leaves the row index out of the file