In [1]:
import os
from environs import Env

from openai import OpenAI
import json
import pandas as pd

In [2]:
# Show the value of every top-level expression in a cell, not only the last one
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Lift pandas' display limits on column count, row count and cell text width
pd.set_option(
    'display.max_columns', None,
    'display.max_rows', None,
    'display.max_colwidth', None,
)

In [3]:
# Read settings from the local ".env" file when one is present
env = Env()
env.read_env(".env")

True

In [4]:
# Load the validation tweets; header=None means the first line is read as data,
# so the column names are supplied explicitly
df = pd.read_csv(
    '../data/twitter_validation.csv',
    header=None,
    names=['id', 'site', 'label', 'tweet'],
)
df.head()

Unnamed: 0,id,site,label,tweet
0,3364,Facebook,Irrelevant,"I mentioned on Facebook that I was struggling for motivation to go for a run the other day, which has been translated by Tom’s great auntie as ‘Hayley can’t get out of bed’ and told to his grandma, who now thinks I’m a lazy, terrible person 🤣"
1,352,Amazon,Neutral,BBC News - Amazon boss Jeff Bezos rejects claims company acted like a 'drug dealer' bbc.co.uk/news/av/busine…
2,8312,Microsoft,Negative,@Microsoft Why do I pay for WORD when it functions so poorly on my @SamsungUS Chromebook? 🙄
3,4371,CS-GO,Negative,"CSGO matchmaking is so full of closet hacking, it's a truly awful game."
4,4433,Google,Neutral,Now the President is slapping Americans in the face that he really did commit an unlawful act after his acquittal! From Discover on Google vanityfair.com/news/2020/02/t…


In [5]:
# Keep only the tweet text column (still as a one-column DataFrame)
df = df.loc[:, ['tweet']]
df.head()

Unnamed: 0,tweet
0,"I mentioned on Facebook that I was struggling for motivation to go for a run the other day, which has been translated by Tom’s great auntie as ‘Hayley can’t get out of bed’ and told to his grandma, who now thinks I’m a lazy, terrible person 🤣"
1,BBC News - Amazon boss Jeff Bezos rejects claims company acted like a 'drug dealer' bbc.co.uk/news/av/busine…
2,@Microsoft Why do I pay for WORD when it functions so poorly on my @SamsungUS Chromebook? 🙄
3,"CSGO matchmaking is so full of closet hacking, it's a truly awful game."
4,Now the President is slapping Americans in the face that he really did commit an unlawful act after his acquittal! From Discover on Google vanityfair.com/news/2020/02/t…


In [6]:
# Number of tweets taken from the sample per loop step / API request
batch_size = 2

# Random subset of 6 tweets to classify. The fixed random_state makes the draw
# reproducible, so "Restart & Run All" classifies the same rows every time.
sample_df = df.sample(6, random_state=42)
# Empty frame with 'Tweet' and 'Tone' columns, reserved for storing the results
result_df = pd.DataFrame(columns=['Tweet', 'Tone'])

In [7]:
# Number of rows in the sample
len(sample_df)

6

In [8]:
# API client; the key is taken from the OPENAI_API_KEY environment variable.
# (As the original note says, this is the client's default and can be omitted.)
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [23]:
# Classify the sampled tweets batch by batch. Every raw model reply is stored in
# `list_data` (one string per batch) so that later cells have something to work
# with; previously the list was created but never filled.
list_data = []
for i in range(0, sample_df.shape[0], batch_size):
    # .iloc makes it explicit that the batch is cut by position: sample_df keeps
    # the shuffled index labels of the rows picked by df.sample().
    batch = sample_df['tweet'].iloc[i:i+batch_size].tolist()
    # The batch is sent to the model as a JSON array of tweet strings
    stringified_batch = json.dumps(batch)

    response = client.chat.completions.create(
        model="gpt-3.5-turbo-1106",
        messages=[
            {"role": "system", "content": 'You are a helpful assistant that categorizes tweets into sentiment and tone. Use "Positive", "Negative", or "Neutral" for sentiment. For each review, please state the sentiment, tone, and the tweet. '},
            {"role": "user", "content": stringified_batch}
        ]
    )
    # Text of the first (and only requested) completion
    data = response.choices[0].message.content
    list_data.append(data)
    print('==============')
    print(data)

Neutral, Informative: "India Bans 118 Chinese apps including PUBG #PUBG #PUBGMOBILE pic.twitter.com/4RT8ykyxM3"
Positive, Neutral, "Going through this right now, great reporting by @NightCityLifeDE"
Negative, Neutral, "Lol I set up this string of tinder dates a while back where I would just bail on them without messaging them. I figured I would save them the disappointment. I made up excuses later like \u201coh you know socially this just works out fo me and makes me money so work but maye next year"
Positive, Neutral, ":) *Realize the Witcher was actually good*"
Negative, Negative, "It’s nice not to have facebook anymore but I can’t say I hate it when my cuz tells me the latest drama. Like how Jeremy Beuss’ skinny white ass tries to spout ‘correct’ gender roles and anti-abortion/women’s health while he displays internalized homophobia and toxic masculinity"


In [14]:
# Convert the list to JSON
json_data = json.dumps(list_data, indent=4)  # indent parameter for pretty formatting (optional)

# Print or use the JSON data as needed
print(json_data)

[
    "{\n  \"results\": [\n    {\n      \"sentiment\": \"Neutral\",\n      \"tone\": \"Neutral\",\n      \"tweet\": \"God when \\\\\\\\/\"\n    },\n    {\n      \"sentiment\": \"Negative\",\n      \"tone\": \"Neutral\",\n      \"tweet\": \"India Bans 118 Chinese apps including PUBG #PUBG #PUBGMOBILE pic.twitter.com/4RT8ykyxM3\"\n    }\n  ]\n}",
    "{\n  \"results\": [\n    {\n      \"sentiment\": \"Positive\",\n      \"tone\": \"Neutral\",\n      \"tweet\": \"Going through this right now, great reporting by @NightCityLifeDE\"\n    },\n    {\n      \"sentiment\": \"Negative\",\n      \"tone\": \"Negative\",\n      \"tweet\": \"Lol I set up this string of tinder dates a while back where I would just bail on them without messaging them. I figured I would save them the disappointment. I made up excuses later like \\\"oh you know socially this just works out for me and makes me money so work but maybe next year\"\n    }\n  ]\n}",
    "{\n  \"results\": [\n    {\n      \"sentiment\": \"Pos

In [19]:
# Echo the serialised string as the cell's output (shown as its repr, escapes included)
json_data

'[\n    "{\\n  \\"results\\": [\\n    {\\n      \\"sentiment\\": \\"Neutral\\",\\n      \\"tone\\": \\"Neutral\\",\\n      \\"tweet\\": \\"God when \\\\\\\\\\\\\\\\/\\"\\n    },\\n    {\\n      \\"sentiment\\": \\"Negative\\",\\n      \\"tone\\": \\"Neutral\\",\\n      \\"tweet\\": \\"India Bans 118 Chinese apps including PUBG #PUBG #PUBGMOBILE pic.twitter.com/4RT8ykyxM3\\"\\n    }\\n  ]\\n}",\n    "{\\n  \\"results\\": [\\n    {\\n      \\"sentiment\\": \\"Positive\\",\\n      \\"tone\\": \\"Neutral\\",\\n      \\"tweet\\": \\"Going through this right now, great reporting by @NightCityLifeDE\\"\\n    },\\n    {\\n      \\"sentiment\\": \\"Negative\\",\\n      \\"tone\\": \\"Negative\\",\\n      \\"tweet\\": \\"Lol I set up this string of tinder dates a while back where I would just bail on them without messaging them. I figured I would save them the disappointment. I made up excuses later like \\\\\\"oh you know socially this just works out for me and makes me money so work but maybe 

In [None]:
# Hand-written example payload: two classified tweets under the "tweets" key
data = dict(
    tweets=[
        dict(sentiment="Neutral", tone="Neutral", tweet="God when \r"),
        dict(
            sentiment="Negative",
            tone="Neutral",
            tweet="India Bans 118 Chinese apps including PUBG #PUBG #PUBGMOBILE pic.twitter.com/4RT8ykyxM3",
        ),
    ],
)

# One row per record, one column per key
df = pd.DataFrame(data=data["tweets"])

# Plain-text dump of the frame
print(df)