In [6]:
# Importing the necessary libraries
import csv
import json
import openai
import pandas as pd

# Importing the dataset
df = pd.read_csv('../tedx-dataset/tags_export.csv')
titles = df['title']
tags = df['tag']

In [7]:
df

Unnamed: 0,title,tag
0,How do gas masks actually work?,"environment, technology, design, education, na..."
1,My epic journey becoming the fastest person to...,"sports, motivation, personal growth, humanity"
2,How AI is unlocking the secrets of nature and ...,"science, technology, innovation, future, AI, h..."
3,An updated action plan for solving the climate...,"climate change, environment, sustainability, s..."
4,The secret ingredient of business success,"business, social change, community, leadership..."
...,...,...
6407,The best stats you've ever seen,"global issues, economics, health, Africa, stat..."
6408,Do schools kill creativity?,"culture, education, creativity, dance, parenti..."
6409,Greening the ghetto,"environment, politics, sustainability, busines..."
6410,Simplicity sells,"technology, computers, entertainment, media, m..."


In [9]:
import json
OPENAI_KEY = # path to your openai key

In [24]:
# Defining prompts and labels
from pydantic import BaseModel

class OutputFormat(BaseModel):
    best_tag: str
    sentiment: str

# Creating OpenAI client
client = openai.OpenAI(api_key=OPENAI_KEY)
# Creating a request function
def request(df,number):
    # Prompt 1 (Zero-Shot)
    prompt1 = """Below you can find the title of a speech from a TEDx event.

    1) Tag
    * Choose a tag from a list of possible tags that best describes the topic of the speech. Try to pick quite a general tag that could be applied to a wide range of speeches.

    2) Sentiment
    * Assess the possible sentiment of this speech based on the title. Choose one label from the options provided. Take into account the provided sentiment explanations to guide your decision.

    Examples:
    * "How to prevent political corruption" {'tag': 'politics', 'sentiment': 'Positive'}
    * "The art of persuasive storytelling" {'tag': 'communication', 'sentiment': 'Neutral'}
    * "What's the point of digital fashion?" {'tag': 'sustainability', 'sentiment': 'Negative'}
    """


    prompt2 = f"""Title: {df['title'][number]}.
    Tags: {df['tag'][number]}.
    Sentiment labels: 'Positive, Negative, Neutral'
    Sentiment explanations: 
    1. Positive: Includes titles that convey optimism, inspiration, motivation, or hope. This category combines elements of positivity and inspiration, offering insight into titles that aim to motivate or uplift.
    2. Negative: Encompasses titles that communicate pessimism, criticism, anxiety, or difficult emotions. This category can serve as an indicator of titles that address challenging topics or evoke negative emotions.
    3. Neutral: Covers titles that are informational, educational, or factual in nature, with a neutral emotional tone. This category is useful for identifying titles that focus primarily on conveying knowledge without a strong emotional charge.
    """
    # 1. **Pozytywny/Inspirowany**: Obejmuje tytuły, które wyrażają optymizm, inspirację, motywację lub nadzieję. Ta kategoria łączy elementy pozytywności i inspiracji, dając wgląd w tytuły, które starają się motywować lub podnieść na duchu.
    # 2. **Negatywny/Krytyczny**: Zawiera tytuły, które przekazują pesymizm, krytykę, niepokój lub trudne emocje. Ta klasa może służyć jako wskaźnik tytułów, które poruszają trudne tematy lub wywołują negatywne emocje.
    # 3. **Neutralny/Edukacyjny**: Obejmuje tytuły o charakterze informacyjnym, edukacyjnym lub faktualnym, które mają neutralny ton emocjonalny. Ta klasa jest przydatna do identyfikacji tytułów, które koncentrują się głównie na przekazywaniu wiedzy bez wyraźnego ładunku emocjonalnego.

    completion = client.beta.chat.completions.parse(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": prompt1},
            {"role": "user", "content": prompt2},
        ],
        response_format=OutputFormat,
    )
    return completion.choices[0].message.parsed


In [23]:
df.iloc[1,:]

title    My epic journey becoming the fastest person to...
tag          sports, motivation, personal growth, humanity
Name: 1, dtype: object

In [25]:
response = request(df,1)
response

OutputFormat(best_tag='motivation', sentiment='Positive')

In [18]:
response.best_tag

'education'

In [19]:
len(df)

6412

In [28]:
df['best_tag'] = None
df['sentiment'] = None
for i in range(0,10):
    response = request(df,i)
    print(df.iloc[i,:])
    print(response)
    print("--------------------")
    df.loc[i, 'best_tag'] = response.best_tag
    df.loc[i, 'sentiment'] = response.sentiment

title                          How do gas masks actually work?
tag          environment, technology, design, education, na...
best_tag                                                  None
sentiment                                                 None
Name: 0, dtype: object
best_tag='education' sentiment='Neutral'
--------------------
title        My epic journey becoming the fastest person to...
tag              sports, motivation, personal growth, humanity
best_tag                                                  None
sentiment                                                 None
Name: 1, dtype: object
best_tag='motivation' sentiment='Positive'
--------------------
title        How AI is unlocking the secrets of nature and ...
tag          science, technology, innovation, future, AI, h...
best_tag                                                  None
sentiment                                                 None
Name: 2, dtype: object
best_tag='science' sentiment='Positive'
---------

In [35]:
from tqdm import tqdm
for i in tqdm(range(1001,len(df))):
    response = request(df,i)
    df.loc[i, 'best_tag'] = response.best_tag
    df.loc[i, 'sentiment'] = response.sentiment

100%|██████████| 5411/5411 [1:01:22<00:00,  1.47it/s]


In [36]:
df

Unnamed: 0,title,tag,best_tag,sentiment
0,How do gas masks actually work?,"environment, technology, design, education, na...",education,Neutral
1,My epic journey becoming the fastest person to...,"sports, motivation, personal growth, humanity",personal growth,Positive
2,How AI is unlocking the secrets of nature and ...,"science, technology, innovation, future, AI, h...",science,Positive
3,An updated action plan for solving the climate...,"climate change, environment, sustainability, s...",climate change,Positive
4,The secret ingredient of business success,"business, social change, community, leadership...",business,Positive
...,...,...,...,...
6407,The best stats you've ever seen,"global issues, economics, health, Africa, stat...",statistics,Positive
6408,Do schools kill creativity?,"culture, education, creativity, dance, parenti...",education,Negative
6409,Greening the ghetto,"environment, politics, sustainability, busines...",sustainability,Positive
6410,Simplicity sells,"technology, computers, entertainment, media, m...",UX design,Positive


In [37]:
df.to_csv('tags_export_gpt_checkpoint.csv', index=True)