# Development

Agreement and policy: https://developer.twitter.com/en/developer-terms/agreement-and-policy

In [1]:
import os
# Move the working directory one level up. NOTE(review): presumably this makes
# the project-local `modules` package importable from the notebook's folder —
# confirm. The path is relative, so re-running this cell moves up one more
# level each time; relative paths used later (e.g. 'tweets.csv') resolve
# against whatever directory this leaves the kernel in.
os.chdir('..')

import pandas as pd

from modules import (
    TwitterRequest,
    GPTFeatureExtraction
)

## Get tweets

In [None]:
# Search parameters handed to TwitterRequest in the next cell.
query = "edmond mulet"
start_time = "2023-04-28T00:00:00Z"
end_time = "2023-04-29T00:00:00Z"
max_results = 10

In [None]:
# Configure the request with the search parameters defined above.
twitter_request = TwitterRequest(
    query=query,
    start_time=start_time,
    end_time=end_time,
    max_results=max_results,
)

# Run the fluent pipeline; the final preprocess() result is unpacked into two
# objects. NOTE(review): the prefixes are presumably applied to the column
# names of each frame — confirm in modules.TwitterRequest.
tweets, users = (
    twitter_request
    .request()
    .extract_tweets()
    .extract_users()
    .segregate()
    .preprocess(tweets_prefix='tw_', users_prefix='us_')
)

In [None]:
# Cache the fetched tweets on disk (row index omitted) so the later sections
# can reload them from 'tweets.csv'.
tweets.to_csv(path_or_buf='tweets.csv', index=False)
tweets.shape

## Get features

In [2]:
# Reload the tweets cached in 'tweets.csv' and show the frame's dimensions.
tweets = pd.read_csv(filepath_or_buffer='tweets.csv')
tweets.shape

(10, 11)

In [3]:
# Wrap the tweets in the project's feature extractor, then run its two steps
# in order: text preprocessing followed by feature extraction.
feature_extractor = GPTFeatureExtraction(tweets=tweets)
tweets_expanded = feature_extractor.preprocess_text().extract_features(prefix='tw_')

tweets_expanded.head()

## Zero-shot feature extraction with GPT-3.5

In [None]:
import re
import time
import json
import openai
import pandas as pd
from authenticators import OpenAIAuthenticator

# Copy the API key exposed by the project authenticator onto the openai module
# so later openai calls in this notebook are authenticated.
# NOTE(review): where OpenAIAuthenticator loads the key from is not visible
# here — confirm it does not come from a hardcoded value.
gpt = OpenAIAuthenticator()
openai.api_key = gpt.api_key

In [None]:
# Start this section from the tweets cached in 'tweets.csv'.
tweets = pd.read_csv(filepath_or_buffer='tweets.csv')
tweets.shape


In [None]:
# Same two-step pipeline as the packaged extractor: preprocess the text, then
# extract the features with the 'tw_' prefix.
feature_extractor = GPTFeatureExtraction(tweets=tweets)
df = feature_extractor.preprocess_text().extract_features(prefix='tw_')

df.head()

In [None]:
# Matches http:// and https:// URLs so links can be stripped from the text.
_URL_PATTERN = re.compile(
    r'http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+'
)

# Matches runs of emoji / pictographic code points.
# The ranges are deliberately targeted: a single catch-all range such as
# U+24C2..U+1F251 also contains CJK ideographs, Hangul and many other scripts,
# and would silently delete real text.
_EMOJI_PATTERN = re.compile(
    "["
    "\U0001F600-\U0001F64F"  # Emoticons
    "\U0001F300-\U0001F5FF"  # Symbols & pictographs
    "\U0001F680-\U0001F6FF"  # Transport & map symbols
    "\U0001F1E0-\U0001F1FF"  # Regional indicators (flags)
    "\U0001F900-\U0001F9FF"  # Supplemental symbols & pictographs
    "\U0001FA70-\U0001FAFF"  # Symbols & pictographs extended-A
    "\U0001F000-\U0001F251"  # Mahjong/cards, enclosed alphanumerics & ideographs
    "\u24C2"                 # Circled M
    "\u25A0-\u25FF"          # Geometric shapes
    "\u2600-\u26FF"          # Miscellaneous symbols
    "\u2700-\u27BF"          # Dingbats
    "\u2934\u2935"           # Curved arrows
    "\u2B00-\u2BFF"          # Miscellaneous symbols and arrows
    "\u3030\u303D\u3297\u3299"  # Wavy dash, part alternation mark, circled ideographs
    "\uFE00-\uFE0F"          # Variation selectors (emoji presentation)
    "]+"
)


def preprocess_text(text: str) -> str:
    """Strip URLs and emojis from ``text`` and flatten newlines.

    Args:
        text: Raw text to clean.

    Returns:
        The text with every URL match and every emoji run removed (replaced by
        the empty string) and each ``'\\n'`` replaced by a single space.
        Surrounding whitespace is left as-is, so removing a token that sat
        between two spaces leaves both spaces in place.
    """
    # Remove URLs
    text = _URL_PATTERN.sub('', text)

    # Remove emojis
    text = _EMOJI_PATTERN.sub('', text)

    # Remove newline characters
    return text.replace('\n', ' ')

# Clean the tweet text column; assign() returns a new frame, so the original
# `tweets` frame is left untouched.
preprocessed_tweets = tweets.assign(
    tw_texto=tweets['tw_texto'].apply(preprocess_text)
)

preprocessed_tweets.head()

In [None]:
# Prefix interpolated into the feature keys requested in the prompt below.
prefix = "tw_"
# Accumulates one parsed model response per tweet.
collector = list()

def gpt_features(prompt: str, model: str = "gpt-3.5-turbo", temperature: int = 0) -> str:
    """Send ``prompt`` as a single user message and return the reply text.

    Args:
        prompt: Text sent as the content of the only (user) message.
        model: Model name forwarded to ``openai.ChatCompletion.create``.
        temperature: Sampling temperature forwarded unchanged.
            NOTE(review): annotated as ``int``; the API presumably also
            accepts fractional values — confirm before relying on the hint.

    Returns:
        The ``"content"`` field of the first choice's message.
    """
    completion = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=temperature,
    )
    first_choice = completion.choices[0]
    return first_choice.message["content"]

def _build_prompt(tweet_text: str, prefix: str) -> str:
    """Return the Spanish zero-shot classification prompt for one tweet.

    Args:
        tweet_text: Tweet text embedded at the end of the prompt.
        prefix: String prepended to every key the model is asked to return.
    """
    prompt = f"""
        El siguiente es un tweet en español. Por favor, clasifícalo de acuerdo a las siguientes categorías:

        Valencia: [positivo, negativo, neutral]
        Emoción: [felicidad, tristeza, enojo, miedo, sorpresa, disgusto, neutral]
        Postura: [aprobación, desaprobación, esperanza, desilusión, indiferencia, confianza, desconfianza]
        Tono: [agresivo, pasivo, asertivo, escéptico, irónico, humorístico, informativo, serio, inspirador, otro]
        Tema: [política, economía, deportes, entretenimiento, tecnología, ciencia, salud, educación, religión, cultura, medio ambiente, otro]

        Además, evalúalo utilizando una escala continua con rango de 0 a 1 en las siguientes dimensiones:

        Amabilidad: [0.0 - 1.0]
        Legibilidad: [0.0 - 1.0]
        Controversialidad: [0.0 - 1.0]
        Informatividad: [0.0 - 1.0]
        Espectro político: [0.0 - 1.0] (0 = izquierda, 1 = derecha)

        Formatea tu respuesta como un diccionario de Python con las siguientes llaves:

        [
            {prefix}valencia, {prefix}emocion, {prefix}postura, {prefix}tono, {prefix}tema, {prefix}amabilidad, 
            {prefix}legibilidad, {prefix}controversialidad, {prefix}informatividad, {prefix}espectro_politico
        ]

        Tweet: '''{tweet_text}'''
        """
    return prompt


def _parse_gpt_response(raw_response: str) -> dict:
    """Extract the dictionary contained in a model reply.

    The prompt asks for a *Python* dictionary, so the reply may be valid JSON
    or a Python literal (single quotes), possibly wrapped in extra text such
    as a code fence. JSON is tried first, then ``ast.literal_eval``.

    Raises:
        ValueError: If no ``{...}`` span is found or it cannot be parsed into
            a dict.
    """
    import ast  # local import: only needed for the Python-literal fallback

    # Greedy match from the first '{' to the last '}' drops any wrapper text.
    match = re.search(r'\{.*\}', raw_response, flags=re.DOTALL)
    if match is None:
        raise ValueError(f"No dictionary found in model response: {raw_response!r}")

    payload = match.group(0)
    try:
        parsed = json.loads(payload)
    except json.JSONDecodeError:
        try:
            parsed = ast.literal_eval(payload)
        except (ValueError, SyntaxError) as exc:
            raise ValueError(
                f"Could not parse model response: {raw_response!r}"
            ) from exc

    if not isinstance(parsed, dict):
        raise ValueError(f"Model response is not a dictionary: {raw_response!r}")
    return parsed


# Classify each tweet and collect one plain dict of features per row.
for _, row in preprocessed_tweets.iterrows():
    prompt = _build_prompt(row['tw_texto'], prefix)
    collector.append(_parse_gpt_response(gpt_features(prompt)))
    # Pause between API calls (presumably to stay under rate limits).
    time.sleep(1)

# Build the feature frame once from the collected dicts, reusing the tweets'
# own index so the column-wise concat lines up row for row.
new_features = pd.DataFrame(collector, index=preprocessed_tweets.index)
expanded_tweets = pd.concat([preprocessed_tweets, new_features], axis=1)

In [None]:
# Preview the first five rows of the tweets joined with their new features.
expanded_tweets.head(n=5)

In [None]:
# Persist the expanded tweets (row index omitted) to 'expanded_tweets.csv'.
expanded_tweets.to_csv(path_or_buf='expanded_tweets.csv', index=False)