# Development

Use of the Twitter API in this notebook is subject to the Twitter Developer Agreement and Policy: https://developer.twitter.com/en/developer-terms/agreement-and-policy

In [None]:
import os
# Move the working directory one level up before importing the project package.
# Presumably the notebook lives in a subfolder and `modules` sits in the parent
# directory -- TODO confirm.
# NOTE(review): the path is relative, so re-running this cell moves up one more
# level each time; restart the kernel before re-executing it.
os.chdir('..')

import pandas as pd

# Project-local helpers used throughout the notebook.
from modules import (
    TwitterRequest,
    FeatureExtractor
)

## Get tweets

In [None]:
# Parameters for the single exploratory request made in the next cell.
max_results = 10
query = 'edmond mulet'
# One-day window, given as ISO-8601 timestamps with a trailing "Z".
start_time, end_time = "2023-04-28T00:00:00Z", "2023-04-29T00:00:00Z"

In [None]:
# Build the request from the parameters defined above.
request = TwitterRequest(
    query=query,
    start_time=start_time,
    end_time=end_time,
    max_results=max_results,
)

# Run the request and the extraction steps in the same order as a single
# fluent chain would: request -> extract_tweets -> extract_users.
extracted = request.request().extract_tweets().extract_users()

# The final step is unpacked into two objects: the tweets and the users.
# The 'tw_' / 'us_' prefixes are presumably applied to the column names of
# each result -- TODO confirm against TwitterRequest.preprocess.
tweets, users = extracted.segregate().preprocess(
    tweets_prefix='tw_',
    users_prefix='us_',
)

In [None]:
# Persist the tweets to 'tweets.csv' (relative to the current working directory)
# so the feature-extraction section can reload them without a new request.
# Assumes `tweets` is a pandas DataFrame; index=False then leaves the row index
# out of the file -- TODO confirm the type returned by preprocess().
tweets.to_csv('tweets.csv', index=False)
# Last expression of the cell: the notebook displays the shape.
tweets.shape

## Get features

In [None]:
# Reload the tweets from 'tweets.csv' (the file name written by the "Get tweets"
# section); this rebinds `tweets` to the DataFrame read from disk.
tweets = pd.read_csv('tweets.csv')
# Last expression of the cell: the notebook displays (rows, columns).
tweets.shape

In [None]:
# Run the feature-extraction pipeline over the tweets loaded from disk.
# The class is FeatureExtractor, the name imported from `modules` at the top of
# the notebook and used for the same pipeline in the "Get dataset" loop;
# `GPTFeatureExtraction` is not imported or defined here and raised NameError.
tweets_expanded = (
    FeatureExtractor(tweets=tweets)
    .preprocess_text()
    # 'tw_' matches the tweets prefix used when the tweets were preprocessed;
    # presumably it prefixes the generated feature columns -- TODO confirm.
    .extract_features(prefix='tw_')
)

# Last expression of the cell: preview the first rows of the result.
tweets_expanded.head()

In [None]:
# Persist the tweets with their extracted features to 'tweets_expanded.csv'
# (relative to the current working directory).
# Assumes `tweets_expanded` is a pandas DataFrame, in which case index=False
# leaves the row index out of the file -- TODO confirm.
tweets_expanded.to_csv('tweets_expanded.csv', index=False)

## Get dataset

In [None]:
import pandas as pd
from datetime import datetime, timedelta

# Build consecutive (start, end) pairs, one per `delta` step, covering the
# range from `start_date` up to `end_date`. Each bound is the naive datetime's
# ISO-8601 string with a "Z" appended.
delta = timedelta(days=1)
start_date = datetime(2023, 4, 27, 0, 0)
end_date = datetime(2023, 4, 29, 0, 0)

dates = []
# `start_date` itself is advanced on every iteration, so once the loop ends it
# no longer holds the beginning of the range.
while start_date < end_date:
    window_start, start_date = start_date, start_date + delta
    dates.append(
        (f"{window_start.isoformat()}Z", f"{start_date.isoformat()}Z")
    )


In [None]:
# Show every generated window, start and end on separate lines.
# The loop rebinds `start_date` / `end_date` to the string bounds in `dates`.
for start_date, end_date in dates:
    print(f'Start: {start_date}\nEnd: {end_date}')

In [None]:
# Every candidate name available as a search query.
all_candidates = [
    'carlos pineda', 'sandra torres', 'edmond mulet',
    'zury rios', 'manuel conde',
]

# Only the first two candidates are actually queried. Previously the full list
# was assigned to `candidates` and overwritten on the next statement (a dead
# store); keeping it under its own name makes the narrowing explicit.
# Presumably the subset limits the number of requests while developing --
# use `candidates = all_candidates` to collect everyone.
candidates = all_candidates[:2]

# Passed as `max_results` to every request in the collection loop.
max_results = 10
# Prefixes handed to preprocess()/extract_features() and used to name the
# per-row `candidate` column of each result.
tweets_prefix = 'tw_'
users_prefix = 'us_'

In [None]:
# For every candidate and every date window: request tweets and users, run the
# feature extraction on the tweets, tag both results with the candidate, and
# finally stack everything into a single `tweets` and a single `users` table.
tweets_collector, users_collector = [], []
for candidate in candidates:

    # Per-candidate accumulators, one entry per date window.
    candidate_tweets, candidate_users = [], []
    for window_start, window_end in dates:

        request = TwitterRequest(
            query=candidate,
            start_time=window_start,
            end_time=window_end,
            max_results=max_results,
        )
        tweets, users = (
            request.request().extract_tweets().extract_users().segregate()
            .preprocess(tweets_prefix=tweets_prefix, users_prefix=users_prefix)
        )

        extractor = FeatureExtractor(tweets=tweets)
        featured_tweets = (
            extractor.preprocess_text().extract_features(prefix=tweets_prefix)
        )

        # Record which candidate's query produced these rows.
        featured_tweets[f"{tweets_prefix}candidate"] = candidate
        users[f"{users_prefix}candidate"] = candidate

        candidate_tweets.append(featured_tweets)
        candidate_users.append(users)

    # Collapse this candidate's windows before moving to the next candidate.
    tweets_collector.append(pd.concat(candidate_tweets))
    users_collector.append(pd.concat(candidate_users))

# Final stacking across candidates; ignore_index=True renumbers the rows.
tweets = pd.concat(tweets_collector, axis=0, ignore_index=True)
users = pd.concat(users_collector, axis=0, ignore_index=True)


In [None]:
# Persist the combined results of the collection loop, one CSV per table,
# relative to the current working directory. index=False leaves out the row
# index (both objects are the DataFrames built by pd.concat above).
tweets.to_csv('tweets_loop.csv', index=False)
users.to_csv('users_loop.csv', index=False)