# Test

Agreement and policy: https://developer.twitter.com/en/developer-terms/agreement-and-policy

In [None]:
import os
# Step up to the parent directory before the remaining imports run.
# NOTE(review): presumably this is what makes the local `modules` package
# importable from the notebook's folder - confirm against the repo layout.
os.chdir('..')

import requests
import tweepy
import pandas as pd
from modules import TwitterAuthentication

# Project-local object; only its `bearer_token` attribute is read here.
twitter = TwitterAuthentication()
# Module-level tweepy client built from that bearer token; get_tweets() below
# calls `client.search_recent_tweets` on it.
client = tweepy.Client(bearer_token=twitter.bearer_token)

## Tweepy example

¿En qué necesito pensar?

- ¿Qué campos necesito? Listo.
- ¿Cómo expandir public metrics?
- Esquema para extraer datos uniformemente durante los últimos 7 días
- Establecer qué políticos tomar en cuenta. Extraer la lista desde: https://es.wikipedia.org/wiki/Elecciones_generales_de_Guatemala_de_2023
- ¿Guardar los datos en una base de datos SQL local?


### Get tweets

In [None]:
def get_tweets(query: str, start_time: str, end_time: str, max_results: int) -> "tweepy.Response":
    """Search recent tweets matching *query*, excluding retweets and replies.

    Args:
        query: Search expression. It is wrapped in parentheses and
            ``-is:retweet -is:reply`` is appended to it.
        start_time: Passed through as the ``start_time`` of the search.
        end_time: Passed through as the ``end_time`` of the search.
        max_results: Passed through as the ``max_results`` of the search.

    Returns:
        Whatever ``client.search_recent_tweets`` returns. With the client
        built in this notebook (default return type) that is a
        ``tweepy.Response``, not a ``requests.Response``.
    """
    # Group the caller's expression so the exclusions apply to all of it:
    # in the search syntax AND binds tighter than OR, so without the
    # parentheses "a OR b -is:retweet" would only filter the "b" branch.
    filtered_query = f"({query}) -is:retweet -is:reply"

    return client.search_recent_tweets(
        query=filtered_query,
        start_time=start_time,
        end_time=end_time,
        max_results=max_results,
        tweet_fields=[
            "author_id", "created_at", "text", "geo", "id", "lang",
            "public_metrics", "possibly_sensitive", "referenced_tweets",
        ],
        user_fields=[
            "username", "name", "created_at", "description", "id",
            "entities", "location", "pinned_tweet_id", "profile_image_url",
            "public_metrics", "verified", "withheld",
        ],
        # "author_id" makes the response carry the author objects under
        # includes["users"], which to_dataframe() joins back onto the tweets.
        expansions=[
            "author_id", "referenced_tweets.id",
        ],
    )

### Store them in a DataFrame

In [None]:
def _flatten_record(record, prefix: str = "") -> dict:
    """Return one flat row for a tweet/user object, hoisting its public_metrics.

    Every key of ``record.data`` is read back through attribute access and
    stored under ``prefix + key``; the ``public_metrics`` mapping (if any) is
    removed and its entries are added as individual ``prefix + name`` keys.
    """
    row = {f"{prefix}{key}": getattr(record, key) for key in record.data}
    metrics = row.pop(f"{prefix}public_metrics", None) or {}
    row.update({f"{prefix}{name}": value for name, value in metrics.items()})
    return row


def to_dataframe(tweets: "tweepy.Response") -> pd.DataFrame:
    """Convert a tweet search response into one flat DataFrame row per tweet.

    Tweet fields become columns as-is, with ``public_metrics`` expanded into
    one column per metric. The author of each tweet is looked up in
    ``tweets.includes["users"]`` and its fields are appended as ``user_*``
    columns (its ``public_metrics`` expanded the same way). Nested user
    values such as ``entities`` are kept intact in a single column.

    Args:
        tweets: Response object exposing ``data`` (iterable of tweets, or
            ``None`` when nothing matched) and ``includes`` (mapping that may
            hold a ``"users"`` list).

    Returns:
        The combined DataFrame; an empty DataFrame when the response has no
        tweets. Tweets whose author is not in the includes get missing values
        in the ``user_*`` columns.
    """
    # A search with no matches yields data=None; there is nothing to tabulate.
    if not tweets.data:
        return pd.DataFrame()

    df = pd.DataFrame([_flatten_record(tweet) for tweet in tweets.data])

    included_users = (tweets.includes or {}).get("users") or []
    users_by_id = {
        user.id: _flatten_record(user, prefix="user_") for user in included_users
    }
    if not users_by_id or "author_id" not in df.columns:
        return df

    # Build the user columns directly from the per-author dicts instead of
    # going through json_normalize, which would flatten nested fields into
    # dotted column names that no longer match any key.
    user_rows = [users_by_id.get(author_id, {}) for author_id in df["author_id"]]
    user_df = pd.DataFrame(user_rows, index=df.index)

    return pd.concat([df, user_df], axis=1)

### Clean the data

In [None]:
# Build the flat DataFrame from the search response and preview its first rows.
# NOTE(review): `tweets` is only assigned in the "Example usage" cell further
# down, so that cell has to be executed before this one.
df = to_dataframe(tweets)
df.head()

### Example usage

In [None]:
from datetime import datetime, timedelta, timezone

# The recent-search endpoint only reaches back about 7 days, so fixed calendar
# dates stop working once they age out. Use a rolling 3-day window instead,
# ending slightly in the past because the API rejects an end_time that is too
# close to the request time.
query = 'sammy morales'
window_end = datetime.now(timezone.utc) - timedelta(seconds=30)
window_start = window_end - timedelta(days=3)
start_time = window_start.strftime("%Y-%m-%dT%H:%M:%SZ")
end_time = window_end.strftime("%Y-%m-%dT%H:%M:%SZ")
max_results = 10

tweets = get_tweets(
    query=query,
    start_time=start_time,
    end_time=end_time,
    max_results=max_results,
)

tweets_df = to_dataframe(tweets)

# Last expression of the cell: the notebook renders the preview.
tweets_df.head()