# Tweet Analysis (WIP)

##### Analyze the correlation between each tweet's log-likelihood score and the reactions it received

In [None]:
from __future__ import annotations

import os
import subprocess
from dataclasses import dataclass

import tweepy
from dotenv import load_dotenv
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

load_dotenv()

@dataclass
class AuthenticationInfo:
    """Container for Twitter API credentials.

    Every field defaults to an empty string, so only the credentials that are
    actually needed have to be supplied.
    """

    # NOTE: the defaults must not be followed by a trailing comma;
    # `api_key: str = "",` makes the default the tuple `("",)`, not `""`.
    api_key: str = ""  # API (consumer) key
    api_secret_key: str = ""  # API (consumer) secret
    bearer_token: str = ""  # bearer token
    access_token: str = ""  # user access token
    access_token_secret: str = ""  # user access token secret

In [None]:
# Read the API credentials from the process environment. A variable that is
# not set yields None.
auth_info = AuthenticationInfo(
    bearer_token=os.environ.get("BEARER_TOKEN"),
    api_key=os.environ.get("API_KEY"),
    api_secret_key=os.environ.get("API_SECRET_KEY"),
)

# Build the API client from the credentials collected above.
client = tweepy.Client(
    bearer_token=auth_info.bearer_token,
    consumer_key=auth_info.api_key,
    consumer_secret=auth_info.api_secret_key,
)

In [None]:
def get_tweets_for_eval(client: tweepy.Client, user_name: str, user_id: str | None = None, limit: int = 200) -> pd.DataFrame:
    """Fetch a user's scored tweets and tabulate their reaction counts.

    Only tweets whose text contains the marker ``"score: "`` followed by a
    parsable number are kept. The number is stored as the score and the text
    in front of the marker as the tweet body.

    Args:
        client: Client used to resolve the user and page through their tweets.
        user_name: Screen name of the account; only used to look up the user
            id when ``user_id`` is not given.
        user_id: Id of the account. When ``None`` it is resolved from
            ``user_name``.
        limit: Maximum number of tweets pulled from the timeline, counted
            before tweets without a score are filtered out.

    Returns:
        DataFrame with one row per scored tweet and the columns ``tweet_id``,
        ``text`` (newlines replaced by ``<br>``), ``score``, ``n_likes`` and
        ``n_retweets``.
    """
    score_marker = "score: "

    if user_id is None:
        user_id = client.get_user(username=user_name).data["id"]

    tweets = dict(
        tweet_id=[],
        text=[],
        score=[],
        n_likes=[],
        n_retweets=[],
    )

    # Retweets are excluded; public_metrics is requested because it carries
    # the like / retweet counts read below.
    timeline = tweepy.Paginator(
        client.get_users_tweets,
        id=user_id,
        max_results=100,
        exclude=["retweets"],
        tweet_fields=["public_metrics"],
    ).flatten(limit=limit)

    for tweet in timeline:
        # Split at the LAST marker so a tweet body that itself contains
        # "score: " is not cut short. An empty `marker` means no score.
        body, marker, raw_score = tweet.text.rpartition(score_marker)
        if not marker:
            continue

        # Parse only the first token after the marker: anything trailing the
        # number (e.g. an appended link) would make float() raise and abort
        # the whole fetch. Tweets without a parsable score are skipped.
        try:
            score = float(raw_score.split()[0])
        except (IndexError, ValueError):
            continue

        metrics = tweet.data["public_metrics"]
        tweets["tweet_id"].append(tweet.id)
        # "<br>" instead of "\n" -- presumably so the text wraps in HTML hover labels.
        tweets["text"].append(body.replace("\n", "<br>"))
        tweets["score"].append(score)
        tweets["n_likes"].append(metrics["like_count"])
        tweets["n_retweets"].append(metrics["retweet_count"])

    return pd.DataFrame(tweets)

In [None]:
# Pull up to 400 tweets from the AI_15R timeline into a DataFrame.
df_tweets = get_tweets_for_eval(
    user_name="AI_15R",
    client=client,
    limit=400,
)

In [None]:
# Scatter plot: total reactions (likes + retweets) on x, score on y.
# Hovering a marker shows the text of the corresponding tweet.
fig = go.Figure(
    data=[
        go.Scatter(
            x=df_tweets["n_likes"] + df_tweets["n_retweets"],
            y=df_tweets["score"],
            mode="markers",
            marker={"size": 10, "color": "skyblue"},
            hovertext=df_tweets["text"],
        )
    ]
)

fig.update_layout(
    title="AI_15Rのツイート評価",
    width=1200,
    height=600,
    template="plotly_dark",
    showlegend=False,
    font=dict(family="Ubuntu", size=10),
    xaxis={"title": "Number of Likes + Retweets"},
    yaxis={"title": "Log-likelihood Score"},
    # NOTE(review): no trace in this figure is assigned to xaxis2 / yaxis2;
    # presumably placeholders for a second (Masked LM score) subplot -- confirm.
    xaxis2={"title": "Number of Likes + Retweets"},
    yaxis2={"title": "Masked LM Score"},
)

fig.show()