# Tweet Analysis (WIP)

### 1. Analyze the correlation between each tweet's log-likelihood score and its reactions (likes + retweets)
### 2. Analyze the binary sentiment (positive/negative) of each tweet (note: this does not seem to work properly yet)

In [None]:
from __future__ import annotations

import os
from dataclasses import dataclass

import tweepy
from dotenv import load_dotenv
import pandas as pd
import plotly.graph_objects as go
from transformers import pipeline, AutoModelForSequenceClassification, BertJapaneseTokenizer

load_dotenv()

@dataclass
class AuthenticationInfo:
    """Container for the API credentials that are passed to the tweepy client.

    Every field defaults to an empty string, so only the credentials that
    are actually available need to be supplied.
    """

    # NOTE: no trailing commas here -- `field: str = "",` would make the
    # default the one-element tuple `("",)` instead of the string `""`.
    api_key: str = ""
    api_secret_key: str = ""
    bearer_token: str = ""
    access_token: str = ""
    access_token_secret: str = ""

In [None]:
# Collect the API credentials from environment variables
# (load_dotenv() has already been called at import time).
auth_info = AuthenticationInfo(
    api_key=os.environ.get("API_KEY"),
    api_secret_key=os.environ.get("API_SECRET_KEY"),
    bearer_token=os.environ.get("BEARER_TOKEN"),
)

# Build the tweepy client from those credentials.
client = tweepy.Client(
    bearer_token=auth_info.bearer_token,
    consumer_key=auth_info.api_key,
    consumer_secret=auth_info.api_secret_key,
)

In [None]:
def get_tweets_for_eval(client: tweepy.Client, user_name: str, user_id: str | None = None, limit: int = 200) -> pd.DataFrame:
    """Collect a user's scored tweets together with their like/retweet counts.

    Only tweets whose text contains the marker ``"score: "`` followed by a
    parseable number are kept. Retweets are excluded from the request.

    Args:
        client: tweepy client used for the user lookup and the timeline
            requests.
        user_name: Screen name of the account; only used to look up the
            user id when ``user_id`` is not given.
        user_id: Id of the account. Looked up from ``user_name`` when ``None``.
        limit: Maximum number of tweets taken from the timeline, counted
            before tweets without a score are filtered out.

    Returns:
        DataFrame with the columns ``tweet_id``, ``text`` (newlines replaced
        by ``<br>``), ``score``, ``n_likes`` and ``n_retweets``.

    Raises:
        ValueError: If ``user_name`` cannot be resolved to a user.
    """
    score_marker = "score: "
    columns = ["tweet_id", "text", "score", "n_likes", "n_retweets"]

    if user_id is None:
        user = client.get_user(username=user_name).data
        if user is None:
            # Fail with a clear message instead of a TypeError on `None["id"]`.
            raise ValueError(f"Could not find a Twitter user named {user_name!r}")
        user_id = user["id"]

    timeline = tweepy.Paginator(
        client.get_users_tweets,
        id=user_id,
        max_results=100,
        exclude=["retweets"],
        tweet_fields=["public_metrics"],
    ).flatten(limit=limit)

    rows = []
    for tweet in timeline:
        # Split on the LAST marker so that a "score: " occurring inside the
        # tweet body itself does not get mistaken for the score suffix.
        body, marker, raw_score = tweet.text.rpartition(score_marker)

        # If log-likelihood score is not available, skip the tweet
        if not marker:
            continue
        try:
            score = float(raw_score)
        except ValueError:
            # Marker present but not followed by a number: treat the score
            # as unavailable rather than aborting the whole collection.
            continue

        metrics = tweet.data["public_metrics"]
        rows.append(
            {
                "tweet_id": tweet.id,
                "text": body.replace("\n", "<br>"),
                "score": score,
                "n_likes": metrics["like_count"],
                "n_retweets": metrics["retweet_count"],
            }
        )

    # Passing `columns` keeps the schema stable even when no tweet qualifies.
    return pd.DataFrame(rows, columns=columns)

In [None]:
# Fetch the scored tweets of the "AI_15R" account (using the function's default limit).
df_tweets = get_tweets_for_eval(client=client, user_name="AI_15R")

In [None]:
# The classifier and its tokenizer are loaded from the same pretrained checkpoint.
SENTIMENT_MODEL_NAME = "daigo/bert-base-japanese-sentiment"
tokenizer = BertJapaneseTokenizer.from_pretrained(SENTIMENT_MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(SENTIMENT_MODEL_NAME)

# Sentiment-analysis pipeline wrapping the model/tokenizer pair.
nlp = pipeline("sentiment-analysis", tokenizer=tokenizer, model=model)

In [None]:
# Classify every tweet and record the label, a marker color for the plot,
# and the classifier's score for that label.
sentiment_dict = dict(
    sentiment=[],
    sentiment_color=[],
    sentiment_scores=[],
)
for text in df_tweets.text:
    # `df_tweets.text` holds the hover-formatted text, in which newlines were
    # replaced by "<br>". Restore the newlines so that the classifier sees the
    # tweet text rather than HTML markup.
    res = nlp(text.replace("<br>", "\n"))[0]

    # "ポジティブ" is the positive label; everything else is drawn as negative.
    is_positive = res["label"] == "ポジティブ"

    sentiment_dict["sentiment"].append(res["label"])
    sentiment_dict["sentiment_color"].append("#60e0e0" if is_positive else "#e06080")
    sentiment_dict["sentiment_scores"].append(float(res["score"]))

# The dict keys are exactly the new column names.
df_tweets_with_sentiment = df_tweets.assign(**sentiment_dict)

In [None]:
# Display the tweets together with their predicted sentiment columns.
df_tweets_with_sentiment

In [None]:
# Scatter plot of total reactions (likes + retweets) against the
# log-likelihood score. Markers are colored by predicted sentiment and the
# tweet text is shown on hover.
fig = go.Figure()

fig.add_trace(
    go.Scatter(
        x=df_tweets_with_sentiment.n_likes + df_tweets_with_sentiment.n_retweets,
        y=df_tweets_with_sentiment.score,
        mode="markers",
        marker=go.scatter.Marker(
            size=10,
            color=df_tweets_with_sentiment.sentiment_color,
        ),
        hovertext=df_tweets_with_sentiment.text,
    )
)

fig.update_layout(
    width=700,
    height=700,
    title="AI_15R のツイート評価 - いいね数 + リツイート数 vs. 対数尤度のスコア",
    xaxis=dict(
        title="Number of Likes + Retweets",
    ),
    yaxis=dict(
        title="Log-likelihood Score",
    ),
    template="plotly_dark",
    font={"family": "Ubuntu", "size": 10},
)

# NOTE: a bare `fig.to_html()` only builds an HTML string that is thrown away,
# so it is not called here. Use `fig.write_html(path)` to export the figure.
fig.show()