# Tweet Analysis (WIP)

### 1. Analyze correlation between log-likelihood score and reactions to each tweet
### 2. (seems not to work properly) Analyze binary sentiments of each tweet

In [9]:
from __future__ import annotations

import os
from dataclasses import dataclass

import tweepy
from dotenv import load_dotenv
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from transformers import pipeline, AutoModelForSequenceClassification, BertJapaneseTokenizer

load_dotenv()

@dataclass
class AuthenticationInfo:
    api_key: str = "",
    api_secret_key: str = "",
    bearer_token: str = "",
    access_token: str = "",
    access_token_secret: str = "",

In [2]:
auth_info = AuthenticationInfo(
    api_key=os.getenv("API_KEY"),
    api_secret_key=os.getenv("API_SECRET_KEY"),
    bearer_token=os.getenv("BEARER_TOKEN"),
)

client = tweepy.Client(
    consumer_key=auth_info.api_key,
    consumer_secret=auth_info.api_secret_key,
    bearer_token=auth_info.bearer_token,
)

In [3]:
def get_tweets_for_eval(client: tweepy.Client, user_name: str, user_id: str | None = None, limit: int = 200) -> pd.DataFrame:

    if user_id is None:
        user_id = client.get_user(username=user_name).data["id"]
    
    tweets = dict(
        tweet_id=[],
        text=[],
        score=[],
        n_likes=[],
        n_retweets=[],
    )
    # Get tweets
    for tweet in tweepy.Paginator(
        client.get_users_tweets,
        id=user_id,
        max_results=100,
        exclude=["retweets"],
        tweet_fields=["public_metrics"],
    ).flatten(limit=limit):
        
        # If log-likelihood score is not available, skip the tweet
        if "score: " not in tweet.text:
            continue
        
        splitted_tweet = tweet.text.split("score: ")
        tweets["tweet_id"].append(tweet.id)
        tweets["text"].append(splitted_tweet[0].replace("\n", "<br>"))
        tweets["score"].append(float(splitted_tweet[1]))
        tweets["n_likes"].append(tweet.data["public_metrics"]["like_count"])
        tweets["n_retweets"].append(tweet.data["public_metrics"]["retweet_count"])
        
    return pd.DataFrame(tweets)

In [4]:
df_tweets = get_tweets_for_eval(client=client, user_name="AI_15R")

In [5]:
model = AutoModelForSequenceClassification.from_pretrained("daigo/bert-base-japanese-sentiment") 
tokenizer = BertJapaneseTokenizer.from_pretrained("daigo/bert-base-japanese-sentiment")
nlp = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)     

In [6]:
sentiment_dict = dict(
   sentiment=[],
   sentiment_color=[],
   sentiment_scores=[],
)
for text in df_tweets.text:
   res = nlp(text)[0]
   
   if res["label"] == "ポジティブ":
      sentiment_dict["sentiment_color"].append("#60e0e0")
   else:
      sentiment_dict["sentiment_color"].append("#e06080")
   
   sentiment_dict["sentiment"].append(res["label"])
   sentiment_dict["sentiment_scores"].append(float(res["score"]))

df_tweets_with_sentiment = df_tweets.assign(
   sentiment=sentiment_dict["sentiment"],
   sentiment_color=sentiment_dict["sentiment_color"],
   sentiment_scores=sentiment_dict["sentiment_scores"],
)

In [7]:
df_tweets_with_sentiment

Unnamed: 0,tweet_id,text,score,n_likes,n_retweets,sentiment,sentiment_color,sentiment_scores
0,1550172747837624321,別に変な気分ではないが、オタクがめちゃくちゃ好そいすもんの話をしてるところを見ると、どうして...,-28.775,0,0,ポジティブ,#60e0e0,0.905569
1,1550171845223419904,なんか冷笑されている当人に対して、オタクは冷笑しない姿勢をとっているらしい、オタクの交差点の...,-89.548,2,4,ポジティブ,#60e0e0,0.718000
2,1550156628854419456,精神状態が乱高下しているオタクに対して、普通に可哀そうな時のてね🤚と言葉をかけてあげた方が良...,-84.819,3,0,ポジティブ,#60e0e0,0.855406
3,1550148991035543552,3日後のーチュア」の顔が頭の中に思い出されるようになってきた、これは早く覚えたい、思い出に浸...,-181.510,0,0,ポジティブ,#60e0e0,0.819985
4,1550141356538212352,オタクがトイレに篭っているのを発見したのでオタクをトイレに呼んだら、オタクがどうとか言って出...,-151.880,3,1,ポジティブ,#60e0e0,0.789078
...,...,...,...,...,...,...,...,...
156,1549113824145711104,身長が低いので、こちらがより歩くスピードが出ていた間に雪で視界がブラックアウトします、なぜで...,-108.810,0,0,ポジティブ,#60e0e0,0.960514
157,1549106184019357696,ハグとかいう謎の音を出しているオタクの死体を見るたびに音を出している謎のオタクが許せない状態...,-186.000,2,1,ポジティブ,#60e0e0,0.528650
158,1549098548884807680,人間の顔になった気分で延々動画を見続けることになったので怖い<br>人間の顔に変えた感覚が全...,-160.880,1,0,ネガティブ,#e06080,0.821181
159,1549096401329475589,会社の売上が何の給能もとらず、何の力も無い者が何の能力もない人に、何の力も無いニュースでもし...,-222.440,1,0,ポジティブ,#60e0e0,0.607822


In [10]:
fig = go.Figure()

fig.add_traces(
    go.Scatter(
        x=np.log2(df_tweets_with_sentiment.n_likes + df_tweets_with_sentiment.n_retweets),
        y=df_tweets_with_sentiment.score,
        mode="markers",
        marker=go.scatter.Marker(
            size=10,
            color=df_tweets_with_sentiment.sentiment_color,
        ),
        hovertext=df_tweets_with_sentiment.text,
    )
)

fig.update_layout(
    width=700,
    height=700,
    title="AI_15R のツイート評価 - いいね数 + リツイート数 vs. 対数尤度のスコア",
    xaxis=dict(
        title="Number of Likes + Retweets - Log Scale (Base = 2)",
    ),
    yaxis=dict(
        title="Log-likelihood Score",
    ),
    template="plotly_dark",
    font={"family": "Ubuntu", "size": 10},
)

fig.to_html()

fig.show()


divide by zero encountered in log2

