# 1. Import Packages and Libraries

In [137]:
import pandas as pd
import numpy as np
import altair as alt
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
import nx_altair as nxa
import tensorflow as tf
import keras as k
import pickle

%matplotlib inline

# 2. Read in Data

In [39]:
df = pd.read_csv('../../GenerativeAI tweets.csv').iloc[:,1:]
df['Datetime'] = pd.to_datetime(df['Datetime'])
df['Date'] = pd.to_datetime(df['Datetime'].apply(lambda x:x.date()))
df

Unnamed: 0,Datetime,Tweet Id,Text,Username,Date
0,2023-04-19 21:27:19+00:00,1648800467206672384,From Studio Gangster to Synthetic Gangster 🎤.....,resembleai,2023-04-19
1,2023-04-19 21:27:09+00:00,1648800425540476929,Took me some time to find this. I build this #...,devaanparbhoo,2023-04-19
2,2023-04-19 21:26:57+00:00,1648800376479715328,Mind blowing next wave #generativeai platform...,timreha,2023-04-19
3,2023-04-19 21:26:49+00:00,1648800341193027584,Open Source Generative AI Image Specialist Sta...,VirtReview,2023-04-19
4,2023-04-19 21:25:00+00:00,1648799883934203905,Are you an #HR leader considering which future...,FrozeElle,2023-04-19
...,...,...,...,...,...
56216,2022-04-24 16:40:01+00:00,1518268535276904448,"Understanding Generative AI, Its Impacts and L...",analyticsinme,2022-04-24
56217,2022-04-23 07:23:24+00:00,1517766068592381952,Y ya puedes empezar a crear #arte con @thegeni...,iia_es,2022-04-23
56218,2022-04-22 08:20:21+00:00,1517418013812830208,"NVIDIA researchers have developed GANverse3D, ...",VideoGenAI,2022-04-22
56219,2022-04-21 13:15:21+00:00,1517129866403008512,Tech Trend 2022: เทรนด์เทคโนโลยีสำหรับปี 2022 ...,sitthinuntp,2022-04-21


# 3. Tweet Quantity Over Time

Hypothesis: tweet volume related to generative ai has increased over time between April 2022 and April 2023

### Data Prep

In [62]:
daily_volume = df.groupby('Date').count()['Datetime']
daily_volume.name = 'Daily Volume'

avg_tweet_rate = np.cumsum(daily_volume) / np.arange(1,len(daily_volume) + 1)
avg_tweet_rate.name = 'Average Tweet Rate'

avg_tweet_rate_7 = daily_volume.rolling(window = 7).mean()
avg_tweet_rate_7.name = '7 Day Rolling Tweet Rate'

tweet_quantity = pd.concat([daily_volume,avg_tweet_rate, avg_tweet_rate_7], axis = 1)
tweet_quantity.reset_index(inplace = True)

tweet_quantity = round(tweet_quantity, 2)

final_tweet_quantity = pd.DataFrame()

for col in ['Daily Volume','Average Tweet Rate','7 Day Rolling Tweet Rate']:
    
    subset = tweet_quantity[['Date',col]].copy()
    subset.columns = ['Date','Tweet Volume']
    subset['Metric'] = col
    final_tweet_quantity = pd.concat([final_tweet_quantity,subset])

final_tweet_quantity

Unnamed: 0,Date,Tweet Volume,Metric
0,2022-04-21,2.00,Daily Volume
1,2022-04-22,1.00,Daily Volume
2,2022-04-23,1.00,Daily Volume
3,2022-04-24,1.00,Daily Volume
4,2022-04-25,7.00,Daily Volume
...,...,...,...
336,2023-04-15,654.57,7 Day Rolling Tweet Rate
337,2023-04-16,676.43,7 Day Rolling Tweet Rate
338,2023-04-17,715.00,7 Day Rolling Tweet Rate
339,2023-04-18,745.57,7 Day Rolling Tweet Rate


### Chart Config

In [133]:
selection = alt.selection_multi(fields=['Metric'], bind='legend')
selection2 = alt.selection_multi(fields=['Metric'], bind='legend')
opacity_value = 0.8

chart = alt.Chart(final_tweet_quantity,title = '#GenerativeAI Tweet Volume Over Time').mark_line().encode(
    x = alt.X('Date'),
    y = alt.Y('Tweet Volume',title = 'Tweet Volume'),
    color = alt.Color('Metric',scale=alt.Scale(
        domain=['Daily Volume', '7 Day Rolling Tweet Rate','Average Tweet Rate','ChatGPT Release','GPT 4 Release'],
        range=['lightblue', 'blue','orange','black','grey'])),
    tooltip = ['Date','Tweet Volume','Metric'],
    opacity = alt.condition(selection, alt.value(opacity_value), alt.value(0.1))
).interactive()

chart = chart.add_selection(selection)

products = pd.DataFrame({
  'Date': ['2022-11-30', '2023-03-14'],
  'Metric': ['ChatGPT Release','GPT 4 Release']
})

products['Date'] = pd.to_datetime(products['Date'])

chatGPT_release = alt.Chart(products).mark_rule().encode(
  x='Date:T',
  color=alt.Color('Metric', scale=alt.Scale(
        domain=['ChatGPT Release', 'GPT 4 Release'],
        range=['black', 'gray'])),
    opacity = alt.condition(selection2, alt.value(opacity_value), alt.value(0.1)),
    tooltip = ['Date',alt.Tooltip('Metric', title = 'Product Release')]
).interactive()

chatGPT_release = chatGPT_release.add_selection(selection2)

### Chart

In [134]:
chart + chatGPT_release

# 4. Sentiment Over Time
Hypothesis: Sentiment towards generative ai has improved over time, especially since the chatGPT release

### Data Prep

In [147]:
def scorer_nn1(ytrue, ypred):
    
    ytrue = ytrue.numpy()
    ypred = (ypred.numpy()).argmax(axis = 1)
    
    confuse = confusion_matrix(ytrue, ypred)
    
    score = (confuse.diagonal() / confuse.sum(axis = 1)).mean()
    
    if pd.isnull(score):
        score = 0
    return score

model = k.models.load_model('../sentiment_analysis/EmbeddingModels/TweetSentimentBinary1.h5', compile = True,
                   custom_objects = {'scorer_nn1':scorer_nn1})
tweet_vectors = pickle.load(open('../../Vectorize Tweets/genai_tweet_embeddings.pkl','rb'))
probs = model.predict(tweet_vectors)[:,1].flatten()



In [155]:
df['Sentiment Score'] = 2*probs - 1
df

Unnamed: 0,Datetime,Tweet Id,Text,Username,Date,Sentiment Score
0,2023-04-19 21:27:19+00:00,1648800467206672384,From Studio Gangster to Synthetic Gangster 🎤.....,resembleai,2023-04-19,0.251432
1,2023-04-19 21:27:09+00:00,1648800425540476929,Took me some time to find this. I build this #...,devaanparbhoo,2023-04-19,-0.358057
2,2023-04-19 21:26:57+00:00,1648800376479715328,Mind blowing next wave #generativeai platform...,timreha,2023-04-19,0.820966
3,2023-04-19 21:26:49+00:00,1648800341193027584,Open Source Generative AI Image Specialist Sta...,VirtReview,2023-04-19,0.907774
4,2023-04-19 21:25:00+00:00,1648799883934203905,Are you an #HR leader considering which future...,FrozeElle,2023-04-19,0.879874
...,...,...,...,...,...,...
56216,2022-04-24 16:40:01+00:00,1518268535276904448,"Understanding Generative AI, Its Impacts and L...",analyticsinme,2022-04-24,0.074315
56217,2022-04-23 07:23:24+00:00,1517766068592381952,Y ya puedes empezar a crear #arte con @thegeni...,iia_es,2022-04-23,0.437956
56218,2022-04-22 08:20:21+00:00,1517418013812830208,"NVIDIA researchers have developed GANverse3D, ...",VideoGenAI,2022-04-22,0.332284
56219,2022-04-21 13:15:21+00:00,1517129866403008512,Tech Trend 2022: เทรนด์เทคโนโลยีสำหรับปี 2022 ...,sitthinuntp,2022-04-21,0.743980


In [183]:
tweet_quantity = df.groupby('Date').count()['Datetime']
tweet_quantity.name = 'Tweet Quantity'

daily_sentiment = df.groupby('Date').sum()['Sentiment Score']
daily_sentiment.name = 'Raw Daily Sentiment'

daily_sentiment_scaled = daily_sentiment/tweet_quantity
daily_sentiment_scaled.name = 'Daily Sentiment Score'

avg_sentiment = np.cumsum(daily_sentiment)/np.cumsum(tweet_quantity)
avg_sentiment.name = 'Average Sentiment Over Time'

sentiment_df = round(pd.concat([daily_sentiment_scaled, avg_sentiment], axis = 1),2)
sentiment_df

Unnamed: 0_level_0,Daily Sentiment Score,Average Sentiment Over Time
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2022-04-21,0.79,0.79
2022-04-22,0.33,0.64
2022-04-23,0.44,0.59
2022-04-24,0.07,0.49
2022-04-25,0.72,0.62
...,...,...
2023-04-15,0.57,0.58
2023-04-16,0.58,0.58
2023-04-17,0.53,0.58
2023-04-18,0.57,0.58


In [208]:
final_sentiment_df = pd.DataFrame()

for col in ['Daily Sentiment Score','Average Sentiment Over Time']:
    
    subset = sentiment_df[[col]].copy()
    subset.columns = ['Sentiment Score']
    subset['Score Type'] = col
    final_sentiment_df = pd.concat([final_sentiment_df,subset])

final_sentiment_df.reset_index(inplace = True)
final_sentiment_df['Color'] = final_sentiment_df['Sentiment Score'].apply(lambda x:'lightblue' if x>= 0 else 'lightred')

final_sentiment_df

Unnamed: 0,Date,Sentiment Score,Score Type,Color
0,2022-04-21,0.79,Daily Sentiment Score,lightblue
1,2022-04-22,0.33,Daily Sentiment Score,lightblue
2,2022-04-23,0.44,Daily Sentiment Score,lightblue
3,2022-04-24,0.07,Daily Sentiment Score,lightblue
4,2022-04-25,0.72,Daily Sentiment Score,lightblue
...,...,...,...,...
677,2023-04-15,0.58,Average Sentiment Over Time,lightblue
678,2023-04-16,0.58,Average Sentiment Over Time,lightblue
679,2023-04-17,0.58,Average Sentiment Over Time,lightblue
680,2023-04-18,0.58,Average Sentiment Over Time,lightblue


### Chart Config

In [262]:
chart1 = alt.Chart(final_sentiment_df[final_sentiment_df['Score Type'] == 'Daily Sentiment Score']
                  ,title = '#GenerativeAI Tweet Sentiment Over Time').mark_bar().encode(
    x = alt.X('Date'),
    y = alt.Y('Sentiment Score',title = 'Sentiment Score'),
    color = 'Color:N',
    tooltip = ['Date',alt.Tooltip('Sentiment Score',title = 'Daily Sentiment Score')],
    opacity = alt.value(0.15)
).interactive()

chart2 = alt.Chart(final_sentiment_df[final_sentiment_df['Score Type'] == 'Average Sentiment Over Time'],
                   title = '#GenerativeAI Tweet Sentiment Over Time').mark_line().encode(
    x = alt.X('Date'),
    y = alt.Y('Sentiment Score',title = 'Sentiment Score'),
    tooltip = ['Date',alt.Tooltip('Sentiment Score',title = 'Average Sentiment Over Time')],
).interactive()

chart = chart1 + chart2
chart = chart.configure_legend(disable = True)

### Chart

In [263]:
chart