In [1]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import altair as alt

In [2]:
# Load the weekly, user-level "new normal" topic table, cast the tweet count
# and week number columns to integers, and keep only rows from week 19 onward
# with a fresh 0..n-1 index.
# NOTE(review): the path below is absolute and machine-specific; the notebook
# will not run elsewhere without editing it.
df = (
    pd.read_csv("F:/WB/Issue Monitoring/data/TOP_10_Evolution_Weekly_Issue_Keyword_user_level_new_normal.csv")
    .astype({'tweets_count': int, 'week_of_the_year': int})
    .loc[lambda frame: frame['week_of_the_year'] >= 19]
    .reset_index(drop=True)
)

In [3]:
# Preview the first rows of df to check the loaded columns and values.
df.head()

Unnamed: 0,user_id,tweets_count,week_of_the_year,favorite_count,retweet_count,reply_count,verified,sentiment_score,confidence in government new normal,economic policy new normal,employment new normal,health care new normal,health protocol new normal,mobility new normal
0,1226050062762954757,77,19,0,0,0,False,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,380183734,21,20,0,0,0,False,0.5,0.0,0.0,0.0,0.0,0.0,0.0
2,1218348641498226688,136,22,0,0,0,False,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,853230786601930752,17,22,0,0,0,False,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,901381473915109377,60,22,0,0,0,False,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [4]:
# List the distinct week numbers present in df (in order of first appearance).
df['week_of_the_year'].unique()

array([19, 20, 22, 21, 23, 24])

In [5]:
# Total tweets per (week, topic): for every week 19-24 and every topic column,
# sum `tweets_count` over the rows of that week whose topic flag equals 1.
# The result is stored as week_dict['<week>']['tweets_count'][<topic>].
topic_columns = [
    'confidence in government new normal',
    'economic policy new normal',
    'employment new normal',
    'health care new normal',
    'health protocol new normal',
    'mobility new normal',
]

week_dict = {}
for week_number in range(19, 25):
    # Row mask for the current week, computed once and reused for every topic.
    in_week = df.week_of_the_year == week_number
    totals_by_topic = {}
    for topic_name in topic_columns:
        print(f'Generating Stats for Week {week_number} and Topic {topic_name}')
        flagged = df[topic_name] == 1
        totals_by_topic[topic_name] = df.loc[flagged & in_week, 'tweets_count'].sum()

    week_dict[str(week_number)] = {'tweets_count': totals_by_topic}

Generating Stats for Week 19 and Topic confidence in government new normal
Generating Stats for Week 19 and Topic economic policy new normal
Generating Stats for Week 19 and Topic employment new normal
Generating Stats for Week 19 and Topic health care new normal
Generating Stats for Week 19 and Topic health protocol new normal
Generating Stats for Week 19 and Topic mobility new normal
Generating Stats for Week 20 and Topic confidence in government new normal
Generating Stats for Week 20 and Topic economic policy new normal
Generating Stats for Week 20 and Topic employment new normal
Generating Stats for Week 20 and Topic health care new normal
Generating Stats for Week 20 and Topic health protocol new normal
Generating Stats for Week 20 and Topic mobility new normal
Generating Stats for Week 21 and Topic confidence in government new normal
Generating Stats for Week 21 and Topic economic policy new normal
Generating Stats for Week 21 and Topic employment new normal
Generating Stats for

In [7]:
# Flatten week_dict into a single list: weeks in order 19 -> 24 and, within
# each week, topics in the fixed order listed below.
topic_order = [
    'confidence in government new normal',
    'economic policy new normal',
    'employment new normal',
    'health care new normal',
    'health protocol new normal',
    'mobility new normal',
]
tweets_count_list = [
    week_dict[week_key]['tweets_count'][topic_name]
    for week_key in ('19', '20', '21', '22', '23', '24')
    for topic_name in topic_order
]


In [8]:
# Long-format table for the chart: one row per (week label, topic) pair.
# Rows are week-major / topic-minor, which is the order in which
# tweets_count_list was filled, so the three columns line up positionally.
week_labels = [
    'May 4th - May 10th',
    'May 11th - May 17th',
    'May 18th - May 24th',
    'May 25th - May 31st',  # ordinal fixed: was "31th"
    'June 1st - June 7th',
    # NOTE(review): this label ends on the 13th rather than a full 7-day
    # span — presumably where data collection stopped; confirm.
    'June 8th - June 13th',
]
topic_names = [
    'confidence in government new normal',
    'economic policy new normal',
    'employment new normal',
    'health care new normal',
    'health protocol new normal',
    'mobility new normal',
]

source = pd.DataFrame({
    # Repeat each week label once per topic, and the topic list once per week,
    # instead of hand-counted "*6" repetitions.
    'week' : [label for label in week_labels for _ in topic_names],
    'topic' : topic_names * len(week_labels),
    'tweets count': tweets_count_list
})

# Drop (week, topic) pairs with no tweets so they do not appear in the chart.
source = source[source['tweets count']!=0].reset_index(drop=True)

In [9]:
# Normalized stacked bar chart: one bar per week, split by each topic's share
# of that week's tweets.
#
# The x-axis order is taken from the order in which week labels first appear
# in `source` (built week by week), so every week is listed explicitly rather
# than naming only the first two and relying on the renderer's fallback
# ordering for the rest.
week_order = source['week'].drop_duplicates().tolist()

alt.Chart(source).mark_bar().encode(
    x = alt.X('week',sort=week_order),
    y = alt.Y('tweets count',stack="normalize", axis=alt.Axis(format='%'), title = '% Posts'),
    color = 'topic',
    order = alt.Order('topic',sort='ascending')
).properties(
    width=1100,
    height=700,
    title='Distribution of New Normal Topic Weekly'
).configure_axis(
    labelFontSize=12,
    titleFontSize=20
).configure_title(
    fontSize=30
).configure_legend(
    labelFontSize=15
).configure_axisX(
    labelAngle = 0
)