In [1]:
import numpy as np
import matplotlib.pyplot as plt
import plotly
import plotly.graph_objects as go
import pandas as pd
import pickle
from dict_countries import country_info 
import copy
import math

In [2]:
file_path = './FULL-Table.csv'
df = pd.read_csv(file_path, encoding='utf-8')

columns_to_drop = ['Unnamed: 23', 'Unnamed: 24', 'Unnamed: 25']
df = df.drop(columns=columns_to_drop, axis=1)

columns_to_fill_nan_zero = ['X (Twitter) Follower #', 'Facebook Follower #', 'Threads Follower #', 'YouTube Subscriber #', 'TikTok Subscriber #']
df[columns_to_fill_nan_zero] = df[columns_to_fill_nan_zero].fillna(0).astype('str')
for c in columns_to_fill_nan_zero:
    df[c] = df[c].str.replace(',', '').astype(float).astype(int)

## Extract Data

In [3]:
columns_of_followers = columns_to_fill_nan_zero
parents = df['Parent entity (English)'].unique()
parents_media = {}
for p in parents:
    temp = df[df['Parent entity (English)'] == p][columns_of_followers].sum(axis=0)
    parents_media[p] = temp.to_dict()
    parents_media[p]['sum'] = temp.sum()
    
with open('parents_stats.pkl', 'wb') as f:
    pickle.dump(parents_media, f)

In [4]:
columns_of_followers = columns_to_fill_nan_zero
parents = df['Language'].unique()
parents_media = {}
for p in parents:
    temp = df[df['Language'] == p][columns_of_followers].sum(axis=0)
    parents_media[p] = temp.to_dict()
    parents_media[p]['sum'] = temp.sum()
    
with open('lang_stats.pkl', 'wb') as f:
    pickle.dump(parents_media, f)

In [5]:
columns_of_followers = columns_to_fill_nan_zero
parents = df['Region of Focus'].unique()
parents_media = {}
for p in parents:
    temp = df[df['Region of Focus'] == p][columns_of_followers].sum(axis=0)
    parents_media[p] = temp.to_dict()
    parents_media[p]['sum'] = temp.sum()

final ={} 
for media in parents_media:
    try:
        new_keys = country_info[media]
    except:
        continue
    if not isinstance(new_keys, list):
        new_keys = [new_keys]
    parents_counts = parents_media[media]
    for a in parents_counts:
        parents_counts[a] = parents_counts[a] // len(new_keys)
    
    for new_key in new_keys:
        if new_key in final:
            new = copy.copy(final[new_key])
            for key in final[new_key]:
                new[key] += parents_counts[key]
            final[new_key] = new
        else:
            final[new_key] = parents_counts

with open('contries_stats.pkl', 'wb') as f:
    pickle.dump(final, f)

## Visualize Growth Rate in TT

In [6]:
with open('./data/updated_tt_followers.pkl', 'rb') as f:
    updated_tt = pickle.load(f)

In [13]:
old_tt = {x:y for x,y in zip(df['X (Twitter) handle'].tolist(), df['X (Twitter) Follower #'].tolist()) if isinstance(x, str)}

parents = df['Parent entity (English)'].unique()
parent_to_tt = {}
for p in parents:
    parent_to_tt[p] = [i for i in df[df['Parent entity (English)'] == p]['X (Twitter) handle'].tolist() if isinstance(i, str)]
    
parent_to_tt
user_diff ={}
for user, value in old_tt.items():
    try:
        diff = updated_tt[user] - value
        user_diff[user] = int(diff)
    except KeyError:
        pass

parent_diff = {}
for p, childs in parent_to_tt.items():
        parent_diff[p] = sum([user_diff[i] if i in user_diff else 0 for i in childs])

parent_diff_sorted = sorted(parent_diff.items(), key=lambda x: x[1], reverse=True)
parent_name, parent_growth = zip(*parent_diff_sorted)


# Creating the bar chartt
fig = go.Figure([go.Bar(x=parent_name, y=parent_growth)])

# Adding titles and labels (optional, but recommended for clarity)
fig.update_layout(title='Twitter Growth Count Based on Parent', xaxis_title='Parent', yaxis_title='Count', width=800, height=1100)

# Show the figure
fig.show()

In [8]:
old_tt = {x:y for x,y in zip(df['X (Twitter) handle'].tolist(), df['X (Twitter) Follower #'].tolist()) if isinstance(x, str)}

parents = df['Parent entity (English)'].unique()
parent_to_tt = {}
for p in parents:
    parent_to_tt[p] = [i for i in df[df['Parent entity (English)'] == p]['X (Twitter) handle'].tolist() if isinstance(i, str)]
    
parent_to_tt
user_diff ={}
for user, value in old_tt.items():
    try:
        diff = updated_tt[user] - value
        user_diff[user] = int(diff)
    except KeyError:
        pass

parent_diff = {}
parent_pos = {}
parent_neg = {}
for p, childs in parent_to_tt.items():
        parent_diff[p] = sum([user_diff[i] if i in user_diff else 0 for i in childs])
        parent_pos[p] = sum([user_diff[i] if i in user_diff and user_diff[i] > 0 else 0 for i in childs])
        parent_neg[p] = sum([user_diff[i] if i in user_diff and user_diff[i] < 0 else 0 for i in childs])

parent_diff_sorted = sorted(parent_diff.items(), key=lambda x: x[1], reverse=True)
parent_name, parent_growth = zip(*parent_diff_sorted)

lparent_pos = []
lparent_neg = []
for i, j in parent_diff_sorted:
    lparent_pos.append(parent_pos[i])
    lparent_neg.append(parent_neg[i])

fig = go.Figure()

# Adding bar for good items
fig.add_trace(go.Bar(
    x=parent_name,
    y=lparent_pos
))

# Adding bar for bad items
fig.add_trace(go.Bar(
    x=parent_name,
    y=lparent_neg
))

# Update layout
fig.update_layout(
    title='Twitter Growth Count Based on Parent (sorted)',
    xaxis_title='Parent',
    yaxis_title='Count',
    barmode='relative',  # This makes the negative bars go downwards
    width=1000,
    height=1100,
    showlegend=False
)

# Show the figure
fig.show()

In [9]:
old_tt = {x:y for x,y in zip(df['X (Twitter) handle'].tolist(), df['X (Twitter) Follower #'].tolist()) if isinstance(x, str)}

parents = df['Language'].unique()
parent_to_tt = {}
for p in parents:
    parent_to_tt[p] = [i for i in df[df['Language'] == p]['X (Twitter) handle'].tolist() if isinstance(i, str)]
    if not parent_to_tt[p]:
        del parent_to_tt[p]
    
parent_to_tt
user_diff ={}
for user, value in old_tt.items():
    try:
        diff = updated_tt[user] - value
        user_diff[user] = int(diff)
    except KeyError:
        pass

parent_diff = {}
parent_pos = {}
parent_neg = {}
for p, childs in parent_to_tt.items():
        parent_diff[p] = sum([user_diff[i] if i in user_diff else 0 for i in childs])
        parent_pos[p] = sum([user_diff[i] if i in user_diff and user_diff[i] > 0 else 0 for i in childs])
        parent_neg[p] = sum([user_diff[i] if i in user_diff and user_diff[i] < 0 else 0 for i in childs])

parent_diff_sorted = sorted(parent_diff.items(), key=lambda x: x[1], reverse=True)
parent_name, parent_growth = zip(*parent_diff_sorted)

lparent_pos = []
lparent_neg = []
for i, j in parent_diff_sorted:
    lparent_pos.append(parent_pos[i])
    lparent_neg.append(parent_neg[i])

fig = go.Figure()

# Adding bar for good items
fig.add_trace(go.Bar(
    x=parent_name,
    y=lparent_pos,
))

# Adding bar for bad items
fig.add_trace(go.Bar(
    x=parent_name,
    y=lparent_neg,
))

# Update layout
fig.update_layout(
    title='Twitter Growth Count Based on Language (sorted)',
    xaxis_title='Language',
    yaxis_title='Count',
    barmode='relative',  # This makes the negative bars go downwards
    width=1000,
    height=600,
    showlegend=False
)

# Show the figure
fig.show()

In [10]:
old_tt = {x:y for x,y in zip(df['X (Twitter) handle'].tolist(), df['X (Twitter) Follower #'].tolist()) if isinstance(x, str)}

parents = df['Language'].unique()
parent_to_tt = {}
for p in parents:
    parent_to_tt[p] = [i for i in df[df['Language'] == p]['X (Twitter) handle'].tolist() if isinstance(i, str)]
    if not parent_to_tt[p]:
        del parent_to_tt[p]
    
parent_to_tt
user_diff ={}
for user, value in old_tt.items():
    try:
        diff = updated_tt[user] - value
        user_diff[user] = int(diff)
    except KeyError:
        pass

parent_diff = {}
for p, childs in parent_to_tt.items():
        parent_diff[p] = sum([user_diff[i] if i in user_diff else 0 for i in childs])

parent_diff_sorted = sorted(parent_diff.items(), key=lambda x: x[1], reverse=True)
parent_name, parent_growth = zip(*parent_diff_sorted)


# Creating the bar chart
fig = go.Figure([go.Bar(x=parent_name, y=parent_growth)])

# Adding titles and labels (optional, but recommended for clarity)
fig.update_layout(title='Twitter Growth Count Based on Language', xaxis_title='Parent', yaxis_title='Count', width=800, height=600)

# Show the figure
fig.show()

In [11]:
names = ['Twitter', 'Facebook', 'Threads', 'YouTube', 'TikTok']
nums = (df[columns_of_followers] !=0).sum().tolist()
fig = go.Figure(data=[go.Pie(labels=names, values=nums)])
# Customize aspects of the layout
fig.update_layout(
    title='Accounts in Social Media Platforms',
    width=600,
    height=600
)

# Show the plot
fig.show()


In [12]:
import plotly.graph_objects as go

# Sample data: follower counts for 20 people on 5 social media platforms
twitter_followers =  df[df[columns_of_followers[0]]!= 0][columns_of_followercountries_statsts[0]].tolist()
facebook_followers = df[df[columns_of_followers[1]]!= 0][columns_of_followers[1]].tolist()
threads_followers =  df[df[columns_of_followers[2]]!= 0][columns_of_followers[2]].tolist()
youtube_followers =  df[df[columns_of_followers[3]]!= 0][columns_of_followers[3]].tolist()
tiktok_followers =   df[df[columns_of_followers[4]]!= 0][columns_of_followers[4]].tolist()

# Create box plots for each platform
my_bool = False
fig = go.Figure()
fig.add_trace(go.Box(y=facebook_followers, name='Facebook', boxpoints = my_bool))
fig.add_trace(go.Box(y=twitter_followers, name='Twitter', boxpoints = my_bool))
fig.add_trace(go.Box(y=threads_followers, name='Threads', boxpoints = my_bool))
fig.add_trace(go.Box(y=youtube_followers, name='YouTube', boxpoints = my_bool))
fig.add_trace(go.Box(y=tiktok_followers, name='TikTok', boxpoints = my_bool))

# Customize layout
fig.update_layout(
    title='Social Media Followers Distribution',
    yaxis_title='Number of Followers',
    yaxis_type='log',
    width=800,
    height=600
)

# Show the plot
fig.show()


NameError: name 'columns_of_followercountries_statsts' is not defined

In [None]:
import pycountry
import country_converter as coco

import plotly.graph_objects as go
with open('./data/contries_stats.pkl', 'rb') as f:
    countries_stats = pickle.load(f)
    

c3 = []
v3 = []
tt3 = []
fb3 = []
tik3 = []
thr3 = []
yt3 = []

error = 0
for c in countries_stats:
    try:
        c3.append(coco.convert(names=c, to='ISO3'))
        v3.append(countries_stats[c]['sum'])
        tt3.append(countries_stats[c]['X (Twitter) Follower #'])
        fb3.append(countries_stats[c]['Facebook Follower #'])
        tik3.append(countries_stats[c]['TikTok Subscriber #'])
        thr3.append(countries_stats[c]['Threads Follower #'])
        yt3.append(countries_stats[c]['YouTube Subscriber #'])
        
    except AttributeError:
        error += 1


In [None]:
import plotly.graph_objects as go
import numpy as np

# Example country ISO codes, population data, and additional values
countries = c3
populations = list(v3)
log_populations = list(map(math.log10, v3))

colors = {
    'Country': 'bla1ck',
    'Twitter': 'blu1e',
    'Facebook': 'bl1ue',
    'Threads': 'bri1ght_green',
    'YouTube': 're1d',
    'TikTok': 'pi1nk'
}

hover_text = [
    f'<span style="color:{colors["Country"]}">{country}</span><br>'
    f'<span style="color:{colors["Twitter"]}">Twitter: {tt1}</span><br>'
    f'<span style="color:{colors["Facebook"]}">Facebook: {fb1}</span><br>'
    f'<span style="color:{colors["Threads"]}">Threads: {thr1}</span><br>'
    f'<span style="color:{colors["YouTube"]}">YouTube: {yt1}</span><br>'
    f'<span style="color:{colors["TikTok"]}">TikTok: {tik1}</span><br>'
    f'<br>'
    f'<span style="color:{colors["TikTok"]}">Total: {summ}</span>'
    
    for country, summ, tt1, fb1, thr1, yt1, tik1 in zip(c3, v3, tt3, fb3, thr3, yt3, tik3)]
# Create custom hover text
# hover_text = [f'{country}<br>#Total Followers: {summ}<br>#Twitter: {tt1}<br>#Facebook: {fb1}<br>#Threads: {thr1}<br>#YouTube: {yt1}<br>#Tiktok: {tik1}' 
              

# Create the choropleth map
fig = go.Figure(data=go.Choropleth(
    locations=countries,
    z=log_populations,
    text=hover_text,       # Custom hover text
    hoverinfo='text',      # Use custom text for hover info
    colorbar_title='Number of Followers (Log)',
    
    colorscale='Blues',  # Color scale for the map
    autocolorscale=False,
    reversescale=False,
    marker_line_color='darkgray',
    marker_line_width=0.5,
))


# Update the layout
fig.update_layout(
    title_text='Affect of Chinese Social Media',
    geo=dict(
        showframe=True,
        showcoastlines=False,
        projection_type='equirectangular'
    ),
    width=1100,
    height=700
)

# Show the figure
fig.show()

In [None]:
c3 = []
v3 = []
tt3 = []
fb3 = []
tik3 = []
thr3 = []
yt3 = []

error = 0
for c in countries_stats:
    try:
        c3.append(coco.convert(names=c, to='ISO3'))
        v3.append(countries_stats[c]['sum'])
        tt3.append(countries_stats[c]['X (Twitter) Follower #'])
        fb3.append(countries_stats[c]['Facebook Follower #'])
        tik3.append(countries_stats[c]['TikTok Subscriber #'])
        thr3.append(countries_stats[c]['Threads Follower #'])
        yt3.append(countries_stats[c]['YouTube Subscriber #'])
        
    except AttributeError:
        error += 1


In [None]:
import plotly.graph_objects as go


# Sample data
np_all = np.array([v3, tt3, fb3, thr3, yt3, tik3]).T
sort_idx = np_all[:, 0].argsort()[::-1][:10]
c3 = np.array(c3)[sort_idx]

# Create traces (bars) for each category
trace1 = go.Bar(
    x=c3,
    y=np_all[sort_idx][:,1],
    name='Twitter',
#     orientation='h'
#     marker_color='blue'
)
trace2 = go.Bar(
    x=c3,
    y=np_all[sort_idx][:,2],
    name='Facebook',
#     orientation='h'
#     marker_color='blue'
)
trace3 = go.Bar(
    x=c3,
    y=np_all[sort_idx][:,-1],
    name='TikTok',
#     orientation='h'
#     marker_color='blue'
)
trace4 = go.Bar(
    x=c3,
    y=np_all[sort_idx][:,3],
    name='Threads',
#     orientation='h'
#     marker_color='blue'
)
trace5 = go.Bar(
    x=c3,
    y=np_all[sort_idx][:,-2],
    name='YouTube',
#     orientation='h'
#     marker_color='blue'
)

# Create the figure and add traces
fig = go.Figure(data=[trace1, trace2,trace3, trace4,trace5])

# Update layout for stacked bar plot
fig.update_layout(
    barmode='stack',  # This makes the bars stack on top of each other
    title='Top 10 Countries Based on Number of Follewrs ',
    xaxis_title='Countries',
    yaxis_title='# Followers',
    width=800,
    height=660
)

# Show the figure
fig.show()
