# Analysis
This Jupyter Notebook contains the code that creates all tables and figures used in the research. Run the first cell before any other, because every later cell 
depends on it. Most cells can then be run independently to create the table or figure named in their section heading or first line. If a cell depends on another cell, 
this is stated in its section.

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib as mpl
import plotly.express as px

# Notebook-wide pandas display settings, applied in the order listed.
display_settings = (
    ('display.min_rows', 400),
    ('display.max_rows', 400),
    ('display.max_columns', None),
    ('display.max_colwidth', 100),
    ('display.width', 1000),
    ('display.colheader_justify', 'center'),
    ('display.precision', 3),
)
for option_name, option_value in display_settings:
    pd.set_option(option_name, option_value)

# Resolution used for matplotlib figures in this notebook.
mpl.rcParams.update({'figure.dpi': 75})

# Load the tweets and replace every 'created_at' timestamp by its ISO calendar week. The ISO calendar is based on years, weeks and days;
# the column ends up holding simple strings representing the year and week, e.g. '2018-05'.
tweets = pd.read_feather('data/final_tweets.feather')
year_week = pd.to_datetime(tweets.created_at.values, dayfirst = True).isocalendar()
# Build the labels column-wise instead of cell by cell. `.to_numpy()` makes the assignment positional, because `year_week` is indexed
# by timestamp while `tweets` keeps its own index.
tweets['created_at'] = (year_week['year'].astype(str) + '-' + year_week['week'].astype(str).str.zfill(2)).to_numpy()

# Exclude potential tweets from outside our period. `.copy()` detaches the filtered frame so that the columns added to `tweets`
# here and in later cells do not trigger a SettingWithCopyWarning.
weeks_outside_period = ['2019-48', '2019-49', '2019-50', '2019-51', '2019-52']
tweets = tweets[~tweets.created_at.isin(weeks_outside_period)].copy()

# Create a column in the tweets dataframe that indicates whether a given tweet was made before or after the beginning of FFF protests.
# The first 38 weeks (in sorted order) form the pre-FFF period.
pre = sorted(tweets.created_at.unique())[:38]
tweets['period'] = np.where(tweets.created_at.isin(pre), 'Pre FFF', 'Post FFF')

# Load the Forbes companies, keep the European ones and collect the country labels used in multiple subsequent cells.
companies = pd.read_csv('data/Forbes_global_2000_2019.csv')
companies = companies.loc[companies['Continent'] == 'Europe']

# Country names as they appear in the companies table, followed by 'Total'; two long names are replaced by their short form.
short_country_names = {'United Kingdom': 'UK', 'Czech Republic': 'Czechia'}
unique_countries = np.append(companies.Country.unique(), 'Total')
unique_countries[:] = [short_country_names.get(name, name) for name in unique_countries]

# Row selections for the table styling: every second row is shaded grey, companies without a Twitter account are colored red.
idx_grey = idx_no_twitter = pd.IndexSlice
slice_grey = idx_grey[::2, :]
has_no_twitter = companies['Twitter ID'] == 'No Twitter'
slice_no_twitter = idx_no_twitter[has_no_twitter, :]

# Human-readable topic labels. Other cells pair them with the topic ids in ascending order, so the first entry belongs to the lowest id.
topic_names = [
    'Non-category',
    'Health',
    'Cars',
    'Sports',
    'Diversity',
    'Financial results',
    'Economics',
    'Financial services',
    'Customer service',
    'Food & Holidays',
    'Luxury',
    'Travel',
    'IT infrastructure',
    'Future mobility',
    'Sustainability',
    'Artificial Intelligence',
    'Innovation',
]

## Visualised Approach

In [91]:
from diagrams import Cluster, Diagram
from diagrams.saas.social import Twitter
from diagrams.custom import Custom

# Attributes passed to the diagram and to both clusters: transparent background.
graph_attr = {
    "bgcolor": "transparent"
}

# Draw the processing pipeline as a left-to-right diagram with an empty title, using 'visualised_approach' as output file name.
# `show=False` presumably keeps the rendered file from being opened automatically (see the `diagrams` docs — confirm).
with Diagram('', filename='visualised_approach', show=False, graph_attr=graph_attr, direction='LR'):
    # Stand-alone nodes for the input and the output of the pipeline.
    tw = Twitter('Input Tweet')
    topics = Custom('Topics', './icons/boxes.png')
    
    # First stage: cleaning followed by translating.
    with Cluster('Preprocess', graph_attr=graph_attr):
        cleaning = Custom('Cleaning', './icons/clean.png')
        translating = Custom('Translating', './icons/helsinki_nlp.png')
        cleaning >> translating
    
    # Second stage: embedding, dimensionality reduction and clustering, in that order.
    with Cluster('Embedding & Clustering', graph_attr=graph_attr):
        embedding = Custom('Embedding', './icons/hugging_face.png')
        reducing = Custom('Dimensionality Reduction', './icons/umap.png')
        clustering = Custom('Clustering', './icons/hdbscan.png')
        embedding >> reducing >> clustering

    # NOTE(review): this chain links tw -> cleaning -> embedding -> topics, so 'Translating' and 'Clustering' get no
    # outgoing edge of their own — confirm that this is the intended drawing.
    tw >> cleaning >> embedding >> topics 

## Forbes Global 2000 European companies complete and small table

In [124]:
# 2019 Forbes Global 2000 European companies complete table
# Columns left out of the rendered table.
complete_hidden_columns = ['Forbes Webpage', 'Profits as % of Assets', 'Profits as % of Revenue', 'Continent', 'Headquarters']
# CSS rules for the table as a whole, the column headers, the row headers and the data cells.
complete_table_styles = [
    {'selector': '', 'props': 'border-spacing: 0px; border-bottom: 2px solid black; font-size: 8pt; padding-bottom: 5px;'},
    {'selector': 'th.col_heading', 'props': 'font-family: Helvetica Neue; font-weight: bold; text-align: left; border-top: 2px solid black; border-bottom: 1px solid black; padding-top: 10px; padding-bottom: 5px;'},
    {'selector': 'th.row_heading', 'props': 'font-family: Helvetica Neue;'},
    {'selector': 'td', 'props': 'border-width: 0px; font-family: Helvetica Neue; text-align: left;'},
]

complete_styler = companies.style.hide(complete_hidden_columns, axis = 'columns')
complete_styler = complete_styler.format(precision = 2)
# Grey shading for every second row first, then red for companies without Twitter.
complete_styler = complete_styler.set_properties(subset = slice_grey, **{'background-color': '#e6e6e3'})
complete_styler = complete_styler.set_properties(subset = slice_no_twitter, **{'background-color': '#ff7f7f'})
complete_styler = complete_styler.set_table_styles(complete_table_styles)
complete_styler.to_html(buf = f'{os.getcwd()}/forbes_complete.html')

In [125]:
# 2019 Forbes Global 2000 European companies small table
# A reproducible sample of ten companies, rendered without the index.
companies_sample = companies.sample(10, random_state = 123)
small_hidden_columns = ['Forbes Webpage', 'Profits as % of Assets', 'Profits as % of Revenue', 'Continent', 'Headquarters']
# CSS rules for the table as a whole, all header cells and the data cells.
small_table_styles = [
    {'selector': '', 'props': 'border-spacing: 0px; border-bottom: 2px solid black; font-size: 8pt; padding-bottom: 5px;'},
    {'selector': 'th', 'props': 'font-family: Helvetica Neue; font-weight: bold; text-align: left; border-top: 2px solid black; border-bottom: 1px solid black; padding-top: 10px; padding-bottom: 5px;'},
    {'selector': 'td', 'props': 'border-width: 0px; font-family: Helvetica Neue; text-align: left;'},
]

small_styler = companies_sample.style.hide()
small_styler = small_styler.hide(small_hidden_columns, axis = 'columns')
small_styler = small_styler.format(precision = 2)
# Grey shading for every second row first, then red for companies without Twitter.
small_styler = small_styler.set_properties(subset = slice_grey, **{'background-color': '#e6e6e3'})
small_styler = small_styler.set_properties(subset = slice_no_twitter, **{'background-color': '#ff7f7f'})
small_styler = small_styler.set_table_styles(small_table_styles)
small_styler.to_html(buf = f'{os.getcwd()}/forbes_small.html')

## Fridays for Future table

In [6]:
# Build `fff`: protester counts with one row per selected country (plus 'Total') and one column per ISO week.

# 2018 data. The CSV column 'Unnamed: 0' becomes the row index (presumably the country names — confirm against the file).
fff_18 = pd.read_csv('data/FFF_global_2018.csv')
fff_18 = fff_18.set_index('Unnamed: 0')
fff_18.index.name = None
# Replace the date column labels by their ISO calendar values (year, week, day).
# NOTE(review): this assigns the DataFrame returned by `isocalendar()` as column labels; the label join at the end of this cell
# treats each label as a tuple — confirm that this still works on the installed pandas version.
fff_18.columns = pd.to_datetime(fff_18.columns.values, dayfirst = True).isocalendar()
# Keep only the rows whose label occurs in `unique_countries`.
fff_18 = fff_18.filter(items = unique_countries, axis = 0)
# Merge columns that share the same label by summing them row-wise; cells whose string form is "nan" are skipped.
# NOTE(review): `groupby(..., axis = 1)` is deprecated in recent pandas releases — verify before upgrading.
fff_18 = fff_18.groupby(fff_18.columns, axis = 1).agg(lambda x: x.apply(lambda y: sum([l for l in y if str(l) != "nan"]), axis = 1))

# 2019 data: same steps, but the six listed date columns (01.12.19 to 05.01.20) are dropped before the labels are converted.
fff_19 = pd.read_csv('data/FFF_global_2019.csv')
fff_19 = fff_19.set_index('Unnamed: 0')
fff_19.index.name = None
fff_19 = fff_19.filter(items = unique_countries, axis = 0).drop(['01.12.19', '08.12.19', '15.12.19', '22.12.19', '29.12.19', '05.01.20'], axis = 1)
fff_19.columns = pd.to_datetime(fff_19.columns.values, dayfirst = True).isocalendar()
fff_19 = fff_19.groupby(fff_19.columns, axis = 1).agg(lambda x: x.apply(lambda y: sum([l for l in y if str(l) != "nan"]), axis = 1))

# Put both years side by side and once more sum columns that carry the same label.
fff = pd.concat([fff_18, fff_19], axis = 1)
fff = fff.groupby(fff.columns, axis = 1).agg(lambda x: x.apply(lambda y: sum([l for l in y if str(l) != "nan"]), axis = 1))
# Turn each label into a string made of its first two parts, each zero-padded to two characters and joined by '-' (e.g. '2018-05').
fff.columns = ['-'.join(tuple(str(num).zfill(2) for num in tup[:2])) for tup in fff.columns.values]

In [10]:
def _hide_zero(value, props='color: rgba(0,0,0,0);'):
    """Return a fully transparent text color for cells equal to 0, otherwise no extra styling."""
    return props if value == 0 else None


# CSS rules for the table as a whole, the column headers, the row headers and the data cells.
fff_table_styles = [
    {'selector': '', 'props': 'border-spacing: 0px; border-top: 2px solid black; border-bottom: 2px solid black; font-size: 6pt; padding-bottom: 5px;'},
    {'selector': 'th.col_heading', 'props': 'font-family: Helvetica Neue; font-weight: bold; text-align: left; border-bottom: 1px solid black; padding-top: 10px; padding-bottom: 5px; min-width: 35px;'},
    {'selector': 'th.row_heading', 'props': 'font-family: Helvetica Neue;'},
    {'selector': 'td', 'props': 'border-width: 0px; font-family: Helvetica Neue; text-align: left;'},
]

fff_styler = fff.style.format(precision = 0)
fff_styler = fff_styler.set_properties(subset = slice_grey, **{'background-color': '#e6e6e3'})
fff_styler = fff_styler.applymap(_hide_zero)
fff_styler = fff_styler.set_table_styles(fff_table_styles)
fff_styler.to_html(buf = f'{os.getcwd()}/fff.html')

## UMAP fire graph

In [None]:
import umap.plot
import pickle
from umap import UMAP

# Load the pre-computed tweet embeddings. Fail early with a clear message when the pickle is missing; otherwise the
# cell would stop further down with a NameError, or silently reuse an `embeddings` variable left over from an earlier run.
embeddings_path = f'{(cwd := os.getcwd())}/data/embeddings.pkl'
if not os.path.isfile(embeddings_path):
    raise FileNotFoundError(f'Embeddings file not found: {embeddings_path}')

# NOTE: unpickling executes code stored in the file, so only load embeddings you created yourself.
with open(embeddings_path, "rb") as fIn:
    stored_data = pickle.load(fIn)
embeddings = stored_data['embeddings']

# Project the embeddings to two dimensions and plot the points, labelled by the topic of each tweet.
umap_embeddings_plot = UMAP(n_components = 2, n_neighbors = 100, min_dist = 0.01, verbose = True, low_memory = True).fit(embeddings)
umap.plot.points(umap_embeddings_plot, labels=tweets.topic, theme='fire')

## Topics table

In [109]:
def _parse_top_words(cell):
    """Extract the words from one cell of the top-words CSV, in their original order.

    The cell is split at every '(' and the leading '[' fragment is removed. For each remaining fragment the text
    before the first comma is taken and its first and last character (presumably the quotes around the word) are dropped.

    Raises ValueError when the cell does not start with '[('.
    """
    fragments = cell.split('(')
    fragments.remove('[')
    return [fragment.split(',')[0][1:-1] for fragment in fragments]


topics = pd.read_csv('data/top_n_words_per_topic.csv', header=None)
del topics[0]

# Spread the last ten words of every row over the columns 1 to 10: column 10 receives the last word, column 1 the
# tenth-to-last. A row with fewer than ten words raises an IndexError.
top_words = [_parse_top_words(cell) for cell in topics[1]]
for position in range(1, 11):
    topics[position] = [words[position - 11] for words in top_words]

# Label the rows with the values -1 to 15, order the word columns and append the topic names.
topics = topics.set_index(pd.Index(range(-1, 16)))
topics = topics[sorted(topics.columns)]
topics['Topic name'] = topic_names

def _transparent_if_zero(value, props='color: rgba(0,0,0,0);'):
    """Return a fully transparent text color for cells equal to 0, otherwise no extra styling."""
    return props if value == 0 else None


# CSS rules for the table as a whole, the column headers, the row headers, the eleventh data column (bold) and the data cells.
topics_table_styles = [
    {'selector': '', 'props': 'border-spacing: 0px; border-top: 2px solid black; border-bottom: 2px solid black; font-size: 8pt; padding-bottom: 5px;'},
    {'selector': 'th.col_heading', 'props': 'font-family: Helvetica Neue; font-weight: bold; text-align: left; border-bottom: 1px solid black; padding-top: 10px; padding-bottom: 5px;'},
    {'selector': 'th.row_heading', 'props': 'font-family: Helvetica Neue;'},
    {'selector': 'td.data.col10', 'props': 'font-weight: bold;'},
    {'selector': 'td', 'props': 'border-width: 0px; font-family: Helvetica Neue; text-align: left;'},
]

topics_styler = topics.style.format(precision = 0)
topics_styler = topics_styler.set_properties(subset = slice_grey, **{'background-color': '#e6e6e3'})
topics_styler = topics_styler.applymap(_transparent_if_zero)
topics_styler = topics_styler.set_table_styles(topics_table_styles)
topics_styler.to_html(buf = f'{os.getcwd()}/topics.html')

## Overall protests and tweeting
This part depends on the 'Fridays for Future table' cell.

In [7]:
# Here we create the dataframe that is the basis of our graphs. Its index holds all weeks considered; its columns are defined one after the other.
general_date = tweets.groupby('created_at').size().to_frame('total_tweets')
general_date.index.rename('week', inplace=True)

# Sustainability tweets (topic 13) per week. A week without any such tweet is missing from the grouped counts,
# so it is filled with 0 instead of being left as NaN (NaN would be skipped by the medians and break the correlation).
general_date['sustainability_tweets'] = tweets.loc[tweets['topic'] == 13].groupby('created_at').size()
general_date['sustainability_tweets'] = general_date['sustainability_tweets'].fillna(0).astype(int)
general_date['sustainability_share'] = general_date['sustainability_tweets'] / general_date['total_tweets']

# Protesters per week, taken from the last row of `fff` (presumably the 'Total' row, since 'Total' is the last entry of
# `unique_countries` — confirm). Weeks that do not occur in `fff` get 0 protesters.
general_date['total_protesters'] = fff.iloc[-1]
general_date['total_protesters'] = general_date['total_protesters'].fillna(0).astype('Int64')
general_date = general_date.reset_index()
general_date['week'] = general_date['week'].astype(str)

# Median sustainability share of the first 38 weeks (pre FFF) and of the remaining weeks (post FFF).
general_pre_fff_median = general_date.iloc[:38]['sustainability_share'].median().round(3)
general_post_fff_median = general_date.iloc[38:]['sustainability_share'].median().round(3)

In [None]:
def _helvetica(color='black', **extra):
    """Return a fresh font specification in Helvetica Neue with the given color."""
    return dict(family='Helvetica Neue', color=color, **extra)


# Weekly share of sustainability tweets as a black line.
share_fig = px.line(general_date, x="week", y="sustainability_share", height=500, width=1000, color_discrete_sequence=['black'])
share_fig.update_layout(
    title=dict(text='Share of corporate tweets related to sustainability', font=_helvetica()),
    xaxis_title=dict(text='Week', font=_helvetica()),
    xaxis=dict(tickfont=_helvetica(), type='category'),
    yaxis_title=dict(text='Percentage of tweets', font=_helvetica()),
    yaxis=dict(tickfont=_helvetica(), tickformat='.0%'),
    autosize=False,
    plot_bgcolor='#e6e6e3',
)

# Dash-dotted horizontal lines at the pre- and post-FFF medians, followed by their labels.
median_segments = (
    (0, '2018-33', general_pre_fff_median),
    ('2018-35', '2019-47', general_post_fff_median),
)
for segment_start, segment_end, median_value in median_segments:
    share_fig.add_shape(type="line", x0=segment_start, y0=median_value, x1=segment_end, y1=median_value,
                        line=dict(color='#C84B31', dash="dashdot"))
share_fig.add_annotation(x='2019-37', y=0.065, text="Post FFF median", font=_helvetica('#C84B31'), showarrow=False)
share_fig.add_annotation(x='2018-04', y=0.057, text="Pre FFF median", font=_helvetica('#C84B31'), showarrow=False)

# Shaded band marking the start of the FFF protests.
share_fig.add_vrect(x0="2018-34", x1="2018-35", annotation_text="FFF protests start", annotation_font_family='Helvetica Neue',
                    annotation_position="top left", fillcolor="black", opacity=0.25, line_width=0,
                    annotation=dict(font_color='black'))
share_fig.show()

In [None]:
def _helvetica_black():
    """Return a fresh font specification: black Helvetica Neue."""
    return dict(family='Helvetica Neue', color='black')


# Weekly number of FFF protesters.
protesters_fig = px.line(general_date, x="week", y="total_protesters", height=500, width=1000)
protesters_fig.update_layout(
    title=dict(text='FFF protesters in selected European countries per week', font=_helvetica_black()),
    xaxis_title=dict(text='Week', font=_helvetica_black()),
    xaxis=dict(tickfont=_helvetica_black(), type='category'),
    yaxis_title=dict(text='Number of protesters', font=_helvetica_black()),
    yaxis=dict(tickfont=_helvetica_black()),
    autosize=False,
    plot_bgcolor='#e6e6e3',
)

# Shaded band marking the start of the FFF protests.
protesters_fig.add_vrect(x0="2018-34", x1="2018-35", annotation_text="FFF protests start", annotation_font_family='Helvetica Neue',
                         annotation_position="top left", fillcolor="black", opacity=0.25, line_width=0,
                         annotation=dict(font_color='black'))
protesters_fig.update_traces(line_color='black')
protesters_fig.show()

In [None]:
from scipy.stats import pearsonr

# Pearson correlation between the weekly sustainability share and the weekly number of protesters.
# 'total_protesters' is stored with pandas' nullable Int64 dtype, which reaches SciPy as an object array; both columns
# are therefore handed over as plain float arrays.
share_values = general_date['sustainability_share'].to_numpy(dtype=float)
protester_values = general_date['total_protesters'].to_numpy(dtype=float)
r, p = pearsonr(share_values, protester_values)
print('Pearsons correlation: %.3f' % r)
print('The p-value is: %.20f' % p)

## B2B and B2C differences

The following cell is necessary for all other cells in this section.

In [3]:
# Create a column in the tweets dataframe with the respective company name.
# The first character of every Twitter ID is dropped (presumably a leading '@' — confirm) before matching it to 'screen_handle'.
ids = [twitter_id[1:] for twitter_id in companies['Twitter ID'].values]
handle_to_name = pd.Series(companies.Company.values, index=ids).to_dict()
# A handle that is missing from the companies table raises a KeyError.
tweets['company'] = [handle_to_name[handle] for handle in tweets.screen_handle.values]

# All industries in the Materials and Industrials sectors minus 'Airline', 'Conglomerate' and 'Air Courier'
b2b_industries = ['Iron & Steel', 'Construction Services', 'Diversified Chemicals', 'Diversified Metals & Mining', 
                    'Other Transportation', 'Electrical Equipment', 'Trucking', 'Paper & Paper Products',
                    'Heavy Equipment', 'Specialized Chemicals', 'Aerospace & Defense', 'Construction Materials', 
                    'Other Industrial Equipment', 'Aluminum']

# All industries in the Consumer Discretionary and Consumer Staples sectors minus 'Business and Personal Services', 'Advertising'
b2c_industries = ['Beverages', 'Food Retail', 'Apparel/Accessories', 'Household/Personal Care',
                    'Auto & Truck Manufacturers', 'Food Processing', 'Auto & Truck Parts', 'Hotels & Motels',
                    'Household Appliances', 'Department Stores', 'Specialty Stores', 'Apparel/Footwear Retail', 
                    'Broadcasting & Cable', 'Furniture & Fixtures', 'Printing & Publishing', 'Consumer Electronics', 
                    'Tobacco', 'Restaurants', 'Home Improvement Retail']

# Create a column in the tweets dataframe indicating whether a given tweet stems from a b2b, b2c or other company.
b2b_companies = companies.loc[companies['Industry'].isin(b2b_industries)].Company.values
b2c_companies = companies.loc[companies['Industry'].isin(b2c_industries)].Company.values
# One dictionary lookup per tweet instead of scanning both company arrays for every tweet. The B2B entries are written
# last so that they win if a company ever appeared in both lists, matching a check of B2B before B2C.
context_by_company = {**dict.fromkeys(b2c_companies, 'B2C'), **dict.fromkeys(b2b_companies, 'B2B')}
context = [context_by_company.get(company, 'other') for company in tweets['company']]

tweets['context'] = context

In [15]:
# Total number of tweets per B2B and B2C company. `.copy()` detaches the filtered frame so that columns can be added safely.
b2b_b2c_test = tweets.groupby(['context', 'company']).size().to_frame('total_tweets').reset_index()
b2b_b2c_test = b2b_b2c_test.loc[b2b_b2c_test['context'] != 'other'].copy()

# Number of sustainability tweets (topic 13) per company; the counts sit in the column labelled 0.
b2b_b2c_sus_tweets = pd.DataFrame(tweets.groupby(['context', 'company', 'topic']).size()).reset_index()
b2b_b2c_sus_tweets = b2b_b2c_sus_tweets[b2b_b2c_sus_tweets['topic'] == 13]
# Look the counts up by company name; companies without any sustainability tweet get 0. This replaces converting a
# one-row selection with int() and catching the TypeError of an empty selection, which newer pandas versions deprecate.
sus_tweets_by_company = b2b_b2c_sus_tweets.groupby('company')[0].sum()
sus_tweets = b2b_b2c_test['company'].map(sus_tweets_by_company).fillna(0).astype(int).tolist()

b2b_b2c_test['sustainability_tweets'] = sus_tweets
b2b_b2c_test['sustainability_share'] = (b2b_b2c_test['sustainability_tweets'] / b2b_b2c_test['total_tweets']).round(3)

# Descriptive statistics of the absolute and relative sustainability tweets, separately for B2B and B2C companies.
# NOTE(review): the absolute columns are cast to int, which also truncates their mean and std — confirm this is intended.
b2b_rows = b2b_b2c_test[b2b_b2c_test['context'] == 'B2B']
b2c_rows = b2b_b2c_test[b2b_b2c_test['context'] == 'B2C']
summary = pd.DataFrame({
    'B2B absolute sustainability tweets': b2b_rows['sustainability_tweets'].describe().astype(int),
    'B2B relative sustainability tweets': b2b_rows['sustainability_share'].describe(),
    'B2C absolute sustainability tweets': b2c_rows['sustainability_tweets'].describe().astype(int),
    'B2C relative sustainability tweets': b2c_rows['sustainability_share'].describe(),
})

# CSS rules for the table as a whole, the column headers, the row headers and the data cells.
summary_table_styles = [
    {'selector': '', 'props': 'border-spacing: 0px; border-top: 2px solid black; border-bottom: 2px solid black; font-size: 6pt; padding-bottom: 5px;'},
    {'selector': 'th.col_heading', 'props': 'font-family: Helvetica Neue; font-weight: bold; text-align: left; border-bottom: 1px solid black; padding-top: 10px; padding-bottom: 5px; padding-left: 10px; min-width: 35px;'},
    {'selector': 'th.row_heading', 'props': 'font-family: Helvetica Neue;'},
    {'selector': 'td', 'props': 'border-width: 0px; font-family: Helvetica Neue; text-align: left; text-align: center;'},
]

summary_styler = summary.style.format(precision = 3)
summary_styler = summary_styler.set_properties(subset = slice_grey, **{'background-color': '#e6e6e3'})
summary_styler = summary_styler.set_table_styles(summary_table_styles)
summary_styler.to_html(buf = f'{os.getcwd()}/b2b_b2c_summary.html')

In [None]:
def create_context_date_table(context, pre_fff_weeks=38):
    """Build the weekly sustainability-tweet table for the companies of one context.

    Reads the notebook-level `tweets` dataframe.

    Parameters
    ----------
    context : str
        Value of the 'context' column to select, e.g. 'B2B' or 'B2C'.
    pre_fff_weeks : int, default 38
        Number of leading rows (weeks) that form the pre-FFF period.

    Returns
    -------
    tuple
        `(context_date_table, pre_fff_median, post_fff_median)`: the table with the columns 'week', 'total_tweets',
        'sustainability_tweets', 'sustainability_share' and 'context', followed by the median sustainability share
        of the pre-FFF rows and of the remaining rows, each rounded to three decimals.
    """
    in_context = tweets['context'] == context
    context_date_table = tweets.loc[in_context].groupby('created_at').size().to_frame('total_tweets')
    context_date_table.index.rename('week', inplace=True)

    # Weeks without any sustainability tweet (topic 13) are missing from the grouped counts; they count as 0, not NaN.
    sustainability_counts = tweets.loc[in_context & (tweets['topic'] == 13)].groupby('created_at').size()
    context_date_table['sustainability_tweets'] = sustainability_counts
    context_date_table['sustainability_tweets'] = context_date_table['sustainability_tweets'].fillna(0).astype(int)
    context_date_table['sustainability_share'] = context_date_table['sustainability_tweets'] / context_date_table['total_tweets']

    context_date_table.reset_index(inplace=True)
    context_date_table['context'] = context
    context_date_table['week'] = context_date_table['week'].astype(str)

    shares = context_date_table['sustainability_share']
    pre_fff_median = round(shares.iloc[:pre_fff_weeks].median(), 3)
    post_fff_median = round(shares.iloc[pre_fff_weeks:].median(), 3)

    return context_date_table, pre_fff_median, post_fff_median

def _black_helvetica():
    """Return a fresh font specification: black Helvetica Neue."""
    return dict(family='Helvetica Neue', color='black')


# Weekly tables and medians for both contexts, combined into one long table for plotting.
b2b_date, b2b_pre_fff_median, b2b_post_fff_median = create_context_date_table('B2B')
b2c_date, b2c_pre_fff_median, b2c_post_fff_median = create_context_date_table('B2C')
b2b_b2c_date = b2b_date.merge(b2c_date, how='outer')

context_fig = px.line(b2b_b2c_date, x="week", y="sustainability_share", color='context',
                      color_discrete_sequence=['#346751', '#C84B31'], height=500, width=1000)
context_fig.update_layout(
    title=dict(text="B2B and B2C companies' overall shares of sustainability tweets per week", font=_black_helvetica()),
    xaxis_title=dict(text='Week', font=_black_helvetica()),
    xaxis=dict(tickfont=_black_helvetica(), type='category'),
    yaxis_title=dict(text='Share of sustainability tweets', font=_black_helvetica()),
    yaxis=dict(tickfont=_black_helvetica()),
    autosize=False,
    plot_bgcolor='#e6e6e3',
    legend_title=dict(text='Context', font=_black_helvetica()),
    legend=dict(font=dict(color='black')),
)

# Dash-dotted horizontal lines at the pre- and post-FFF medians of both contexts.
context_median_segments = (
    (0, '2018-34', b2b_pre_fff_median),
    ('2018-35', '2019-47', b2b_post_fff_median),
    (0, '2018-34', b2c_pre_fff_median),
    ('2018-35', '2019-47', b2c_post_fff_median),
)
for segment_start, segment_end, median_value in context_median_segments:
    context_fig.add_shape(type="line", x0=segment_start, y0=median_value, x1=segment_end, y1=median_value,
                          line=dict(color="black", dash="dashdot"))

# Labels of the four median lines.
context_median_labels = (
    ('2019-37', 0.105, "Post FFF B2B median"),
    ('2018-04', 0.092, "Pre FFF B2B median"),
    ('2019-37', 0.026, "Post FFF B2C median"),
    ('2018-04', 0.026, "Pre FFF B2C median"),
)
for label_x, label_y, label_text in context_median_labels:
    context_fig.add_annotation(x=label_x, y=label_y, text=label_text, font=_black_helvetica(), showarrow=False)

# Shaded band marking the start of the FFF protests.
context_fig.add_vrect(x0="2018-34", x1="2018-35", annotation_text="FFF protests start", annotation_font_family='Helvetica Neue',
                      annotation_position="top left", fillcolor="black", opacity=0.25, line_width=0,
                      annotation=dict(font_color='black'))
context_fig.show()

In [None]:
def _black_helvetica_font():
    """Return a fresh font specification: black Helvetica Neue."""
    return dict(family='Helvetica Neue', color='black')


# Number of tweets per context, period and topic; the counts end up in the column labelled 0.
b2b_b2c_count = tweets.groupby(['context', 'period', 'topic']).size().to_frame().reset_index()
b2b_b2c_count = b2b_b2c_count.loc[b2b_b2c_count['context'] != 'other']
b2b_b2c_count['topic'] = b2b_b2c_count['topic'].astype(str)

# Map every topic id (as a string, in ascending order of the ids) to its readable name for the legend.
sorted_topic_ids = sorted(tweets.topic.unique().tolist())
newnames = dict(zip(map(str, sorted_topic_ids), topic_names))

count_fig = px.bar(b2b_b2c_count, x=0, y='context', color="topic", facet_row='period', width=1100,
                   category_orders={'context': ['B2B', 'B2C'], 'period': ['Pre FFF', 'Post FFF']},
                   color_discrete_sequence=px.colors.qualitative.T10, orientation='h')
count_fig.update_layout(
    xaxis_title=dict(text='Number of tweets', font=_black_helvetica_font()),
    xaxis=dict(tickfont=_black_helvetica_font()),
    autosize=False,
    plot_bgcolor='#e6e6e3',
    legend_title=dict(text=''),
    legend=dict(orientation='h', yanchor='bottom', y=1, xanchor='right', x=0.9, font=_black_helvetica_font()),
)
count_fig.update_yaxes(tickfont=_black_helvetica_font(), title=dict(text='', font=_black_helvetica_font()))
# Keep only the part after '=' in the facet labels and show them horizontally.
count_fig.for_each_annotation(
    lambda label: label.update(text=label.text.split("=")[-1], textangle=0, font=_black_helvetica_font()))
# Replace the topic ids in the legend by the topic names.
count_fig.for_each_trace(lambda trace: trace.update(name = newnames[trace.name]))
count_fig.show()

In [None]:
import statsmodels.formula.api as sm
from scipy import stats

def _standardised_post_fff(date_table):
    """Return the rows of `date_table` from position 38 on, with z-scored protesters and sustainability share.

    The protester numbers come from the last row of the notebook-level `fff` dataframe and are assigned by position,
    so `fff` must hold exactly one column per remaining row (otherwise the assignment raises a ValueError).
    """
    # Work on a copy so that adding columns does not write into a slice of the original table.
    post_fff = date_table[38:].copy()
    post_fff['protesters'] = stats.zscore(fff.iloc[-1].astype(int).tolist())
    post_fff['sustainability_share'] = stats.zscore(post_fff['sustainability_share'])
    return post_fff


# NOTE: `b2b_date` and `b2c_date` are overwritten with their post-FFF part. Re-running this cell without re-running
# the cell that creates them would cut off another 38 rows.
b2b_date = _standardised_post_fff(b2b_date)
b2c_date = _standardised_post_fff(b2c_date)

# Simple linear regressions of the standardised sustainability share on the standardised number of protesters.
reg_b2b = sm.ols(formula='sustainability_share ~ protesters', data=b2b_date).fit()
print(reg_b2b.summary())
reg_b2c = sm.ols(formula='sustainability_share ~ protesters', data=b2c_date).fit()
print(reg_b2c.summary())

## Borders, a transnational movement and multinational companies
The following cells are dependent on the first cell in the 'B2B and B2C differences', the first cell in the 'Overall protests and tweeting' and the first cell in the 'Fridays for Future table' sections.

In [4]:
# Add the home country of the tweeting company to every tweet; a company missing from the table raises a KeyError.
company_country_dict = dict(zip(companies['Company'], companies['Country']))
tweets['country'] = [company_country_dict[company] for company in tweets['company']]

# Countries with more than 25 companies in the list. `Series.items()` replaces `iteritems()`, which pandas 2.0 removed.
companies_per_country = companies.groupby('Country').size()
top_countries = [country for country, no_of_companies in companies_per_country.items() if no_of_companies > 25]
in_top_country = tweets.country.isin(top_countries)
tweets_by_top_countries = tweets[in_top_country]
tweets_by_top_countries_13 = tweets[in_top_country & (tweets['topic'] == 13)]

# Total tweets and sustainability tweets (topic 13) per week and country.
country_date = tweets_by_top_countries.groupby(['created_at', 'country']).size().to_frame('total_tweets').reset_index()
temp_df = tweets_by_top_countries_13.groupby(['created_at', 'country']).size().to_frame('sust_tweets').reset_index()
country_date = pd.merge(country_date, temp_df, on = ['created_at', 'country'], how = 'outer')
# A week/country pair without any sustainability tweet has no match in `temp_df`. Its count is 0 — filling the gap
# with 1 would report a sustainability tweet that does not exist.
country_date['sust_tweets'] = country_date['sust_tweets'].fillna(0).astype(int)
country_date['sust_share'] = country_date['sust_tweets'] / country_date['total_tweets']

In [None]:
def _black_helvetica_spec(**extra):
    """Return a fresh font specification: black Helvetica Neue, plus any extra settings."""
    return dict(family='Helvetica Neue', color='black', **extra)


# One panel per country showing its weekly share of sustainability tweets.
country_colors = ['#7A7A78', '#1C684E', '#8DB3A7', '#EC9F92', '#D94025', '#732719']
country_fig = px.line(country_date, x="created_at", y="sust_share", color='country', color_discrete_sequence=country_colors,
                      facet_col='country', facet_col_wrap=3, facet_row_spacing=0.12, width=1300, height=900)
country_fig.update_layout(plot_bgcolor='#e6e6e3')
country_fig.update_traces(showlegend=False)

# Axis fonts and titles: the x title is set for row 1, the y title for column 1, tick labels are shown on every x axis.
country_fig.update_xaxes(tickfont=_black_helvetica_spec(), type='category')
country_fig.update_xaxes(title=dict(text='Week', font=_black_helvetica_spec()), row=1)
country_fig.update_yaxes(tickfont=_black_helvetica_spec())
country_fig.update_yaxes(title=dict(text='Share of sustainability tweets', font=_black_helvetica_spec()), col=1)
country_fig.for_each_xaxis(lambda axis: axis.update(showticklabels=True))
# Keep only the part after '=' in the panel titles.
country_fig.for_each_annotation(
    lambda label: label.update(text=label.text.split("=")[-1], textangle=0, font=_black_helvetica_spec()))

# Dash-dotted lines at the overall pre- and post-FFF medians in every panel, followed by their labels.
european_median_segments = (
    (0, '2018-33', general_pre_fff_median),
    ('2018-35', '2019-47', general_post_fff_median),
)
for segment_start, segment_end, median_value in european_median_segments:
    country_fig.add_shape(type="line", x0=segment_start, y0=median_value, x1=segment_end, y1=median_value,
                          line=dict(color='black', dash="dashdot"), row='all', col='all')
country_fig.add_annotation(x='2018-14', y=0.062, text="European pre FFF median", font=_black_helvetica_spec(size=10),
                           showarrow=False, row='all', col='all')
country_fig.add_annotation(x='2019-20', y=0.07, text="European post FFF median", font=_black_helvetica_spec(size=10),
                           showarrow=False, row='all', col='all')

# Shaded band marking the start of the FFF protests.
country_fig.add_vrect(x0="2018-34", x1="2018-35", annotation_text="FFF protests start", annotation_font_family='Helvetica Neue',
                      annotation_position="top left", fillcolor="black", opacity=0.25, line_width=0,
                      annotation=dict(font_color='black'))
country_fig.show()

In [50]:
# Imported here as well so that this cell does not silently depend on the regression cell of the B2B/B2C section.
import statsmodels.formula.api as sm
from scipy import stats


def _significance_stars(p_value):
    """Return '***', '**' or '*' for p-values below 0.001, 0.01 and 0.05, otherwise an empty string."""
    if p_value < 0.001:
        return '***'
    if p_value < 0.01:
        return '**'
    if p_value < 0.05:
        return '*'
    return ''


# One regression per top country: standardised post-FFF sustainability share on the standardised number of protesters.
regression_rows = []
for country in top_countries:
    # Rows of this country from position 38 on; the copy allows adding columns without writing into a slice.
    temp_df = country_date[country_date['country'] == country][38:].copy()
    # Protester numbers are assigned by position, so `fff` must hold exactly one column per remaining row.
    temp_df['protesters'] = stats.zscore(fff.iloc[-1].astype(int).tolist())
    temp_df['sust_share'] = stats.zscore(temp_df['sust_share'])
    reg_temp = sm.ols(formula='sust_share ~ protesters', data=temp_df).fit()

    # Read the slope statistics by label instead of by position in the result Series.
    slope_p_value = reg_temp.pvalues['protesters']
    regression_rows.append({'Country': country,
                            'Coefficient': f"{reg_temp.params['protesters']:.3f}{_significance_stars(slope_p_value)}",
                            # NOTE(review): `bse` is the standard error of the coefficient; the column header is kept as is.
                            'Standard Deviation': reg_temp.bse['protesters'],
                            't-Value': reg_temp.tvalues['protesters'],
                            'p-Value': slope_p_value,
                            'F-Value': reg_temp.fvalue,
                            'F-p-Value': reg_temp.f_pvalue,
                            'R-Squared': reg_temp.rsquared})

reg_df = pd.DataFrame(regression_rows, columns=['Country', 'Coefficient', 'Standard Deviation', 't-Value', 'p-Value', 'F-Value', 'F-p-Value', 'R-Squared'])
reg_df.set_index('Country', inplace=True)
reg_df.index.name = None
# CSS rules for the table as a whole, the column headers, the row headers and the data cells.
reg_table_styles = [
    {'selector': '', 'props': 'border-spacing: 0px; border-top: 2px solid black; border-bottom: 2px solid black; font-size: 6pt; padding-bottom: 5px;'},
    {'selector': 'th.col_heading', 'props': 'font-family: Helvetica Neue; font-weight: bold; text-align: left; border-bottom: 1px solid black; padding-top: 10px; padding-bottom: 5px; padding-left: 10px; min-width: 35px;'},
    {'selector': 'th.row_heading', 'props': 'font-family: Helvetica Neue;'},
    {'selector': 'td', 'props': 'border-width: 0px; font-family: Helvetica Neue; text-align: left; text-align: center;'},
]

reg_styler = reg_df.style.format(precision=3)
reg_styler = reg_styler.set_properties(subset = slice_grey, **{'background-color': '#e6e6e3'})
reg_styler = reg_styler.set_table_styles(reg_table_styles)
reg_styler.to_html(buf = f'{os.getcwd()}/countries_reg.html')