## Google Take Down Requests and Election Years Correlation
- Used 'Google Government Take Down Requets dataset and selected the 10 countries with the most requests. Self compiled a datatset of heads of state elections from the years 2013 - 2020 to look at correlation between election years and take down requests.

In [1]:
import pandas as pd
from scipy.stats import spearmanr

In [29]:
data = pd.read_csv('top_10.csv')

In [51]:
data['Country/Region'].unique()

array(['Australia', 'Brazil', 'France', 'Germany', 'India', 'Russia',
       'South Korea', 'Türkiye', 'United Kingdom', 'United States'],
      dtype=object)

In [30]:
def creating_df(country):
    df = data[data['Country/Region'] == country].copy()
    df['Period Ending'] = pd.to_datetime(df['Period Ending'])
    df['Year'] = df['Period Ending'].dt.year
    df.groupby('Year')['Total'].sum().sort_values(ascending=False)
    totals = df.groupby(['Reason', 'Year'])['Total'].sum().reset_index(name='Total Sum')

    return totals

In [31]:
creating_df('Russia')

Unnamed: 0,Reason,Year,Total Sum
0,Adult Content,2013,3
1,Adult Content,2014,2
2,Adult Content,2015,6
3,Adult Content,2016,21
4,Adult Content,2017,56
...,...,...,...
140,Violence,2016,130
141,Violence,2017,532
142,Violence,2018,525
143,Violence,2019,550


In [52]:
rus = creating_df('Russia')
rus

Unnamed: 0,Reason,Year,Total Sum
0,Adult Content,2013,3
1,Adult Content,2014,2
2,Adult Content,2015,6
3,Adult Content,2016,21
4,Adult Content,2017,56
...,...,...,...
140,Violence,2016,130
141,Violence,2017,532
142,Violence,2018,525
143,Violence,2019,550


In [35]:
def reasons_dict(totals):

    data = {}
    # unique reasons
    for reason in totals['Reason'].unique():
        reason_data = totals[totals['Reason'] == reason]
        
        # reasons with year and total sum
        reasons = {
            'year': [2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020],
            'Total Sum': [0] * 8
        }
        for _, row in reason_data.iterrows():
            year = row['Year']
            total_sum = row['Total Sum']
            reasons['Total Sum'][reasons['year'].index(year)] = total_sum
        
        data[reason] = reasons

    return data

In [34]:
def corr(data, election):
    correlations = []

    for reason, reason_data in data.items():
        correlation, p_value = spearmanr(reason_data['Total Sum'], election)
        correlations.append((reason, correlation, p_value))

    correlations_df = pd.DataFrame(correlations, columns=['Reason', 'Correlation', 'P-Value'])

    return correlations_df

In [36]:
# full data
def pval_cor(country, election):
    totals = creating_df(country)
    data = reasons_dict(totals)

    year_requests = list(totals.groupby('Year')['Total Sum'].sum())

    correlation, p_value = spearmanr(year_requests, election)
    print('correlation:', correlation)
    print('p_value:', p_value)
    
    return corr(data, election)

## Russia

In [37]:
# 2018 election
election = [0, 0, 0, 0, 0, 1, 0, 0]
pval_cor('Russia', election)

correlation: 0.41239304942116123
p_value: 0.30995873550624103


Unnamed: 0,Reason,Correlation,P-Value
0,Adult Content,0.082479,0.846053
1,Bullying/Harassment,-0.422577,0.29694
2,Business Complaints,0.530849,0.175842
3,Copyright,0.412393,0.309959
4,Defamation,0.412393,0.309959
5,Drug Abuse,0.412393,0.309959
6,Electoral Law,0.380952,0.351813
7,Fraud,0.082974,0.845136
8,Geographical Dispute,0.425243,0.293576
9,Government Criticism,0.082479,0.846053


## India

In [38]:
# Parliament election 2014 2019
election = [0, 1, 0, 0, 0, 0, 1, 0]
pval_cor('India', election)

correlation: 0.0
p_value: 1.0


Unnamed: 0,Reason,Correlation,P-Value
0,Adult Content,0.760469,0.028481
1,Bullying/Harassment,0.0,1.0
2,Business Complaints,-0.069007,0.871023
3,Copyright,-0.125988,0.76626
4,Defamation,0.125988,0.76626
5,Drug Abuse,-0.218218,0.603645
6,Electoral Law,0.536925,0.170018
7,Fraud,-0.190117,0.652027
8,Geographical Dispute,-0.135147,0.749667
9,Government Criticism,0.318788,0.441534


## Brazil

In [39]:
# 2014 and 2018 pres
election = [0, 1, 0, 0, 0, 1, 0, 0]
pval_cor('Brazil', election)

correlation: 0.12598815766974242
p_value: 0.7662600657593479


Unnamed: 0,Reason,Correlation,P-Value
0,Adult Content,0.063372,0.881494
1,Bullying/Harassment,0.25349,0.544675
2,Business Complaints,-0.54059,0.166557
3,Copyright,-0.377964,0.355918
4,Defamation,0.125988,0.76626
5,Drug Abuse,-0.454699,0.257663
6,Electoral Law,0.506979,0.199739
7,Fraud,-0.387298,0.343166
8,Government Criticism,0.125988,0.76626
9,Hate Speech,0.261488,0.531601


## Turkey


In [40]:
# 2014 and 2018 pres
election = [0, 1, 0, 0, 0, 1, 0, 0]
pval_cor('Türkiye', election)

correlation: -0.12598815766974242
p_value: 0.7662600657593479


Unnamed: 0,Reason,Correlation,P-Value
0,Adult Content,-0.701334,0.052575
1,Bullying/Harassment,0.57735,0.133975
2,Business Complaints,-0.324785,0.432498
3,Copyright,-0.190117,0.652027
4,Defamation,0.0,1.0
5,Drug Abuse,0.0,1.0
6,Electoral Law,-0.218218,0.603645
7,Fraud,0.412393,0.309959
8,Government Criticism,0.191273,0.650014
9,Hate Speech,0.8,0.01712


## US

In [41]:
# 2016 and 2020 pres
election = [0, 0, 0, 1, 0, 0, 0, 1]
pval_cor('United States', election)

correlation: -0.25197631533948484
p_value: 0.5471616124270275


Unnamed: 0,Reason,Correlation,P-Value
0,Adult Content,-0.454699,0.257663
1,Bullying/Harassment,-0.251976,0.547162
2,Business Complaints,0.529813,0.176845
3,Copyright,0.0,1.0
4,Defamation,-0.190117,0.652027
5,Drug Abuse,0.149071,0.724605
6,Fraud,0.25349,0.544675
7,Geographical Dispute,0.247436,0.554646
8,Government Criticism,0.072739,0.864094
9,Hate Speech,-0.333333,0.419753


## Germany

In [47]:
# 2013 and 2017 - FEDERAL HEAD OF STATE pres is SYMBOLIC
election = [1, 0, 0, 0, 1, 0, 0, 0]
pval_cor('United States', election)

correlation: 0.5039526306789697
p_value: 0.2028844491499782


Unnamed: 0,Reason,Correlation,P-Value
0,Adult Content,0.389742,0.339863
1,Bullying/Harassment,0.377964,0.355918
2,Business Complaints,0.0,1.0
3,Copyright,-0.125988,0.76626
4,Defamation,0.506979,0.199739
5,Drug Abuse,0.745356,0.033798
6,Fraud,0.063372,0.881494
7,Geographical Dispute,0.412393,0.309959
8,Government Criticism,-0.436436,0.279658
9,Hate Speech,0.333333,0.419753


## France

In [43]:
# 2017 - president
election = [0, 0, 0, 0, 1, 0, 0, 0]
pval_cor('France', election)

correlation: -0.08247860988423225
p_value: 0.8460525238666443


Unnamed: 0,Reason,Correlation,P-Value
0,Adult Content,0.353899,0.389762
1,Bullying/Harassment,-0.50087,0.206114
2,Business Complaints,-0.142857,0.735765
3,Copyright,-0.082974,0.845136
4,Defamation,0.082479,0.846053
5,Drug Abuse,-0.142857,0.735765
6,Fraud,-0.085049,0.841301
7,Geographical Dispute,-0.142857,0.735765
8,Government Criticism,0.353899,0.389762
9,Hate Speech,-0.248922,0.552193


## South Korea

In [44]:
# 2017 - president
election = [0, 0, 0, 0, 1, 0, 0, 0]
pval_cor('South Korea', election)


correlation: 0.08247860988423225
p_value: 0.8460525238666443


Unnamed: 0,Reason,Correlation,P-Value
0,Adult Content,0.417392,0.303535
1,Bullying/Harassment,-0.169031,0.689052
2,Copyright,0.248922,0.552193
3,Defamation,0.082479,0.846053
4,Drug Abuse,0.253546,0.544582
5,Electoral Law,0.087875,0.836081
6,Fraud,0.580818,0.131102
7,Geographical Dispute,-0.142857,0.735765
8,Government Criticism,0.380952,0.351813
9,Impersonation,0.48795,0.219944


## UK

In [48]:
# 2015, 2017 and 2019 - Prime Minister
election = [0, 0, 1, 0, 1, 0, 1, 0]
pval_cor('United Kingdom', election)


correlation: 0.2817180849095055
p_value: 0.49906137962992314


Unnamed: 0,Reason,Correlation,P-Value
0,Adult Content,-0.442627,0.272102
1,Bullying/Harassment,-0.732467,0.03878
2,Business Complaints,-0.066667,0.87537
3,Copyright,-0.526235,0.180335
4,Defamation,-0.519615,0.186887
5,Drug Abuse,0.149071,0.724605
6,Electoral Law,-0.29277,0.481618
7,Fraud,0.241759,0.564055
8,Geographical Dispute,-0.29277,0.481618
9,Government Criticism,-0.58554,0.127247


## Aust

In [46]:
# 2013, 2016, 2019 - Prime Minister
election = [1, 0, 0, 1, 0, 0, 1, 0]
pval_cor('Australia', election)


correlation: -0.05634361698190111
p_value: 0.8945790910017699


Unnamed: 0,Reason,Correlation,P-Value
0,Adult Content,-0.13012,0.758765
1,Bullying/Harassment,0.113364,0.789257
2,Business Complaints,-0.29277,0.481618
3,Copyright,0.28341,0.496375
4,Defamation,-0.510138,0.196483
5,Drug Abuse,-0.29277,0.481618
6,Fraud,0.149071,0.724605
7,Government Criticism,0.073771,0.86218
8,Hate Speech,0.52048,0.186024
9,Impersonation,-0.29277,0.481618
