In [1]:
import pandas as pd
import spotipy
import numpy as np
from spotipy.oauth2 import SpotifyOAuth
import networkx as nx
import matplotlib.pyplot as plt
import plotly.offline as py
import plotly.graph_objects as go
import json
import plotly.express as px
import kaleido
import plotly.io as pio
# Clear the MathJax setting on kaleido's export scope.
# NOTE(review): presumably a workaround so static image export does not try to
# load MathJax — confirm it is still needed (no static export is visible in this section).
pio.kaleido.scope.mathjax = None


In [27]:
# Load the cleaned per-site CSVs and tag every row with the site it came from.
twitter = pd.read_csv("Clean_Data/Twitter.csv")
facebook = pd.read_csv("Clean_Data/Facebook.csv")
google = pd.read_csv("Clean_Data/Google.csv")
twitter["site"] = "Twitter"
facebook["site"] = "Facebook"
google["site"] = "Google"

# Only these columns are carried into the combined frame.
common_names = ["site", "year", "half", "total_requests", "total_requests_accounts"]

# ignore_index=True gives the combined frame unique row labels instead of
# repeating each source file's own 0..n labels, so later label-based
# operations on `df` are unambiguous.
df = pd.concat(
    [twitter[common_names], facebook[common_names], google[common_names]],
    ignore_index=True,
)

# Keep only years after 2013 and drop rows with any missing value.
df = df[df.year > 2013].dropna()

# Per-site, per-year mean of the remaining columns.
df.groupby(["site", "year"]).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,half,total_requests,total_requests_accounts
site,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Facebook,2014,1.5,14853.5,22699.0
Facebook,2015,1.5,18406.0,28310.0
Facebook,2016,1.5,24934.0,40221.5
Facebook,2017,1.5,32729.0,52952.5
Facebook,2018,1.5,41901.0,67075.0
Facebook,2019,1.5,50931.0,82391.0
Facebook,2020,1.5,61395.0,104192.5
Facebook,2021,1.0,63657.0,111117.0
Google,2014,1.5,13405.0,29795.0
Google,2015,1.5,17358.0,41275.0


In [29]:
# Accounts specified per request, computed column-wise instead of with a
# row-by-row apply. Note: a zero in total_requests yields inf/NaN here rather
# than raising, so check for non-finite values if zeros are possible.
df["accounts_per_request"] = df["total_requests_accounts"] / df["total_requests"]

# Summary statistics of the ratio within each year.
df.groupby("year").accounts_per_request.describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2014,6.0,1.833358,0.45245,1.522418,1.527774,1.627122,1.955609,2.663724
2015,6.0,2.202141,0.546245,1.512147,1.726516,2.380668,2.582203,2.781519
2016,6.0,2.177212,0.58735,1.594987,1.734077,2.109424,2.37408,3.178175
2017,6.0,2.387924,1.136136,1.597995,1.743545,2.118493,2.203013,4.642817
2018,6.0,2.453332,0.987159,1.539143,1.707009,2.272144,2.804961,4.135365
2019,6.0,2.301186,0.870335,1.610317,1.708238,2.101367,2.375643,3.949802
2020,6.0,3.414631,3.055978,1.669404,1.86388,2.301963,2.760157,9.583719
2021,1.0,1.745558,,1.745558,1.745558,1.745558,1.745558,1.745558


In [30]:
def _half_year_label(row):
    """Return the reporting-period label for one row, e.g. "Jan-Jun 2014".

    A `half` value of 1 maps to "Jan-Jun"; any other value maps to "Jul-Dec".
    """
    months = "Jan-Jun" if row.half == 1 else "Jul-Dec"
    return months + " " + str(row.year)


# Human-readable period label used as the x value when plotting.
df["time"] = df.apply(_half_year_label, axis=1)

In [41]:
# Scatter plot of accounts-per-request for every site and reporting period.

# Marker colour per site; any site not listed here uses the fallback colour.
site_colors = {"Google": "#ce3c30", "Twitter": "#1d9bf0"}
fallback_color = "#2374e1"

# Single accent colour shared by the global font and both axes.
accent_color = "#636efa"

# Both axes share the same minimal styling: no grid, zero line, or axis line.
axis_style = {
    'showgrid': False,
    'zeroline': False,
    'showline': False,
    'showticklabels': True,
    'color': accent_color,
}


def _hover_label(row):
    """Build the hover text for one row: site plus request and account counts."""
    return (
        "Site: " + row.site +
        "<br>Requests: " + str(int(row.total_requests)) +
        "<br>Accounts: " + str(int(row.total_requests_accounts))
    )


fig = go.Figure()
fig.update_layout(
    # Transparent backgrounds so the chart takes on the colour of its container.
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
    # Title size is set through title.font; the top-level `titlefont`
    # shorthand is deprecated and not accepted by newer Plotly releases.
    title={
        'text': "Average Number of Accounts Surveilled per Government Request",
        'font': {'size': 24},
    },
    font={
        'family': "SansSerif",
        'color': accent_color,
        'size': 16,
    },
    xaxis={**axis_style, 'title': {'text': "Time Period"}},
    yaxis={**axis_style, 'title': {'text': "Accounts Surveilled / Requests"}},
)

fig.add_trace(
    go.Scatter(
        x=df.time,
        y=df.accounts_per_request,
        mode='markers',
        marker=dict(
            color=[site_colors.get(site, fallback_color) for site in df.site],
            size=14,
            line=dict(color='black', width=2),
        ),
        hovertext=df.apply(_hover_label, axis=1),
    )
)

fig.show()

In [43]:
# Save the figure built above as an HTML file.
html_output_path = "Graphics/accounts_per_request.html"
fig.write_html(html_output_path)