In [43]:
import pandas as pd
import numpy as np
from math import log2
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter
import matplotlib.ticker as mticker
from scipy.stats import mannwhitneyu
from scipy.stats import spearmanr
from statsmodels.stats.inter_rater import fleiss_kappa
from statsmodels.stats import inter_rater as irr
from statsmodels.tsa.stattools import adfuller
import pingouin as pg
pd.options.mode.chained_assignment = None

In [44]:
def spearmanr_pval(x,y):
    """Return only the p-value of the Spearman rank correlation between x and y.

    Shaped as a two-argument callable so it can be passed as ``method=`` to
    ``DataFrame.corr`` elsewhere in this notebook.
    """
    _rho, p_value = spearmanr(x, y)
    return p_value

In [45]:
def get_Fame(arr):
    """Median of ``arr`` from its first non-zero entry onward.

    Leading zeros are skipped; zeros after the first non-zero entry are kept
    and do count towards the median.

    Parameters
    ----------
    arr : array-like
        One-dimensional numeric series (the notebook passes a column of the
        Wikipedia sheet as ``.values``).

    Returns
    -------
    float
        The median of the trimmed series, or ``nan`` when ``arr`` is empty or
        contains no non-zero entry (previously this raised ``IndexError``).
    """
    arr = np.asarray(arr)
    nonzero_positions = np.nonzero(arr != 0)[0]
    if nonzero_positions.size == 0:
        # Nothing to trim to: report "no fame score" the same way the notebook
        # marks missing pages, instead of crashing on [0][0].
        return np.nan
    return np.median(arr[nonzero_positions[0]:])

In [46]:
def get_Fame_ave(arr):
    """Mean of ``arr`` from its first non-zero entry onward.

    Leading zeros are skipped; zeros after the first non-zero entry are kept
    and do count towards the mean.

    Parameters
    ----------
    arr : array-like
        One-dimensional numeric series (the notebook passes a column of the
        Wikipedia sheet as ``.values``).

    Returns
    -------
    float
        The mean of the trimmed series, or ``nan`` when ``arr`` is empty or
        contains no non-zero entry (previously this raised ``IndexError``).
    """
    arr = np.asarray(arr)
    nonzero_positions = np.nonzero(arr != 0)[0]
    if nonzero_positions.size == 0:
        # No non-zero observation at all: there is no window to average over.
        return np.nan
    return np.mean(arr[nonzero_positions[0]:])

In [47]:
def cronbach_alpha(df):
    """Standardized Cronbach's alpha computed from the mean inter-column correlation.

    The Pearson correlation matrix of ``df`` is taken, every entry strictly
    below the diagonal is collected, and their mean ``r`` is plugged into
    ``N * r / (1 + (N - 1) * r)`` where ``N`` is the number of columns of ``df``.
    """
    corr_matrix = df.corr()
    n_items = df.shape[1]

    # Column k contributes the entries in rows k+1.. (strictly below the diagonal).
    below_diagonal = [
        corr_matrix.iloc[position + 1:, position].to_numpy()
        for position in range(corr_matrix.shape[1])
    ]
    if below_diagonal:
        # Later columns first, so the values are averaged in the same order as before.
        pairwise_r = np.concatenate(below_diagonal[::-1])
    else:
        pairwise_r = np.array([])

    r_bar = np.mean(pairwise_r)
    return (n_items * r_bar) / (1 + (n_items - 1) * r_bar)

In [48]:
# Load the incident sheet, keep one row per 'Date', and attach a "<Variable>_Surprisal"
# column for each categorical variable listed in `Variables`.
Mass_shooting_df = pd.read_excel("Mass Shootings (08_1966-04_2021).xlsx", sheet_name = "Mass_Shootings")
Mass_shooting_df = Mass_shooting_df.drop_duplicates(subset=['Date'])
# reset_index() (without drop=True) keeps the previous index as a column, as before.
Mass_shooting_df = Mass_shooting_df.reset_index()
N = len(Mass_shooting_df)
Variables = ["Target_Group","Shooting_Location","Level_of_Security","Planned_Shooting"]

# wo[i, j] == 1 exactly when j < i, so (wo @ C)[i] counts, per category, the rows
# that come strictly before row i in the table.
wo = np.tril(np.ones([N, N]), k=-1)

for Variable in Variables:
    observed = Mass_shooting_df[Variable].to_numpy()
    uniques = np.unique(observed)
    L = len(uniques)

    # One-hot encoding: C[i, j] == 1 when row i takes the j-th unique value.
    C = (observed[:, None] == uniques[None, :]).astype(float)
    priorDis = np.dot(wo, C)
    # Column index of each row's own category (relies on exactly one 1 per row of C).
    choices = np.argwhere(C == 1)[:, 1]

    # Row 0 has no preceding rows and stays NaN; it is dropped below.
    surprisal = np.full(N, np.nan)
    for i in range(1, N):
        history = priorDis[i, :]
        choice = priorDis[i, choices[i]]
        # Add-one smoothed relative frequency of the observed category among the
        # preceding rows, expressed in bits.
        surprisal[i] = -log2((choice + 1) / (L + np.sum(history)))

    # Assign the whole column at once. The previous element-wise chained assignment
    # (df[col][i] = y) writes to a temporary under pandas Copy-on-Write and would
    # leave the column unchanged.
    Mass_shooting_df[Variable + "_Surprisal"] = surprisal

# Drop the first row (no history, hence no surprisal) and renumber the rows.
Mass_shooting_df = Mass_shooting_df.iloc[1:]
Mass_shooting_df = Mass_shooting_df.reset_index()

In [49]:
# One-sided Mann-Whitney U test per variable: is the surprisal of the rows where the
# source column is False stochastically smaller than that of the rows where it is True?
Sources = ["TVP|Silva|Lankford"]
# Object dtype so that each cell can hold the full (statistic, pvalue) result.
resuls = pd.DataFrame(index=pd.Index(Sources, name="Sources"), columns=Variables, dtype=object)

for source in Sources:
    # Build both groups from the same `source` column; the non-flagged group used to be
    # hard-coded to "TVP|Silva|Lankford", which would be wrong for any additional source.
    flagged = Mass_shooting_df[source] == True
    not_flagged = Mass_shooting_df[source] == False
    for Variable in Variables:
        surprisal = Mass_shooting_df[Variable+"_Surprisal"]
        fs = surprisal[flagged].to_numpy()
        nfs = surprisal[not_flagged].to_numpy()
        test_result = mannwhitneyu(nfs, fs, alternative='less',method='auto')
        # Label-based scalar write; replaces the chained, positional resuls[Variable][index].
        resuls.at[source, Variable] = test_result
resuls

Unnamed: 0_level_0,Target_Group,Shooting_Location,Level_of_Security,Planned_Shooting
Sources,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
TVP|Silva|Lankford,"(1308.5, 0.0005942085152366369)","(984.0, 5.125719673757951e-06)","(2052.0, 0.2874536553488459)","(1243.0, 0.0002527363393351984)"


In [50]:
# Within the rows flagged in "TVP|Silva|Lankford", compare each surprisal measure between
# incidents with and without an extended magazine (two-sided Mann-Whitney U).
fame_seeking_df = Mass_shooting_df[Mass_shooting_df["TVP|Silva|Lankford"] == True]
magazine_flag = fame_seeking_df["Used an Extended Magazine"]
Extended = fame_seeking_df[magazine_flag == 1.0]
NotExtended = fame_seeking_df[magazine_flag == 0.0]

for surprisal_column in (
    "Target_Group_Surprisal",
    "Shooting_Location_Surprisal",
    "Level_of_Security_Surprisal",
    "Planned_Shooting_Surprisal",
):
    print(mannwhitneyu(Extended[surprisal_column], NotExtended[surprisal_column], method='auto'))

MannwhitneyuResult(statistic=29.0, pvalue=0.10355434039644565)
MannwhitneyuResult(statistic=36.0, pvalue=0.24324252779199185)
MannwhitneyuResult(statistic=29.0, pvalue=0.10210031730374279)
MannwhitneyuResult(statistic=36.0, pvalue=0.2508692680317394)


In [51]:
# Load the per-page series sheet: every column except "DateTime" is coerced to numeric,
# and "DateTime" itself is parsed as datetimes.
Wikipedia_df = pd.read_excel("Mass Shootings (08_1966-04_2021).xlsx", sheet_name = "Wikiepdia")
for page in Wikipedia_df.columns:
    if page == "DateTime":
        continue
    Wikipedia_df[page] = pd.to_numeric(Wikipedia_df[page])
Wikipedia_df["DateTime"] = pd.to_datetime(Wikipedia_df["DateTime"])

In [52]:
# Attach two fame scores to every incident: "Fame" (get_Fame) and "Fame_av"
# (get_Fame_ave), both computed from the Wikipedia_df column named by the incident's
# "WikiPage_reference". Incidents whose reference is one of the placeholders below get NaN.
UNUSABLE_WIKI_REFERENCES = (
    "There's a wikipedia page but not on the data site",
    "No_page",
    "Corrupted",
)

fame_median = []
fame_mean = []
for reference in Mass_shooting_df["WikiPage_reference"]:
    if reference in UNUSABLE_WIKI_REFERENCES:
        fame_median.append(np.nan)
        fame_mean.append(np.nan)
    else:
        # Look the series up once and feed it to both scorers.
        page_series = Wikipedia_df[reference].values
        fame_median.append(get_Fame(page_series))
        fame_mean.append(get_Fame_ave(page_series))

# Whole-column assignment in row order. The previous element-wise chained assignment
# (df["Fame"][i] = ...) writes to a temporary under pandas Copy-on-Write and would
# leave both columns at their initial zeros.
Mass_shooting_df["Fame"] = fame_median
Mass_shooting_df["Fame_av"] = fame_mean

In [53]:
# Analysis table: incidents that have a "Fame" value, restricted to the fame scores,
# casualty counts and the four surprisal measures.
correlation_columns = [
    "Fame",
    "Fame_av",
    "Injured",
    "Fatalities",
    "Target_Group_Surprisal",
    "Shooting_Location_Surprisal",
    "Level_of_Security_Surprisal",
    "Planned_Shooting_Surprisal",
]
with_fame = Mass_shooting_df.dropna(subset=['Fame'])
Correlation_df = with_fame[correlation_columns]

In [54]:
# Spearman partial correlation of "Fame" with each surprisal measure,
# with 'Fatalities' and 'Injured' as covariates; one table per measure.
for surprisal_column in (
    'Target_Group_Surprisal',
    'Shooting_Location_Surprisal',
    'Level_of_Security_Surprisal',
    'Planned_Shooting_Surprisal',
):
    partial = pg.partial_corr(
        data=Correlation_df,
        x='Fame',
        y=surprisal_column,
        covar=['Fatalities','Injured'],
        method="spearman",
    )
    display(partial.round(4))

Unnamed: 0,n,r,CI95%,p-val
spearman,88,0.3033,"[0.1, 0.48]",0.0045


Unnamed: 0,n,r,CI95%,p-val
spearman,88,0.3225,"[0.12, 0.5]",0.0025


Unnamed: 0,n,r,CI95%,p-val
spearman,88,0.0002,"[-0.21, 0.21]",0.9983


Unnamed: 0,n,r,CI95%,p-val
spearman,88,0.1698,"[-0.04, 0.37]",0.118


In [55]:
# Spearman partial correlation of "Fame_av" with each surprisal measure,
# with 'Fatalities' and 'Injured' as covariates; one table per measure.
for surprisal_column in (
    'Target_Group_Surprisal',
    'Shooting_Location_Surprisal',
    'Level_of_Security_Surprisal',
    'Planned_Shooting_Surprisal',
):
    partial = pg.partial_corr(
        data=Correlation_df,
        x='Fame_av',
        y=surprisal_column,
        covar=['Fatalities','Injured'],
        method="spearman",
    )
    display(partial.round(4))

Unnamed: 0,n,r,CI95%,p-val
spearman,88,0.2845,"[0.08, 0.47]",0.0079


Unnamed: 0,n,r,CI95%,p-val
spearman,88,0.2842,"[0.08, 0.47]",0.008


Unnamed: 0,n,r,CI95%,p-val
spearman,88,-0.003,"[-0.21, 0.21]",0.9783


Unnamed: 0,n,r,CI95%,p-val
spearman,88,0.177,"[-0.04, 0.37]",0.1031


In [56]:
# Pairwise Spearman rank correlations between all columns of the analysis table.
spearman_rho = Correlation_df.corr(method='spearman')
spearman_rho

Unnamed: 0,Fame,Fame_av,Injured,Fatalities,Target_Group_Surprisal,Shooting_Location_Surprisal,Level_of_Security_Surprisal,Planned_Shooting_Surprisal
Fame,1.0,0.979089,0.487801,0.48975,0.29332,0.336701,0.003621,0.299972
Fame_av,0.979089,1.0,0.490562,0.485059,0.278379,0.306378,0.002061,0.304895
Injured,0.487801,0.490562,1.0,0.461115,0.089697,0.157072,0.081997,0.217028
Fatalities,0.48975,0.485059,0.461115,1.0,0.046597,0.068947,-0.070928,0.281821
Target_Group_Surprisal,0.29332,0.278379,0.089697,0.046597,1.0,0.300204,0.253903,0.328635
Shooting_Location_Surprisal,0.336701,0.306378,0.157072,0.068947,0.300204,1.0,0.16683,0.092505
Level_of_Security_Surprisal,0.003621,0.002061,0.081997,-0.070928,0.253903,0.16683,1.0,-0.001008
Planned_Shooting_Surprisal,0.299972,0.304895,0.217028,0.281821,0.328635,0.092505,-0.001008,1.0


In [57]:
# Matching p-values, obtained by using spearmanr_pval as the pairwise callable.
# With a callable method pandas fills the diagonal with 1.0, so the diagonal entries
# are placeholders rather than computed p-values.
spearman_pvalues = Correlation_df.corr(method=spearmanr_pval)
spearman_pvalues

Unnamed: 0,Fame,Fame_av,Injured,Fatalities,Target_Group_Surprisal,Shooting_Location_Surprisal,Level_of_Security_Surprisal,Planned_Shooting_Surprisal
Fame,1.0,2.924949e-61,1e-06,1e-06,0.005545,0.001339,0.973291,0.004519
Fame_av,2.924949e-61,1.0,1e-06,2e-06,0.008634,0.003694,0.984796,0.003872
Injured,1.431811e-06,1.222709e-06,1.0,6e-06,0.405927,0.143881,0.447566,0.042248
Fatalities,1.280994e-06,1.67255e-06,6e-06,1.0,0.666397,0.523281,0.511387,0.007813
Target_Group_Surprisal,0.005545256,0.008633954,0.405927,0.666397,1.0,0.004487,0.016985,0.001771
Shooting_Location_Surprisal,0.001338582,0.003694392,0.143881,0.523281,0.004487,1.0,0.120296,0.391333
Level_of_Security_Surprisal,0.9732911,0.984796,0.447566,0.511387,0.016985,0.120296,1.0,0.99256
Planned_Shooting_Surprisal,0.004519059,0.00387222,0.042248,0.007813,0.001771,0.391333,0.99256,1.0


In [58]:
# Same correlation table, restricted to rows flagged in "TVP|Silva|Lankford";
# only the 'Injured' and 'Fatalities' columns of the rho and p-value matrices are shown.
fame_seeking_df = Mass_shooting_df[Mass_shooting_df["TVP|Silva|Lankford"] == True]
fs_columns = [
    "Fame",
    "Fame_av",
    "Injured",
    "Fatalities",
    "Target_Group_Surprisal",
    "Shooting_Location_Surprisal",
    "Level_of_Security_Surprisal",
    "Planned_Shooting_Surprisal",
]
Correlation_FS_df = fame_seeking_df.dropna(subset=['Fame'])[fs_columns]

casualty_columns = ["Injured","Fatalities"]
for corr_method in ("spearman", spearmanr_pval):
    display(Correlation_FS_df.corr(method=corr_method)[casualty_columns])

Unnamed: 0,Injured,Fatalities
Fame,0.488521,0.808002
Fame_av,0.459917,0.788386
Injured,1.0,0.422207
Fatalities,0.422207,1.0
Target_Group_Surprisal,0.206248,-0.330443
Shooting_Location_Surprisal,0.229292,-0.149435
Level_of_Security_Surprisal,-0.110818,-0.219116
Planned_Shooting_Surprisal,0.063229,-0.156168


Unnamed: 0,Injured,Fatalities
Fame,0.028848,1.6e-05
Fame_av,0.041317,3.6e-05
Injured,1.0,0.063683
Fatalities,0.063683,1.0
Target_Group_Surprisal,0.382984,0.154744
Shooting_Location_Surprisal,0.330833,0.529475
Level_of_Security_Surprisal,0.641846,0.353319
Planned_Shooting_Surprisal,0.791143,0.510866


In [59]:
# Inter-rater agreement for the target-group coding: rows are indexed by 'subject',
# the remaining columns are the ratings, recoded from labels to integers.
# NOTE(review): the integer codes impose an order on the labels that cronbach_alpha's
# correlations will use — confirm this is intended for this variable.
df_Target_group = (
    pd.read_excel("Mass Shootings (08_1966-04_2021).xlsx", sheet_name = "Alpha&Kappa_TargetGroup")
    .set_index('subject')
    .replace("School Community", 3)
    .replace("Random", 2)
    .replace("Interaction", 1)
    .replace("Ideology", 0)
)
Target_group = df_Target_group.to_numpy()

# aggregate_raters turns (subject x rater) codes into (subject x category) counts;
# its second return value (the category labels) is not needed here.
target_group_counts, _target_group_categories = irr.aggregate_raters(Target_group)
Target_group_Kappa = irr.fleiss_kappa(target_group_counts, method='rand')
Target_group_Alpha = cronbach_alpha(df_Target_group)

In [60]:
# Inter-rater agreement for the level-of-security coding: rows are indexed by 'subject',
# the remaining columns are the ratings, recoded from labels to integers.
df_Level_of_Security = (
    pd.read_excel("Mass Shootings (08_1966-04_2021).xlsx", sheet_name = "Alpha&Kappa_LvlofSecurity")
    .set_index('subject')
    .replace("No-Security", 0)
    .replace("Low-Security", 1)
    .replace("High-Security", 2)
)
Level_of_Security = df_Level_of_Security.to_numpy()

# aggregate_raters turns (subject x rater) codes into (subject x category) counts;
# its second return value (the category labels) is not needed here.
security_counts, _security_categories = irr.aggregate_raters(Level_of_Security)
Level_of_Security_Kappa = irr.fleiss_kappa(security_counts, method='rand')
Level_of_Security_Alpha = cronbach_alpha(df_Level_of_Security)

In [61]:
# Report the reliability statistics computed in the two preceding cells.
reliability_report = (
    ("Target Group Alpha: ", Target_group_Alpha),
    ("Target Group Kappa: ", Target_group_Kappa),
    ("Level of Security Alpha: ", Level_of_Security_Alpha),
    ("Level of Security Kappa: ", Level_of_Security_Kappa),
)
for label, value in reliability_report:
    print(label, value)

Target Group Alpha:  0.8285343718965177
Target Group Kappa:  0.5559689922480623
Level of Security Alpha:  0.9163400080242414
Level of Security Kappa:  0.6490697674418604


In [62]:
# Spearman rho and p-value matrices between the four surprisal measures and the number
# of firearms brought to the scene, for rows flagged in "TVP|Silva|Lankford".
firearms_columns = [
    "Target_Group_Surprisal",
    "Shooting_Location_Surprisal",
    "Level_of_Security_Surprisal",
    "Planned_Shooting_Surprisal",
    "Total Firearms Brought to the Scene",
]
flagged_rows = Mass_shooting_df[Mass_shooting_df["TVP|Silva|Lankford"] == True]
firearms_subset = flagged_rows[firearms_columns]
display(firearms_subset.corr(method='spearman'))
display(firearms_subset.corr(method=spearmanr_pval))

Unnamed: 0,Target_Group_Surprisal,Shooting_Location_Surprisal,Level_of_Security_Surprisal,Planned_Shooting_Surprisal,Total Firearms Brought to the Scene
Target_Group_Surprisal,1.0,0.521418,0.339426,0.23913,0.143769
Shooting_Location_Surprisal,0.521418,1.0,0.248966,0.201348,0.084404
Level_of_Security_Surprisal,0.339426,0.248966,1.0,0.177546,-0.390122
Planned_Shooting_Surprisal,0.23913,0.201348,0.177546,1.0,-0.351831
Total Firearms Brought to the Scene,0.143769,0.084404,-0.390122,-0.351831,1.0


Unnamed: 0,Target_Group_Surprisal,Shooting_Location_Surprisal,Level_of_Security_Surprisal,Planned_Shooting_Surprisal,Total Firearms Brought to the Scene
Target_Group_Surprisal,1.0,0.008976,0.104661,0.260428,0.50272
Shooting_Location_Surprisal,0.008976,1.0,0.240733,0.345449,0.694968
Level_of_Security_Surprisal,0.104661,0.240733,1.0,0.406548,0.059479
Planned_Shooting_Surprisal,0.260428,0.345449,0.406548,1.0,0.091796
Total Firearms Brought to the Scene,0.50272,0.694968,0.059479,0.091796,1.0


In [63]:
# Augmented Dickey-Fuller test on every series in Wikipedia_df (all columns except
# "DateTime"), each trimmed to start at its first non-zero entry. Prints each p-value,
# then the number of series whose p-value exceeds 0.05.
count = 0
for col in Wikipedia_df.columns:
    if col != "DateTime":
        arr = Wikipedia_df[col].values
        nonzeroarrTS = arr[np.where(arr != 0)[0][0]:]
        # Run the test once per series and reuse the p-value for both the printout
        # and the threshold check (it used to be computed twice).
        adf_pvalue = adfuller(nonzeroarrTS)[1]
        print(adf_pvalue)
        if adf_pvalue > 0.05:
            count +=1
print(count)

0.0
5.4167899814727056e-20
0.0
6.741717043509473e-16
2.828670632253555e-23
4.5061028867105114e-12
0.0
2.2283441965692223e-07
2.0804671579393384e-06
0.0006037053086086647
2.9115142545854647e-24
7.309415989851811e-15
3.036596768671548e-16
6.276087821767912e-29
2.4945406187211212e-06
1.2384912450099632e-06
4.792002725274795e-07
0.0
6.51427925927738e-24
1.2432970774371459e-14
7.347073407540974e-16
2.7870278986638254e-19
6.1749252968037914e-12
4.075993977801848e-27
3.2957804509158117e-07
3.4957371671137805e-22
9.054059303690498e-11
0.037020407933710156
9.497199304987138e-15
3.169365157553058e-14
8.705315180371625e-11
1.6964371746675632e-22
9.29962784476465e-09
4.415972457214874e-19
9.574802624322004e-22
5.705332574507773e-18
1.4596346068839053e-21
1.0554755702890227e-19
1.2981894002493864e-17
4.735830923816811e-17
4.1086187318434984e-16
2.3623976917342825e-06
9.853516231655982e-12
1.9471704795945538e-25
5.750204315391932e-25
8.540999942499207e-29
2.4662634209858775e-15
3.384402762389203e-27