In [1]:
#The Armed Conflict Location & Event Data Project (ACLED) is a disaggregated conflict collection, analysis and crisis
#mapping project.
#ACLED collects the dates, actors, types of violence, locations, and fatalities of all reported political violence 
#and protest events.
#Political violence and protest includes events that occur within civil wars and periods of instability, public protest 
#and regime breakdown.
# Data collected from India during the period of 26-January-2016 to 26-January-2019
#source: https://www.acleddata.com/data/
#Raleigh, Clionadh, Andrew Linke, Håvard Hegre and Joakim Karlsen. (2010).
#“Introducing ACLED-Armed Conflict Location and Event Data.” Journal of Peace
#Research 47(5) 651-660.

In [2]:
import numpy as np
import pandas as pd
from datetime import datetime, timedelta
from scipy import stats
#import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

In [3]:
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
import plotly.graph_objs as go
from plotly import tools

In [4]:
pd.set_option('display.max_columns', 50)

In [5]:
data=pd.read_csv('ACLED_data_India.csv')

In [6]:
# load codes description data
inter_codes=pd.read_csv('Inter_codes.csv')
geo_precision_codes=pd.read_csv('geo_precision_code.csv')
time_precision_codes=pd.read_csv('time_precision_code.csv')

In [7]:
data.head()

Unnamed: 0,data_id,iso,event_id_cnty,event_id_no_cnty,event_date,year,time_precision,event_type,actor1,assoc_actor_1,inter1,actor2,assoc_actor_2,inter2,interaction,region,country,admin1,admin2,admin3,location,latitude,longitude,geo_precision,source,source_scale,notes,fatalities,timestamp,iso3
0,2945166,356,IND44426,44426.0,26 January 2019,2019,1,Violence against civilians,Police Forces of India (2014-) Border Security...,,1,Civilians (Pakistan),,7,17,Southern Asia,India,Jammu and Kashmir,Samba,Samba,Samba,32.5625,75.1199,2,Asian News International,Regional,"On 26 Jan, BSF troops fired on an alleged Paki...",0,1548775812,IND
1,2945167,356,IND44427,44427.0,26 January 2019,2019,1,Battle-No change of territory,Unidentified Armed Group (India),,3,Military Forces of India (2014-) Rashtriya Rifles,Police Forces of India (2014-) Central Reserve...,1,13,Southern Asia,India,Jammu and Kashmir,Srinagar,Srinagar North,Srinagar,34.0857,74.8056,1,Asian News International; Rising Kashmir,Regional,"On 26 Jan, two JeM militants were killed in a ...",2,1548775812,IND
2,2945168,356,IND44428,44428.0,26 January 2019,2019,1,Riots/Protests,Protesters (India),,6,,,0,60,Southern Asia,India,Jammu and Kashmir,Pulwama,Pulwama,Arihal,33.8139,74.8937,1,Rising Kashmir,Subnational,"On 26 Jan, mourners raised anti-India and pro-...",0,1548775812,IND
3,2945169,356,IND44429,44429.0,26 January 2019,2019,1,Riots/Protests,Protesters (India),Journalists (India),6,,,0,60,Southern Asia,India,Jammu and Kashmir,Srinagar,Srinagar North,Srinagar,34.0857,74.8056,1,Kashmir News Service,Subnational,"On 26 Jan, journalists staged a protest rally ...",0,1548775812,IND
4,2945170,356,IND44430,44430.0,26 January 2019,2019,1,Battle-No change of territory,Military Forces of India (2014-),Police Forces of India (2014-) Border Security...,1,Military Forces of Pakistan (2018-),,8,18,Southern Asia,India,Jammu and Kashmir,Poonch,Mendhar,Mankote,33.6233,74.0561,2,Chandigarh Tribune,Subnational,"On 26 Jan, no injuries were reported when Indi...",0,1548775812,IND


In [8]:
inter_codes

Unnamed: 0,inter_code,description
0,0,No actor
1,1,Government and state security services
2,2,Rebel Groups
3,3,Political Militias
4,4,Identity Militias
5,5,Rioters
6,6,Protestors
7,7,Civilians
8,8,External/other forces


In [9]:
geo_precision_codes

Unnamed: 0,geo_precision_code,precision_level
0,1,highest (exact location)
1,2,intermediate (regional)
2,3,lowest (provincial)


In [10]:
time_precision_codes

Unnamed: 0,time_precision_code,precision_level
0,1,highest (day)
1,2,intermediate (week)
2,3,lowest (month)


In [11]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 44379 entries, 0 to 44378
Data columns (total 30 columns):
data_id             44379 non-null int64
iso                 44379 non-null int64
event_id_cnty       44379 non-null object
event_id_no_cnty    44379 non-null float64
event_date          44379 non-null object
year                44379 non-null int64
time_precision      44379 non-null int64
event_type          44379 non-null object
actor1              44379 non-null object
assoc_actor_1       28259 non-null object
inter1              44379 non-null int64
actor2              12599 non-null object
assoc_actor_2       3468 non-null object
inter2              44379 non-null int64
interaction         44379 non-null int64
region              44379 non-null object
country             44379 non-null object
admin1              44379 non-null object
admin2              44368 non-null object
admin3              42785 non-null object
location            44379 non-null object
latitude        

In [12]:
data.isnull().sum()

data_id                 0
iso                     0
event_id_cnty           0
event_id_no_cnty        0
event_date              0
year                    0
time_precision          0
event_type              0
actor1                  0
assoc_actor_1       16120
inter1                  0
actor2              31780
assoc_actor_2       40911
inter2                  0
interaction             0
region                  0
country                 0
admin1                  0
admin2                 11
admin3               1594
location                0
latitude                0
longitude               0
geo_precision           0
source                  0
source_scale           77
notes                   0
fatalities              0
timestamp               0
iso3                    0
dtype: int64

In [13]:
unique_count=pd.Series()
for column in data.columns:
    unique_count[column]=data[column].unique().size
unique_count

data_id             44379
iso                     1
event_id_cnty       44379
event_id_no_cnty    44379
event_date           1122
year                    4
time_precision          3
event_type              8
actor1                487
assoc_actor_1        1213
inter1                  8
actor2                253
assoc_actor_2         376
inter2                  9
interaction            41
region                  1
country                 2
admin1                 35
admin2                717
admin3               2792
location             6314
latitude             6448
longitude            6392
geo_precision           4
source                753
source_scale           19
notes               42923
fatalities             22
timestamp             144
iso3                    1
dtype: int64

In [14]:
# Data cleaning: 1. drop non-important features
#                2. fix null values
#                3. process source, source_scale, actors features

In [15]:
for column in unique_count.index:
    if unique_count[column]==1:
        data.drop(columns=column,inplace=True)

In [16]:
def first_half(string,breakpoint=3):
    return string[0:breakpoint]
def second_half(string,breakpoint=3):
    return string[breakpoint:len(string)]
data['event_id_cnty'].apply(first_half).unique()

array(['IND'], dtype=object)

In [17]:
np.sum(data['event_id_cnty'].apply(second_half).astype(int)-data['event_id_no_cnty'])

0.0

In [18]:
features_to_drop=['country','data_id','event_id_cnty','event_id_no_cnty','timestamp','year','interaction']
Data1=data.drop(columns=features_to_drop)

In [19]:
Data1['event_date_formatted']=pd.to_datetime(Data1['event_date'])
Data1.drop(columns='event_date',inplace=True)

In [20]:
Data1.columns

Index(['time_precision', 'event_type', 'actor1', 'assoc_actor_1', 'inter1',
       'actor2', 'assoc_actor_2', 'inter2', 'admin1', 'admin2', 'admin3',
       'location', 'latitude', 'longitude', 'geo_precision', 'source',
       'source_scale', 'notes', 'fatalities', 'event_date_formatted'],
      dtype='object')

In [21]:
Data1.source_scale.value_counts()

National                     23839
Subnational                  18882
Regional                       892
Subnational-National           345
National-Regional               88
International                   81
Other                           80
Subnational-Regional            79
Subnational-Other                4
Subnational-International        3
National-International           2
regional                         1
Other-National                   1
Regional-Other                   1
National-Other                   1
Subnational- National            1
National; National               1
National-Subnational             1
Name: source_scale, dtype: int64

In [22]:
#Need splitting Columns: source_scale 
SplitNeeded_for_list= ['Subnational-National','National-Regional',
'Subnational-Regional','Subnational-Other','Subnational-International',
'National-International','National-Other','National-Subnational',
'Subnational- National','Regional-Other','Other-National']

In [23]:
Data1.replace('regional','Regional',inplace=True) 

In [24]:
def Source_Split(datacut):
    SourceScaleSplit=pd.DataFrame(datacut.source_scale.str.split(pat="-"))
    SourceSplit=pd.DataFrame(datacut.source.str.split(pat=";"))
    FinalSplit=pd.concat([SourceScaleSplit,SourceSplit],axis=1)
    datacut=datacut.drop(columns=['source','source_scale'])
    datacut=datacut.join(FinalSplit)
    return datacut

In [25]:
for SplitNeeded in SplitNeeded_for_list:
    datacut=Data1[Data1.source_scale==SplitNeeded]    
    Data1.drop(index=datacut.index,inplace=True)
    Data1=Data1.append(Source_Split(datacut))


Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.





In [26]:
Data1.replace(' National','National',inplace=True) 

In [27]:
datacut=Data1[Data1.source_scale=='National; National']
SourceScaleSplit=pd.DataFrame(datacut.source_scale.str.split(pat="; "))
SourceSplit=pd.DataFrame(datacut.source.str.split(pat=";"))
FinalSplit=pd.concat([SourceScaleSplit,SourceSplit],axis=1)
datacut=datacut.drop(columns=['source','source_scale'])
datacut=datacut.join(FinalSplit)
Data1.drop(index=datacut.index,inplace=True)
Data1=Data1.append(datacut)

In [28]:
def No_of_source(source):
    if type(source)==list:
        return len(source)
    else:
        return 1
Data1['No_of_sources']=Data1['source'].apply(No_of_source)

In [29]:
Null_list=Data1.isnull().sum()
Null_list[Null_list>0]

actor2           31780
admin2              11
admin3            1594
assoc_actor_1    16120
assoc_actor_2    40911
source_scale        77
dtype: int64

In [30]:
Source_missing_scale=Data1['source'][Data1.source_scale.isnull()].unique()
Source_missing_scale_filllist=pd.Series()
for source_name in Source_missing_scale:
    Source_missing_scale_filllist[source_name]=Data1[Data1['source']==source_name]['source_scale'].dropna().unique()
Source_missing_scale_filllist

Telegraph (India)                                           [National]
Sangai Express (India)                         [Subnational, National]
Times of India                                 [National, Subnational]
Asian News International           [Regional, International, National]
Chandigarh Tribune                             [Subnational, National]
Indian Express                       [National, Subnational, Regional]
Pioneer (India)                                [National, Subnational]
Hindustan Times (India)                        [National, Subnational]
Pioneer (India); Times of India                                     []
dtype: object

In [31]:
Index_of_NaN=Data1.loc[Data1['source']=='Telegraph (India)'][Data1.source_scale.isnull()].index
Data1.loc[Index_of_NaN,'source_scale']='National'
Index=Data1.loc[Data1['source']=='Sangai Express (India)'][Data1.source_scale=='National'].index
Data1.loc[Index,'source_scale']='Subnational'
Index_of_NaN=Data1.loc[Data1['source']=='Sangai Express (India)'][Data1.source_scale.isnull()].index
Data1.loc[Index_of_NaN,'source_scale']='Subnational'
Index=Data1.loc[Data1['source']=='Times of India'][Data1.source_scale=='Subnational'].index
Data1.loc[Index,'source_scale']='National'
Index_of_NaN=Data1.loc[Data1['source']=='Times of India'][Data1.source_scale.isnull()].index
Data1.loc[Index_of_NaN,'source_scale']='National'
Index=Data1.loc[Data1['source']=='Pioneer (India); Times of India'].index
Data1.loc[Index,'source_scale']='National-National'
datacut=Data1[Data1.source_scale=='National-National']    
Data1.drop(index=datacut.index,inplace=True)
Data1=Data1.append(Source_Split(datacut))
Index=Data1.loc[Data1['source']=='Pioneer (India)'][Data1.source_scale=='Subnational'].index
Data1.loc[Index,'source_scale']='National'
Index_of_NaN=Data1.loc[Data1['source']=='Pioneer (India)'][Data1.source_scale.isnull()].index
Data1.loc[Index_of_NaN,'source_scale']='National'
Index=Data1.loc[Data1['source']=='Hindustan Times (India)'][Data1.source_scale=='Subnational'].index
Data1.loc[Index,'source_scale']='National'
Index_of_NaN=Data1.loc[Data1['source']=='Hindustan Times (India)'][Data1.source_scale.isnull()].index
Data1.loc[Index_of_NaN,'source_scale']='National'
Index=Data1.loc[Data1['source']=='Indian Express'][Data1.source_scale=='Regional'].index
Data1.loc[Index,'source_scale']='National'
Index=Data1.loc[Data1['source']=='Indian Express'][Data1.source_scale=='Subnational'].index
Data1.loc[Index,'source_scale']='National'
Index_of_NaN=Data1.loc[Data1['source']=='Indian Express'][Data1.source_scale.isnull()].index
Data1.loc[Index_of_NaN,'source_scale']='National'


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.


Boolean Series key will be reindexed to match DataFrame index.



In [32]:
Source_scale_main=Data1.loc[Data1['source']=='Chandigarh Tribune'].source_scale.value_counts().idxmax()
Index=Data1.loc[Data1['source']=='Chandigarh Tribune'].index
Data1.loc[Index,'source_scale']=Source_scale_main
Source_scale_main=Data1.loc[Data1['source']=='Asian News International'].source_scale.value_counts().idxmax()
Index=Data1.loc[Data1['source']=='Asian News International'].index
Data1.loc[Index,'source_scale']=Source_scale_main

In [33]:
Null_list=Data1.isnull().sum()
Null_list[Null_list>0]

actor2           31780
admin2              11
admin3            1594
assoc_actor_1    16120
assoc_actor_2    40911
dtype: int64

In [34]:
Actors=pd.Series(index=Data1.index)
Actor2_null_imask=Data1.actor2.notnull()
AssoActor1_null_imask=Data1.assoc_actor_1.notnull()
AssoActor2_null_imask=Data1.assoc_actor_2.notnull()
for idx in Data1.index:
    if Actor2_null_imask[idx] and AssoActor1_null_imask[idx] and AssoActor2_null_imask[idx]:
        Actors[idx]=[Data1.loc[idx,'actor1'],Data1.loc[idx,'actor2'],Data1.loc[idx,'assoc_actor_1'],Data1.loc[idx,'assoc_actor_2']]
    elif Actor2_null_imask[idx] and AssoActor1_null_imask[idx] and not AssoActor2_null_imask[idx]:
        Actors[idx]=[Data1.loc[idx,'actor1'],Data1.loc[idx,'actor2'],Data1.loc[idx,'assoc_actor_1']]
    elif Actor2_null_imask[idx] and not AssoActor1_null_imask[idx] and AssoActor2_null_imask[idx]:
        Actors[idx]=[Data1.loc[idx,'actor1'],Data1.loc[idx,'actor2'],Data1.loc[idx,'assoc_actor_2']]
    elif not Actor2_null_imask[idx] and AssoActor1_null_imask[idx] and AssoActor2_null_imask[idx]:
        Actors[idx]=[Data1.loc[idx,'actor1'],Data1.loc[idx,'assoc_actor_1'],Data1.loc[idx,'assoc_actor_2']]
    elif Actor2_null_imask[idx] and not AssoActor1_null_imask[idx] and not AssoActor2_null_imask[idx]:
        Actors[idx]=[Data1.loc[idx,'actor1'],Data1.loc[idx,'actor2']]
    elif not Actor2_null_imask[idx] and not AssoActor1_null_imask[idx] and AssoActor2_null_imask[idx]:
        Actors[idx]=[Data1.loc[idx,'actor1'],Data1.loc[idx,'assoc_actor_2']]
    elif not Actor2_null_imask[idx] and AssoActor1_null_imask[idx] and not AssoActor2_null_imask[idx]:
        Actors[idx]=[Data1.loc[idx,'actor1'],Data1.loc[idx,'assoc_actor_1']]
    else:
        Actors[idx]=Data1.loc[idx,'actor1']
Data1=Data1.join(pd.DataFrame(Actors,columns=['Actors']))
Data1.drop(columns=['actor1','actor2','assoc_actor_1', 'assoc_actor_2'],inplace=True)

In [35]:
def No_of_Actors(actors):
    if type(actors)==list:
        return len(actors)
    else:
        return 1
Data1['No_of_actors']=Data1['Actors'].apply(No_of_Actors)

In [36]:
Administration=pd.Series(index=Data1.index)
Admin2_null_imask=Data1.admin2.notnull()
Admin3_null_imask=Data1.admin3.notnull()
for idx in Data1.index:
    if Admin2_null_imask[idx] and Admin3_null_imask[idx]:
        Administration[idx]=[Data1.loc[idx,'admin1'],Data1.loc[idx,'admin2'],Data1.loc[idx,'admin3']]
    elif Admin2_null_imask[idx] and not Admin3_null_imask[idx]:
        Administration[idx]=[Data1.loc[idx,'admin1'],Data1.loc[idx,'admin2']]    
    elif not Admin2_null_imask[idx] and Admin3_null_imask[idx]:
        Administration[idx]=[Data1.loc[idx,'admin1'],Data1.loc[idx,'admin3']]    
    else:
        Administration[idx]=Data1.loc[idx,'admin1']
Data1=Data1.join(pd.DataFrame(Administration,columns=['Administration_levels']))
Data1.drop(columns=['admin2','admin3'],inplace=True)

In [37]:
def No_of_AdminLevel(admin):
    if type(admin)==list:
        return len(admin)
    else:
        return 1
Data1['No_of_admin_levels']=Data1['Administration_levels'].apply(No_of_AdminLevel)

In [38]:
def month_of(date_formatted):
    return date_formatted.month+12*(date_formatted.year-2016)
Data1['month']=Data1.event_date_formatted.apply(month_of)

In [39]:
Null_list=Data1.isnull().sum()
Null_list[Null_list>0]

Series([], dtype: int64)

In [40]:
Data1.columns

Index(['No_of_sources', 'admin1', 'event_date_formatted', 'event_type',
       'fatalities', 'geo_precision', 'inter1', 'inter2', 'latitude',
       'location', 'longitude', 'notes', 'source', 'source_scale',
       'time_precision', 'Actors', 'No_of_actors', 'Administration_levels',
       'No_of_admin_levels', 'month'],
      dtype='object')

In [41]:
Geopres_distribution=pd.DataFrame(Data1.geo_precision.value_counts())
Geopres_distribution['Normalize']=Data1.geo_precision.value_counts(normalize=True)
Geopres_distribution

Unnamed: 0,geo_precision,Normalize
1,37158,0.837288
2,6495,0.146353
3,725,0.016337
0,1,2.3e-05


In [42]:
Data1.loc[Data1[Data1.geo_precision==Geopres_distribution.index[3]].index,'geo_precision']=3

In [43]:
Timepres_distribution=pd.DataFrame(Data1.time_precision.value_counts())
Timepres_distribution['Normalize']=Data1.time_precision.value_counts(normalize=True)
Timepres_distribution

Unnamed: 0,time_precision,Normalize
1,42464,0.956849
2,1777,0.040041
3,138,0.00311


In [44]:
Eventtype_distribution=pd.DataFrame(Data1.event_type.value_counts())
Eventtype_distribution['Normalize']=Data1.event_type.value_counts(normalize=True)
Eventtype_distribution

Unnamed: 0,event_type,Normalize
Riots/Protests,39144,0.882039
Violence against civilians,2242,0.050519
Battle-No change of territory,2205,0.049686
Strategic development,411,0.009261
Remote violence,373,0.008405
Battle-Government regains territory,2,4.5e-05
Non-violent transfer of territory,1,2.3e-05
Headquarters or base established,1,2.3e-05


In [45]:
SourceScale_Expand=Data1[['event_type','source_scale','latitude','longitude','geo_precision','event_date_formatted','time_precision','fatalities','inter1', 'inter2']]
temp=pd.DataFrame(SourceScale_Expand.source_scale.apply(pd.Series).stack(),columns=['Source_scale_expanded']).reset_index(inplace=False).drop(columns='level_1').set_index('level_0')
del temp.index.name
SourceScale_Expand=SourceScale_Expand.join(temp).drop(columns='source_scale')
SourceScale_Expand.replace(' National','National',inplace=True) 

In [46]:
SourceScale_distribution=pd.DataFrame(SourceScale_Expand.Source_scale_expanded.value_counts())
SourceScale_distribution['Normalize']=SourceScale_Expand.Source_scale_expanded.value_counts(normalize=True)
SourceScale_distribution

Unnamed: 0,Source_scale_expanded,Normalize
National,24217,0.53927
Subnational,19454,0.433206
Regional,1071,0.023849
Other,87,0.001937
International,78,0.001737


In [47]:
Month_distribution=pd.DataFrame(Data1.month.value_counts())
Month_distribution['Normalize']=Data1.month.value_counts(normalize=True)
Month_distribution

Unnamed: 0,month,Normalize
33,1992,0.044886
36,1839,0.041439
28,1645,0.037067
37,1636,0.036864
32,1631,0.036752
34,1624,0.036594
35,1576,0.035512
9,1447,0.032606
13,1357,0.030578
15,1349,0.030397


In [48]:
Protest_riot_data=Data1[Data1.event_type==Eventtype_distribution.index[0]]

In [49]:
Inter1_2_All=pd.crosstab(Data1.inter1,Data1.inter2,normalize=True)
Inter1_2_All_Protest=pd.crosstab(Protest_riot_data.inter1,Protest_riot_data.inter2,normalize=True)
Inter1_2_month=[]
for m in Month_distribution.index:
    Data_m=Data1[Data1.month==m]
    Inter1_2_month.append(pd.crosstab(Data_m.inter1,Data_m.inter2,normalize=True))
Inter1_2_Protest_month=[]
for m in Month_distribution.index:
    Protest_Data_m=Protest_riot_data[Protest_riot_data.month==m]
    Inter1_2_Protest_month.append(pd.crosstab(Protest_Data_m.inter1,Protest_Data_m.inter2,normalize=True))

In [50]:
Fatalities_Inter1_2_All=pd.crosstab(Data1.inter1,Data1.inter2,Data1.fatalities,aggfunc=sum,normalize=False).fillna(0)
Fatalities_Inter1_2_Protest=pd.crosstab(Protest_riot_data.inter1,Protest_riot_data.inter2,Protest_riot_data.fatalities,aggfunc=sum,normalize=False).fillna(0)
Fatalities_Inter1_2_month=[]
for m in Month_distribution.index:
    Data_m=Data1[Data1.month==m]
    Fatalities_Inter1_2_month.append(pd.crosstab(Data_m.inter1,Data_m.inter2,Data1.fatalities,aggfunc=sum,normalize=False).fillna(0))
Fatalities_Inter1_2_Protest_month=[]
for m in Month_distribution.index:
    Protest_Data_m=Protest_riot_data[Protest_riot_data.month==m]
    Fatalities_Inter1_2_Protest_month.append(pd.crosstab(Protest_Data_m.inter1,Protest_Data_m.inter2,Protest_Data_m.fatalities,aggfunc=sum,normalize=False).fillna(0))

In [51]:
State_All=Data1.admin1.value_counts(normalize=True)
Protest_State_All=Protest_riot_data.admin1.value_counts(normalize=True)
State_month=pd.DataFrame(data=None,index=State_All.index)
for m in Month_distribution.index.sort_values():
    Data_m=Data1[Data1.month==m]
    State_month[m]=Data1.admin1[Data1.month==m].value_counts(normalize=True)
State_month.fillna(0,inplace=True)
Protest_State_month=pd.DataFrame(data=None,index=State_All.index)
for m in Month_distribution.index.sort_values():
    Protest_Data_m=Protest_riot_data[Protest_riot_data.month==m]
    Protest_State_month[m]=Protest_riot_data.admin1[Protest_riot_data.month==m].value_counts(normalize=True)
Protest_State_month.fillna(0,inplace=True)

In [52]:
Monthwise_fatalities=pd.DataFrame(Data1.groupby(['month']).fatalities.sum())
Monthwise_fatalities['Normalize']=pd.DataFrame(Data1.groupby(['month']).fatalities.sum()/Data1.fatalities.sum())
del Monthwise_fatalities.index.name
Statewise_fatalities=pd.DataFrame(Data1.groupby(['admin1']).fatalities.sum())
Statewise_fatalities['Normalize']=pd.DataFrame(Data1.groupby(['admin1']).fatalities.sum()/Data1.fatalities.sum())
del Statewise_fatalities.index.name
State_monthwise_fatalities=pd.DataFrame(data=None,index=State_All.index)
for m in Month_distribution.index.sort_values():
    Data_m=Data1[Data1.month==m]
    State_monthwise_fatalities[m]=Data1[Data1.month==m].groupby(['admin1']).fatalities.sum()
    State_monthwise_fatalities.fillna(0,inplace=True)

In [53]:
Protest_Monthwise_fatalities=pd.DataFrame(Protest_riot_data.groupby(['month']).fatalities.sum())
Protest_Monthwise_fatalities['Normalize']=pd.DataFrame(Protest_riot_data.groupby(['month']).fatalities.sum()/Protest_riot_data.fatalities.sum())
del Protest_Monthwise_fatalities.index.name
Protest_Statewise_fatalities=pd.DataFrame(Protest_riot_data.groupby(['admin1']).fatalities.sum())
Protest_Statewise_fatalities['Normalize']=pd.DataFrame(Protest_riot_data.groupby(['admin1']).fatalities.sum()/Protest_riot_data.fatalities.sum())
del Protest_Statewise_fatalities.index.name
Protest_State_monthwise_fatalities=pd.DataFrame(data=None,index=State_All.index)
for m in Month_distribution.index.sort_values():
    Protest_Data_m=Protest_riot_data[Protest_riot_data.month==m]
    Protest_State_monthwise_fatalities[m]=Protest_riot_data[Protest_riot_data.month==m].groupby(['admin1']).fatalities.sum()
    Protest_State_monthwise_fatalities.fillna(0,inplace=True)

In [54]:
State_All=pd.DataFrame(State_All).rename(columns={'admin1':'Normalized_Freq'})
#Protest_State_All
#State_month
#Protest_State_month
#Statewise_fatalities
#State_monthwise_fatalities
#Protest_Statewise_fatalities
#Protest_State_monthwise_fatalities

In [55]:
Inter1_2_All

inter2,0,1,2,3,4,5,6,7,8
inter1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1,0.002186,0.000135,0.000766,0.002141,6.8e-05,0.0,0.001284,0.005881,0.007999
2,0.00169,0.012664,2.3e-05,4.5e-05,2.3e-05,0.0,0.0,0.005025,4.5e-05
3,0.002434,0.017373,0.000225,0.002186,9e-05,0.000113,0.000158,0.026274,0.000383
4,0.000203,0.000924,2.3e-05,0.000293,0.003583,6.8e-05,2.3e-05,0.011244,4.5e-05
5,0.031682,0.072962,0.0,9e-05,0.0,0.023818,0.000901,0.020685,0.000225
6,0.679375,0.056739,0.0,9e-05,0.0,0.000248,0.000901,0.000248,2.3e-05
7,6.8e-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.000428,0.005385,0.0,2.3e-05,0.0,0.0,0.0,0.000496,0.0


In [56]:
inter_code_table={0:inter_codes.description[0],
                             1:inter_codes.description[1],
                             2:inter_codes.description[2],
                             3:inter_codes.description[3],
                             4:inter_codes.description[4],
                             5:inter_codes.description[5],
                             6:inter_codes.description[6],
                             7:inter_codes.description[7],
                             8:inter_codes.description[8]}

In [57]:
Inter1_2_All=Inter1_2_All.rename(columns=inter_code_table,index=inter_code_table)
Inter1_2_All_Protest=Inter1_2_All_Protest.rename(columns=inter_code_table,index=inter_code_table)
Fatalities_Inter1_2_All=Fatalities_Inter1_2_All.rename(columns=inter_code_table,index=inter_code_table)
Fatalities_Inter1_2_Protest=Fatalities_Inter1_2_Protest.rename(columns=inter_code_table,index=inter_code_table)
for m in range(Month_distribution.index.sort_values().max()):
    Inter1_2_month[m]=Inter1_2_month[m].rename(columns=inter_code_table,index=inter_code_table)
    Inter1_2_Protest_month[m]=Inter1_2_Protest_month[m].rename(columns=inter_code_table,index=inter_code_table)
    Fatalities_Inter1_2_month[m]=Fatalities_Inter1_2_month[m].rename(columns=inter_code_table,index=inter_code_table)
    Fatalities_Inter1_2_Protest_month[m]=Fatalities_Inter1_2_Protest_month[m].rename(columns=inter_code_table,index=inter_code_table)

In [58]:
def get_Mapdata(input_data,input_title):
    trace = go.Heatmap(z=input_data.to_dict(orient='split')['data'],
                       x=input_data.to_dict(orient='split')['columns'],
                       y=input_data.to_dict(orient='split')['index'],
                       colorscale=[[0.0, 'rgb(165,0,38)'], [0.01, 'rgb(215,48,39)'], 
                                   [0.2, 'rgb(244,109,67)'], [0.3, 'rgb(253,174,97)'], 
                                   [0.4, 'rgb(254,224,144)'], [0.5, 'rgb(224,243,248)'],
                                   [0.6, 'rgb(171,217,233)'], [0.7, 'rgb(116,173,209)'], 
                                   [0.8, 'rgb(69,117,180)'], [1.0, 'rgb(49,54,149)']],
                       colorbar = dict(
                                    title = input_title)
                      )
    return [trace]

In [59]:
layout_All = go.Layout(
    title='Interaction table over period (26-jan-2016 to 26-jan-2019)',
        autosize=True,
        yaxis=go.layout.YAxis(
                              title='Actor 1',
                              automargin=True,
                             ),
        xaxis=go.layout.XAxis(
                              title='Actor 2',
                              automargin=True,
                             ),    
)

In [60]:
# fig1 = go.Figure(data=get_Mapdata(Inter1_2_All,'Normalized Frequency'), layout=layout_All)
# iplot(fig, filename='heatmap-1')

In [61]:
# fig = go.Figure(data=get_Mapdata(Fatalities_Inter1_2_All,'Fatalities Count'), layout=layout_All)
# iplot(fig, filename='heatmap-2')

In [62]:
def get_Mapdata_slider(input_data,input_title):
    trace=[]
    for m in range(Month_distribution.index.sort_values().max()):
             trace.append( go.Heatmap(z=input_data[m].to_dict(orient='split')['data'],
                       x=input_data[m].to_dict(orient='split')['columns'],
                       y=input_data[m].to_dict(orient='split')['index'],
                       colorscale=[[0.0, 'rgb(165,0,38)'], [0.01, 'rgb(215,48,39)'], 
                                   [0.2, 'rgb(244,109,67)'], [0.3, 'rgb(253,174,97)'], 
                                   [0.4, 'rgb(254,224,144)'], [0.5, 'rgb(224,243,248)'],
                                   [0.6, 'rgb(171,217,233)'], [0.7, 'rgb(116,173,209)'], 
                                   [0.8, 'rgb(69,117,180)'], [1.0, 'rgb(49,54,149)']],
                       colorbar = dict(
                                    title = input_title)
                      ))
    return [trace]

In [63]:
Slider_data1=get_Mapdata_slider(Inter1_2_month,'Normalized Frequency')

In [64]:
steps = []
for i in range(Month_distribution.index.sort_values().max()):
    step = dict(
                method = 'restyle',  
                args = ['visible', [False] * Month_distribution.index.sort_values().max()],
                label= 'Month ' + str(i+1)
                )
    step['args'][1][i] = True # Toggle i'th trace to "visible"
    steps.append(step)
    sliders = [dict(
                    active = 0,
                    currentvalue = {"prefix": ""},
                    pad = {"t": Month_distribution.index.sort_values().max()},
                    steps = steps,
                    name= 'Month',
                    y= 1.5
              )]
layout = dict(sliders=sliders,
              title='Monthwise Interaction table',
              autosize=True,
              yaxis=go.layout.YAxis(
                                    title='Actor 1',
                                    automargin=True,
                                   ),
              xaxis=go.layout.XAxis(
                                    title='Actor 2',
                                    automargin=True,
                                   ),
             )
fig = dict(data=Slider_data1[0], layout=layout)
iplot(fig, filename='Slider 1')

In [86]:
# steps = []
for i in range(Month_distribution.index.sort_values().max()):
    step = dict(
        method = 'restyle',  
        args = ['visible', [False] * Month_distribution.index.sort_values().max()],
    )
    step['args'][1][i] = True # Toggle i'th trace to "visible"
    steps.append(step)
    sliders = [dict(
    active = 10,
    currentvalue = {"prefix": "Month"},
    pad = {"t": Month_distribution.index.sort_values().max()},
    steps = steps
)]
layout = dict(sliders=sliders,
              title='Monthwise Interaction table',
              autosize=True,
              yaxis=go.layout.YAxis(
                                    title='Actor 1',
                                    automargin=True,
                                   ),
              xaxis=go.layout.XAxis(
                                    title='Actor 2',
                                    automargin=True,
                                   ),
             )
fig = dict(data=Slider_data2[0], layout=layout)
iplot(fig, filename='Slider 2')

In [67]:
def get_Mapdata_slider2(input_data):
    trace=[]
    for m in Month_distribution.index.sort_values():
             trace.append(go.Bar(
                            x=input_data.index,
                            y=input_data[m]
                                )
                          )
    return [trace]
Slider_data2=get_Mapdata_slider2(State_month)

In [68]:
steps = []
for i in range(Month_distribution.index.sort_values().max()):
    step = dict(
                method = 'restyle',  
                args = ['visible', [False] * Month_distribution.index.sort_values().max()],
                label= 'Month ' + str(i+1)
                )
    step['args'][1][i] = True # Toggle i'th trace to "visible"
    steps.append(step)
    sliders = [dict(
                    active = 0,
                    currentvalue = {"prefix": ""},
                    pad = {"t": Month_distribution.index.sort_values().max()},
                    steps = steps,
                    name= 'Month',
                    y= 1.5
                    )]
layout = dict(sliders=sliders,
              title='Monthwise Event Frequency',
              autosize=True,
              yaxis=go.layout.YAxis(
                                    title='Normalized Frequency',
                                    automargin=True,
                                   ),
              xaxis=go.layout.XAxis(
                                    title='States',
                                    automargin=True,
                                   ),
             )
fig = dict(data=Slider_data2[0], layout=layout)
iplot(fig, filename='Slider 2')

In [88]:
Slider_data2=get_Mapdata_slider2(State_monthwise_fatalities)
steps = []
for i in range(Month_distribution.index.sort_values().max()):
    step = dict(
                method = 'restyle',  
                args = ['visible', [False] * Month_distribution.index.sort_values().max()],
                label= 'Month ' + str(i+1)
                )
    step['args'][1][i] = True # Toggle i'th trace to "visible"
    steps.append(step)
    sliders = [dict(
                    active = 0,
                    currentvalue = {"prefix": ""},
                    pad = {"t": Month_distribution.index.sort_values().max()},
                    steps = steps,
                    name= 'Month',
                    y= 1.5
                    )]
layout = dict(sliders=sliders,
              title='Monthwise Fatalities',
              autosize=True,
              yaxis=go.layout.YAxis(
                                    title='Count',
                                    automargin=True,
                                   ),
              xaxis=go.layout.XAxis(
                                    title='States',
                                    automargin=True,
                                   ),
             )
fig = dict(data=Slider_data2[0], layout=layout)
iplot(fig, filename='Slider 2')

In [84]:
Month_State_events=Data1.groupby(['month','admin1']).count().fatalities
Month_State_events

month  admin1           
1      Andhra Pradesh         7
       Arunachal Pradesh      1
       Assam                 24
       Bihar                 19
       Chandigarh            11
       Chhattisgarh           8
       Goa                    2
       Gujarat               16
       Haryana               66
       Himachal Pradesh       6
       Jammu and Kashmir     52
       Jharkhand             18
       Karnataka             30
       Kerala                21
       Madhya Pradesh         4
       Maharashtra           24
       Manipur               18
       Meghalaya              8
       NCT of Delhi          38
       Odisha                15
       Puducherry             1
       Punjab               144
       Rajasthan              9
       Tamil Nadu            55
       Telangana             18
       Uttar Pradesh         33
       Uttarakhand           84
       West Bengal           20
2      Andhra Pradesh         8
       Arunachal Pradesh      2
               