In [2]:
#from datascience import *
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import folium
import json
import os
from branca.colormap import linear
import branca.colormap

In [3]:
# India States GeoJSON file
in_states = '../Data/ne_10m_admin_1_states_provinces.json'
geo_json_data = json.load(open(in_states))

In [4]:
geo_json_data.keys()

dict_keys(['type', 'features'])

In [5]:
gjd = geo_json_data['features'][1]
gjd['properties']['admin']

'Uruguay'

In [6]:
len(geo_json_data['features'])

4594

In [7]:
# Narrowing down the dictionary to just get a shapefile for India.
geo_india_features = [f for f in geo_json_data['features'] if f['properties']['admin'] == 'India']

In [8]:
len(geo_india_features)

35

In [9]:
geo_india = {'type': geo_json_data['type'], 'features': geo_india_features}

In [11]:
# Importing the chowkidar data
chowkidar_proportions = pd.read_csv('../Results/chowkidar_proportions.csv').drop(['Unnamed: 0'], axis=1)
chowkidar_proportions.head(1)

Unnamed: 0,source,chowkidar yes/no mean
0,mphemantgodse,0.0


In [12]:
# Importing data to match the chowkidar scores to MPs and constituencies.
background_info = pd.read_csv('../Data/background_info.csv')
mps = pd.read_csv('../Data/MP_Twitter_Handles.csv')

In [13]:
# Here, I merge background_info with mps to match the MPs to their constituency.
# Because constituencies are coded differently in the two tables, I merge on the total number of votes
# each MP received, because I assume that that number will be individual to each MP.
merged = pd.merge(left=mps, right=background_info,
                  left_on='totvotpoll', right_on='Votes')
clean_merged = merged[['st_name', 'year', 'indv_id', 'pc_no', 'pc_name', 'pc_type', 'cand_name',
                 'cand_sex', 'partyname', 'partyabbre', 'totvotpoll', 'electors', 'max_votes', 'handle',
                 'Valid_Votes', 'Constituency_Name', 'Sub_Region', 'N_Cand', 'Turnout_Percentage',
                 'Vote_Share_Percentage', 'Margin', 'Margin_Percentage', 'ENOP', 'pid', 'Party_type_TCPD',
                 'Party_ID', 'last_poll', 'Contested', 'No_Terms', 'Turncoat', 'Incumbent',
                 'Recontest']
clean_merged['bjp_or_not'] = [1 if p == 'BJP' else 0 for p in clean_merged['partyabbre']]

# Need to further strip the handles and remove the '-'
clean_merged = clean_merged.loc[clean_merged['handle'] != '-']
clean_merged = clean_merged.loc[clean_merged['handle'] != ',-']
clean_merged = clean_merged.loc[clean_merged['cand_name'] != 'Gawali Bhavana Pundlikrao']
clean_merged['handle'] = clean_merged['handle'].str.strip('@')
clean_merged['handle'] = clean_merged['handle'].str.rstrip()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


In [14]:
# Then I merge again to combine the MPs + constituencies to their chowkidar score.
total_info = pd.merge(left=clean_merged, right=chowkidar_proportions,
                  left_on='handle', right_on='source')
total_info.head(1)

Unnamed: 0,st_name,year,indv_id,pc_no,pc_name,pc_type,cand_name,cand_sex,partyname,partyabbre,...,Party_ID,last_poll,Contested,No_Terms,Turncoat,Incumbent,Recontest,bjp_or_not,source,chowkidar yes/no mean
0,Andaman & Nicobar Islands,2014,2601.0,1,Andaman & Nicobar Islands,GEN,Bishnu Pada Ray,M,BJP,BJP,...,1605,True,7,3,False,True,True,1,andamanmp,0.33


In [15]:
# Finally, to be able to map by state, I need an aggregate chowkidar score for each state.
# So I decide to take the mean score of all MPs from that state.
chowkidar_states = total_info[['st_name', 'chowkidar yes/no mean']]
chowkidar_by_state = chowkidar_states.groupby(['st_name']).agg(['mean']).reset_index()
chowkidar_by_state.columns = ['st_name', 'chowkidar yes/no mean']
chowkidar_by_state.head(1)

Unnamed: 0,st_name,chowkidar yes/no mean
0,Andaman & Nicobar Islands,0.33


In [17]:
states = list(chowkidar_by_state['st_name'])
prop = list(chowkidar_by_state['chowkidar yes/no mean'])

In [18]:
# Reformating as a dictionary.
chowkidar_dict1 = dict(zip(states,prop))
chowkidar_dict1

{'Andaman & Nicobar Islands': 0.33,
 'Andhra Pradesh': 0.17951103186697573,
 'Arunachal Pradesh': 0.19221311888421966,
 'Assam': 0.25491932381817384,
 'Bihar': 0.305505211072137,
 'Chandigarh': 0.3333333333333333,
 'Chhattisgarh': 0.25623750129400624,
 'Goa': 0.3925619834710744,
 'Gujarat': 0.3708686418758359,
 'Haryana': 0.3833907468072754,
 'Himachal Pradesh': 0.31395743901694706,
 'Jammu & Kashmir': 0.3566598788638262,
 'Jharkhand': 0.39611573384833476,
 'Karnataka': 0.24675791297437918,
 'Kerala': 0.0732958947031507,
 'Lakshadweep': 0.1111111111111111,
 'Madhya Pradesh': 0.3336286556241059,
 'Maharashtra': 0.24894988527229803,
 'Meghalaya': 0.15625,
 'Nct Of Delhi': 0.32435372795810663,
 'Odisha': 0.18612997934554848,
 'Punjab': 0.09374515012598028,
 'Rajasthan': 0.33866267949874707,
 'Sikkim': 0.13758389261744966,
 'Tamil Nadu': 0.13131786912274718,
 'Uttar Pradesh': 0.34350562432676296,
 'Uttarakhand': 0.4246892241078287,
 'West Bengal': 0.06406280133815703}

In [19]:
chowkidar_dict = chowkidar_by_state.set_index('st_name')['chowkidar yes/no mean']

In [20]:
geo_india

{'type': 'FeatureCollection',
 'features': [{'type': 'Feature',
   'geometry': {'type': 'Polygon',
    'coordinates': [[[68.18300782500012, 23.842087206000073],
      [68.18303772804984, 23.842158406551917],
      [68.18351729300008, 23.843300273000054],
      [68.20728845300005, 23.877019145000034],
      [68.21503991700007, 23.881747539000017],
      [68.23291996300009, 23.889111430000114],
      [68.23994795800013, 23.893116352999996],
      [68.24304854300004, 23.893426412000096],
      [68.25286706500003, 23.891695252000105],
      [68.2567944750001, 23.891927796000076],
      [68.26036014800007, 23.89554514600006],
      [68.25793135600009, 23.89947255500003],
      [68.25374556500009, 23.902779847000133],
      [68.25193689000008, 23.904821066000082],
      [68.2530737720001, 23.911228943000125],
      [68.25266036000005, 23.920091452000023],
      [68.25493412300011, 23.929108989000085],
      [68.26392582200003, 23.935930277000026],
      [68.27400272600005, 23.93768727700011]

In [26]:
# This code uses folium to generate a choropleth map, color coding each state according to what it's mean
# chowkidar score is.
in_map = folium.Map([20, 79], zoom_start=4)
folium.Choropleth(
    geo_data=geo_india,
    name='choropleth',
    data=chowkidar_by_state,
    columns=['st_name', 'chowkidar yes/no mean'],
    key_on='feature.properties.name',
    fill_color='BuPu',
    fill_opacity=0.5,
    line_opacity=0.2,
    legend_name='Chowkidar Proportion'
).add_to(in_map)

in_map