# Indian Election Analysis

### Import Packages

In [1]:
# import packages
import numpy as np
import pandas as pd
import plotly as py
import plotly.graph_objs as go
import ipywidgets as widgets
from bubbly.bubbly import bubbleplot 
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

In [2]:
# read data
nl = pd.read_table('nl.tab')  #national legislatures i.e Lok sabha

In [3]:
nl.head()

Unnamed: 0,st_name,year,pc_no,pc_name,pc_type,cand_name,cand_sex,partyname,partyabbre,totvotpoll,electors
0,Andaman & Nicobar Islands,1977,1,Andaman & Nicobar Islands,GEN,K.R. Ganesh,M,Independents,IND,25168,85308
1,Andaman & Nicobar Islands,1977,1,Andaman & Nicobar Islands,GEN,Manoranjan Bhakta,M,Indian National Congress,INC,35400,85308
2,Andaman & Nicobar Islands,1980,1,Andaman & Nicobar Islands,GEN,Ramesh Mazumdar,M,Independents,IND,109,96084
3,Andaman & Nicobar Islands,1980,1,Andaman & Nicobar Islands,GEN,Alagiri Swamy,M,Independents,IND,125,96084
4,Andaman & Nicobar Islands,1980,1,Andaman & Nicobar Islands,GEN,Kannu Chemy,M,Independents,IND,405,96084


In [4]:
nl.tail()

Unnamed: 0,st_name,year,pc_no,pc_name,pc_type,cand_name,cand_sex,partyname,partyabbre,totvotpoll,electors
73076,West Bengal,2014,42,Birbhum,GEN,None Of The Above,,NOTA,NOTA,14557,1495089
73077,West Bengal,2014,42,Birbhum,GEN,Syed Siraj Jimmi,M,INC,INC,132084,1495089
73078,West Bengal,2014,42,Birbhum,GEN,Joy Banerjee,M,BJP,BJP,235753,1495089
73079,West Bengal,2014,42,Birbhum,GEN,Dr. Elahi Kamre Mahammad,M,CPM,CPM,393305,1495089
73080,West Bengal,2014,42,Birbhum,GEN,Satabdi Roy,F,AITC,AITC,460568,1495089


In [5]:
nl.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 73081 entries, 0 to 73080
Data columns (total 11 columns):
st_name       73081 non-null object
year          73081 non-null int64
pc_no         73081 non-null int64
pc_name       73081 non-null object
pc_type       65011 non-null object
cand_name     73081 non-null object
cand_sex      72539 non-null object
partyname     73081 non-null object
partyabbre    73081 non-null object
totvotpoll    73081 non-null int64
electors      73081 non-null int64
dtypes: int64(4), object(7)
memory usage: 6.1+ MB


### Overview 

This dataset contains data from 1977 - 2014.

* st_name - State Name
* year - Year of election
* pc_no - Parlimentary constituency number
* pc_name - Parlimentary constituency Name
* pc_type - Parliamentary constituency reservation status
* cand_name - Candidate Name
* cand_sex - Candidate Gender
* partyname - Party Name
* partyabbre - Party abbreviation 
* totvotpoll - Votes Received
* electors - Number of registered voters

### Assess

In [6]:
# check for dublicate entries
nl.duplicated().sum()

1

In [7]:
# check for null values
nl.isna().any()

st_name       False
year          False
pc_no         False
pc_name       False
pc_type        True
cand_name     False
cand_sex       True
partyname     False
partyabbre    False
totvotpoll    False
electors      False
dtype: bool

In [8]:
nl[pd.isnull(nl).any(axis = 1)]

Unnamed: 0,st_name,year,pc_no,pc_name,pc_type,cand_name,cand_sex,partyname,partyabbre,totvotpoll,electors
64,Andaman & Nicobar Islands,2009,1,Andaman & Nicobar Islands,,Shri. T. Ali,M,Independent,IND,398,265110
65,Andaman & Nicobar Islands,2009,1,Andaman & Nicobar Islands,,Dr. Thankachan,M,Independent,IND,683,265110
66,Andaman & Nicobar Islands,2009,1,Andaman & Nicobar Islands,,Shri. N. K. P. Nair,M,Communist Party Of India (Marxist-Leninist) (L...,CPI(ML),734,265110
67,Andaman & Nicobar Islands,2009,1,Andaman & Nicobar Islands,,Shri. M. S. Mohan,M,Bahujan Samaj Party,BSP,789,265110
68,Andaman & Nicobar Islands,2009,1,Andaman & Nicobar Islands,,Shri. Vakiath Valappil Khalid,M,Independent,IND,1480,265110
69,Andaman & Nicobar Islands,2009,1,Andaman & Nicobar Islands,,Shri. Pradeep Kumar Ekka,M,Jharkhand Disom Party,JDP,1785,265110
70,Andaman & Nicobar Islands,2009,1,Andaman & Nicobar Islands,,Smti. R. S. Uma Bharatthy,F,Nationalist Congress Party,NCP,4696,265110
71,Andaman & Nicobar Islands,2009,1,Andaman & Nicobar Islands,,Shri. P. R. Ganeshan,M,Rashtriya Janata Dal,RJD,4916,265110
72,Andaman & Nicobar Islands,2009,1,Andaman & Nicobar Islands,,Shri Tapan Kumar Bepari,M,Communist Party Of India (Marxist),CPM,7190,265110
73,Andaman & Nicobar Islands,2009,1,Andaman & Nicobar Islands,,Shri. Kuldeep Rai Sharma,M,Indian National Congress,INC,72221,265110


In [9]:
nl.st_name.value_counts()

Uttar Pradesh                          14791
Bihar                                   7727
Maharashtra                             6458
Tamil Nadu                              5309
Andhra Pradesh                          5236
Madhya Pradesh                          5196
West Bengal                             3648
Karnataka                               3624
Rajasthan                               3433
Gujarat                                 2946
Haryana                                 2066
Kerala                                  1881
National Capital Territory Of Delhi     1823
Punjab                                  1715
Orissa                                  1354
Assam                                   1123
Jammu & Kashmir                          695
Jharkhand                                685
Himachal Pradesh                         347
Chhattisgarh                             324
Nct Of Delhi                             317
Chandigarh                               295
Odisha    

In [10]:
nl.partyname.value_counts()

Independent                                31458
IND                                         5619
Independents                                4050
Indian National Congress                    3919
Bharatiya Janata Party                      2329
Bahujan Samaj Party                         1670
Janata Dal                                   943
BSP                                          938
INC                                          881
BJP                                          792
Janata Party                                 712
Samajwadi Party                              621
Communist Party Of India (Marxist)           608
NOTA                                         543
Communist Party Of India                     507
Indian Natioanl Congress (I)                 492
SP                                           434
AAAP                                         432
Janta Party                                  432
Bharatiya Lok Dal                            405
Doordarshi Party    

In [11]:
nl.partyabbre.value_counts()

IND           41127
INC            4800
BJP            3350
BSP            2624
SP             1057
JD              943
CPM             770
DDP             716
JNP             675
CPI             608
JP              551
NOTA            543
INC(I)          492
SHS             462
AAAP            432
BLD             405
AIIC(T)         321
TDP             301
JNP(S)          294
RJD             291
CPI(ML)(L)      286
AD              286
JD(U)           281
NCP             268
DMK             253
LKD             253
AJBP            253
BMUP            233
AITC            228
RPI             216
              ...  
AIFB(S)           1
PDI               1
ADC               1
LJVM              1
RSOSP             1
NMNP              1
NSCP              1
aimpr             1
RMOP              1
BVLP              1
CKN               1
mmsp              1
UPP               1
RAIM              1
PMP               1
KDP               1
TNGP              1
MPVC              1
chp               1


In [12]:
nl.cand_sex.value_counts()

M    68885
F     3648
O        6
Name: cand_sex, dtype: int64

### Wrangling

In [13]:
# Deleting rows with null values doesn't seem to be a good option so, I'll fill the null values with undefined

nl.fillna('Undefined', inplace=True)

In [14]:
# drop dupilcate entry
nl.drop_duplicates(inplace= True)

In [15]:
# replace state where two entries name Goa Daman & Diu and nct of Delhi
# and lets keep Goa, Daman, and Diu as it is because they were a union territory of India 
#from 19 December 1961 to 30 May 1987. 
#The union territory comprised the present-day state of Goa and the two 
#small coastal enclaves of Daman and Diu on the coast of Gujarat.

nl.st_name.replace('Goa Daman & Diu', 'Goa, Daman & Diu', inplace = True)
nl.st_name.replace('Nct Of Delhi', 'National Capital Territory Of Delhi', inplace = True)
nl.st_name.replace('Delhi', 'National Capital Territory Of Delhi', inplace = True)

##### There are few rows in partynames where abbrivation is used instead of party names I'll not be replacing those beacuse I'll use the partyabbre column instead of partynames.

In [16]:
# replace gender O with undefined
nl.cand_sex.replace('O', 'Undefined', inplace = True)

In [17]:
nl['partyabbre'] = nl['partyabbre'].str.upper()

In [18]:
nl.to_csv('nl_elections.csv', encoding='UTF-8')

### Analysis

#### Total Votes over time

In [19]:
# we cannot use electors to compare because it contains total number of registered voters 
#which also includes people who are dead.

total = nl.groupby(['year']).sum()['totvotpoll'].reset_index(name ='Total')

trace1 = go.Scatter( x = total.year,
                   y = total.Total,
                   mode = 'lines+markers',
                   name = 'Total Votes')
data = [trace1]

layout = go.Layout(title = 'Total votes over year',
                  xaxis = dict(title = 'Year'),
                  yaxis = dict(title = 'Count'))

fig = go.Figure(data = data, layout = layout)
py.offline.iplot(fig)

*   All political parties were making extensive use of social media during the election campaign - relatively new in Indian politics.
*   While social media still has a long way to reach India's remote corners,
    it certainly appeals to the youth who are a significant vote bank in 2014.
*   In current 2019 where Internet is widely available in almost everywhere
    corner we can see a huge rise in the vote counts this year.

In [20]:
#candidate participation

participation = nl.groupby('year').count()['cand_name'].reset_index(name='Total')

trace1 = go.Scatter(x = participation.year,
                   y = participation.Total,
                   mode = 'lines+markers')
layout = go.Layout(title = 'Candidate Participation',
                 xaxis = {'title' : 'Year'},
                 yaxis = {'title' : 'Count'})
data = [trace1]
fig = go.Figure(data, layout)
iplot(fig)

* The 9th General Elections had 6K candidates in the fray, while in the 10th General Elections around 8K candidates contested for 543 seats.
* In the Eleventh General Elections, 13K candidates contested for 543 seats,
  which were reduced drastically to 4K candidates in 12th Lok Sabha,
  because of increase of security deposit amount.

In [21]:
# candidate gender distribution

gen_m = nl[nl['cand_sex'] == 'M']
gen_f = nl[nl['cand_sex'] == 'F']
total_m = gen_m.groupby(['year']).count()['cand_sex'].reset_index(name ='Total')
total_f = gen_f.groupby(['year']).count()['cand_sex'].reset_index(name ='Total')

trace1 = go.Scatter(x = total_m.year,
                   y = total_m.Total,
                   mode = 'lines+markers',
                   name = 'Male') 
trace2 = go.Scatter(x = total_f.year,
                   y = total_f.Total,
                   mode = 'lines+markers',
                   name = 'Female')

data = [trace1,trace2]
layout = go.Layout(title = 'Candidate Gender Distribution',
                 xaxis = {'title' : 'Year'},
                 yaxis = {'title' : 'Count'})

fig = go.Figure(data, layout)
iplot(fig)

* There is a slight increase in female candidates over the past few elections.

In [28]:
# parlimentary constiuency rate

pc_rate = nl.groupby('year').count()['pc_name'].reset_index(name ='Total')

trace = go.Scatter( x = pc_rate.year,
                  y = pc_rate.Total, 
                  mode = 'lines+markers')
data = [trace]
layout = go.Layout( title = 'Parlimentary constituency rate',
                  xaxis = dict(title = 'Year'),
                  yaxis = dict(title = 'Count'))
fig = go.Figure(data, layout)
iplot(fig)

* As the number of candidates, electors increases so does the polling booth/Parlimentary constituency.

In [30]:
d = widgets.Dropdown(
    options=[i for i in nl.st_name.unique()],
    description='Select State:',
    disabled=False,
)

def get_plot(value):
    value = d.value
    state = nl[nl['st_name'] == value]
    total = state.groupby(['partyabbre', 'year']).sum()['totvotpoll'].reset_index(name ='Total')
    ttitle = 'People\'s Choice in ' + value  
    figure = bubbleplot(dataset=total,
        x_column='year', y_column='Total', bubble_column='partyabbre',
        size_column='Total', color_column='partyabbre',
        x_title="Years", y_title="Total Number of Votes",
        title = ttitle,
        x_range=['1977', '2014'],
        marker_opacity = 0.6)
    iplot(figure)
    


In [31]:
d.observe(get_plot, names="value")
display(d)


Dropdown(description='Select State:', options=('Andaman & Nicobar Islands', 'Andhra Pradesh', 'Arunachal Prade…

#### You can find the web app for detailed Analysis/Visualization
https://election-analysis-app.herokuapp.com/