In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.offline as py
import plotly.express as px

# Apply seaborn's default theme to any matplotlib/seaborn figures drawn later.
sns.set()

In [None]:
# Load the election data from the working directory. Header names that
# contain an embedded newline get it swapped for a single space.
df = pd.read_csv('LS_2.0.csv').rename(
    columns=lambda column_name: column_name.replace('\n', ' ')
)
# Preview the first ten rows.
df.head(10)

# Number of constituencies per state

In [None]:
# Distinct constituencies in each state, ordered from most to fewest.
constituencies_per_state = (
    df.groupby('STATE')['CONSTITUENCY']
    .nunique()
    .reset_index()
    .sort_values(by='CONSTITUENCY', ascending=False)
)
fig = px.bar(
    constituencies_per_state,
    x='STATE',
    y='CONSTITUENCY',
    color='CONSTITUENCY',
    height=600,
)
fig.show()

The bar graph above shows that Uttar Pradesh has the highest number of constituencies (80), while many states have only one constituency.

# Number of candidates from each party

In [None]:
# Number of candidates fielded by each party (50 most frequent PARTY values).
# rename_axis() + reset_index(name=...) always produces the columns
# 'Party' and 'Candidates'. Renaming 'index'/'PARTY' after a plain
# reset_index() depends on how the installed pandas labels the
# value_counts() result and breaks on pandas >= 2.0.
candidates_per_party = (
    df['PARTY']
    .value_counts()
    .rename_axis('Party')
    .reset_index(name='Candidates')
    .head(50)
)
# Leave the 'NOTA' rows out of the party comparison.
candidates_per_party = candidates_per_party[candidates_per_party['Party'] != 'NOTA']
# drop=True keeps the old positional index from becoming a stray column.
candidates_per_party = (
    candidates_per_party
    .sort_values('Candidates', ascending=False)
    .reset_index(drop=True)
)
fig = px.bar(candidates_per_party, x='Party', y='Candidates', color='Candidates', height=500)
fig.show()

BJP has the largest number of candidates (420), and Congress comes second with a total of 413 candidates in the 2019 election, followed by IND with 201 candidates.

# Party-wise vote share

In [None]:
# Names of the five parties with the largest summed TOTAL VOTES.
vote_share_top5 = df.groupby('PARTY')['TOTAL VOTES'].sum().nlargest(5).index.tolist()


def vote_share(row):
    """Return the row's PARTY if it is in ``vote_share_top5``, else 'Other'."""
    return row['PARTY'] if row['PARTY'] in vote_share_top5 else 'Other'


# Collapse every party outside the top five into a single 'Other' bucket.
df['Party New'] = df.apply(vote_share, axis=1)
# GroupBy.sum() has no `sort` parameter (passing one raises TypeError on
# current pandas); groupby() already returns groups in sorted key order.
counts = df.groupby('Party New')['TOTAL VOTES'].sum()
labels = counts.index
values = counts.values
pie = go.Pie(labels=labels, values=values)
fig = go.Figure(data=[pie])
py.iplot(fig)

The pie chart above shows that BJP has the highest vote share at 38.5%, and Congress comes second with 20.1% of the votes. 31.1% of the votes went to parties other than BJP, INC, AITC, BSP and SP.

# Number of winning candidates from the top 5 parties

In [None]:
# Winners per party: sum the WINNER column per PARTY, smallest first.
winning_candidates_per_party = (
    df.groupby('PARTY')['WINNER']
    .sum()
    .reset_index()
    .sort_values('WINNER', ascending=True)
)
# Keep only parties with at least one winning candidate.
winning_candidates_per_party = winning_candidates_per_party[
    winning_candidates_per_party['WINNER'] > 0
]
# With the ascending sort, the last five rows are the five parties with the
# most winners.
fig = px.bar(
    winning_candidates_per_party.tail(5),
    x='WINNER',
    y='PARTY',
    color='WINNER',
    height=500,
    orientation='h',
)
fig.show()

The bar graph above shows that a total of 300 BJP candidates won the election, while only 52 Congress candidates won in 2019. The difference between the party with the most winning candidates and the runner-up is 248. The number of seats required to form a government in the Lok Sabha is 272.

# Number of candidates by age group

In [None]:
# Age distribution of candidates. Only rows without an AGE are removed; a
# bare dropna() would also discard candidates that have a valid age but a
# missing value in some unrelated column.
fig = px.histogram(df.dropna(subset=['AGE']), x="AGE", hover_data=df.columns)
fig.show()

# Youngest winning candidates


In [None]:
# Ten youngest winners: winning rows ordered by increasing AGE, first ten kept.
df_winners = (
    df.loc[df['WINNER'].eq(1)]
    .sort_values(by='AGE')
    .iloc[:10]
)
fig = px.bar(
    df_winners,
    x='NAME',
    y='AGE',
    color='AGE',
    height=500,
    hover_data=['PARTY', 'SYMBOL', 'CONSTITUENCY', 'STATE'],
)
fig.show()

Goddeti Madhavi (26), from the Aruku constituency, was the youngest MP elected in the 2019 Lok Sabha election.



# Oldest winning candidates



In [None]:
# Ten oldest winners: winning rows ordered by decreasing AGE, first ten kept.
df_winners = (
    df.loc[df['WINNER'].eq(1)]
    .sort_values(by='AGE', ascending=False)
    .iloc[:10]
)
fig = px.bar(
    df_winners,
    x='NAME',
    y='AGE',
    color='AGE',
    height=500,
    hover_data=['PARTY', 'SYMBOL', 'CONSTITUENCY', 'STATE'],
)
fig.show()

Dr. Shafiqur Rehman Barq (86), from the Sambhal constituency, was the oldest MP elected in the 2019 Lok Sabha election.

# Criminal cases against candidates

In [None]:
# Normalise CRIMINAL CASES to integers, counting 'Not Available' and missing
# values as 0. Series.replace + pd.to_numeric work whether the column still
# holds raw text or has already been converted, so this cell can be re-run
# (the `.str` accessor raises once the column is numeric).
df['CRIMINAL CASES'] = (
    pd.to_numeric(df['CRIMINAL CASES'].replace('Not Available', '0'))
    .fillna(0)
    .astype(int)
)
# Total cases per party, 30 largest totals. No row filter is needed here:
# after the conversion above the column has no nulls or placeholder strings.
criminal_cases = (
    df.groupby('PARTY')['CRIMINAL CASES']
    .sum()
    .reset_index()
    .sort_values('CRIMINAL CASES', ascending=False)
    .head(30)
)
fig = px.bar(criminal_cases, x='PARTY', y='CRIMINAL CASES', color='CRIMINAL CASES', height=500)
fig.show()

BJP has the highest number of criminal cases, with a total count of 898. Congress comes second with a total of 734 criminal cases.

In [None]:
def criminal_cases(row):
    """Return 'No' when the row's CRIMINAL CASES is 0, otherwise 'Yes'."""
    return 'No' if row['CRIMINAL CASES'] == 0 else 'Yes'


# CRIMINAL CASES is expected to be an integer column here (it is converted
# earlier in the notebook), so no comparison with 'Not Available' is needed.
# .copy() gives an independent frame, so adding a column below does not
# write to a slice of df (SettingWithCopyWarning).
df_criminal_cases = df.loc[df['CRIMINAL CASES'].notnull()].copy()
df_criminal_cases['HAS CRIMINAL CASE'] = df_criminal_cases.apply(criminal_cases, axis=1)
# Restrict to winning candidates.
df_criminal_cases = df_criminal_cases[df_criminal_cases['WINNER'] == 1]
# Winners per (party, has-case flag), largest groups first.
df_criminal_cases_count = (
    df_criminal_cases.groupby(['PARTY', 'HAS CRIMINAL CASE'])
    .size()
    .reset_index(name='COUNT')
    .sort_values('COUNT', ascending=False)
)
fig = px.bar(df_criminal_cases_count, x="PARTY", y="COUNT", color='HAS CRIMINAL CASE', height=500)
fig.show()

In total, 233 candidates with criminal cases won the election, which is about 43% of all Lok Sabha seats. 114 elected BJP candidates have criminal cases against them, whereas in Congress 30 elected candidates have criminal cases and 22 do not.

# Educational qualification of candidates

In [None]:
# Tidy EDUCATION: strip the newline that trails some 'Post Graduate' values
# and label missing entries as 'Others'.
df['EDUCATION'] = (
    df['EDUCATION']
    .str.replace('Post Graduate\n', 'Post Graduate')
    .fillna('Others')
)
# Count candidates per education level, ignoring 'Not Available'.
education = (
    df.loc[df['EDUCATION'] != 'Not Available', 'EDUCATION']
    .value_counts()
    .reset_index()
)
education.columns = ['EDUCATION', 'COUNT']
fig = px.bar(
    education,
    x='EDUCATION',
    y='COUNT',
    color='COUNT',
    height=500,
)
fig.show()

Most of the candidates in the 2019 election were well qualified, but there were still a number of candidates whose qualification was not above 10th pass.

# Voting percentage per state

In [None]:
# Turnout per constituency: votes summed per constituency as a percentage of
# that constituency's electors.
df_votes_perct_constituency = (
    df.groupby(['STATE', 'CONSTITUENCY', 'TOTAL ELECTORS'])['TOTAL VOTES']
    .sum()
    .reset_index()
)
df_votes_perct_constituency['% VOTED IN CONSTITUENCY'] = (
    df_votes_perct_constituency['TOTAL VOTES'] * 100
    / df_votes_perct_constituency['TOTAL ELECTORS']
).round(2)

# Electors per state: de-duplicate to one row per constituency first, so
# electors are not counted once per candidate row, then add up by state.
df_voters_state = (
    df[['STATE', 'CONSTITUENCY', 'TOTAL ELECTORS']]
    .drop_duplicates()
    .groupby('STATE')['TOTAL ELECTORS']
    .sum()
    .reset_index()
)

# Votes per state, largest first.
df_votes_state = (
    df.groupby('STATE')['TOTAL VOTES']
    .sum()
    .reset_index()
    .sort_values('TOTAL VOTES', ascending=False)
)

# Turnout per state = state votes / state electors.
df_votes_perct_state = df_votes_state.merge(df_voters_state, on='STATE', how='left')
df_votes_perct_state['% VOTED IN STATE'] = (
    df_votes_perct_state['TOTAL VOTES'] * 100
    / df_votes_perct_state['TOTAL ELECTORS']
).round(2)
df_votes_perct_state = df_votes_perct_state.sort_values('% VOTED IN STATE', ascending=False)

fig = px.bar(
    df_votes_perct_state,
    x='STATE',
    y='% VOTED IN STATE',
    color='% VOTED IN STATE',
    height=500,
)
fig.show()

Lakshadweep has the highest voting percentage at 83.75% and Jammu & Kashmir has the lowest at 43.32%. The overall voting percentage was around 67%.

In [None]:
# Work on a copy so the asset-specific columns are not added to df itself.
df_assets = df.copy()
# ASSETS_VALUE is the text after the first '~' in ASSETS. Splitting once and
# taking element 1 yields NaN for entries without a '~', instead of relying
# on split(expand=True) producing exactly two columns.
df_assets['ASSETS_VALUE'] = df_assets['ASSETS'].str.split('~', n=1).str[1]
# Keep rows that have a value part; .copy() lets later cells add columns to
# this frame without a SettingWithCopyWarning.
df_assets = df_assets[df_assets['ASSETS_VALUE'].notnull()].copy()
def asset_range(row):
    """Bucket a row by the unit suffix of its ``ASSETS_VALUE`` string.

    Returns 'Crore+', 'Lakh+' or 'Thousand+' when the value ends with
    'Crore+', 'Lacs+' or 'Thou+' respectively, and the placeholder string
    'NAN' for any other ending.
    """
    suffix_to_bucket = (
        ('Crore+', 'Crore+'),
        ('Lacs+', 'Lakh+'),
        ('Thou+', 'Thousand+'),
    )
    value = row['ASSETS_VALUE']
    for suffix, bucket in suffix_to_bucket:
        if value.endswith(suffix):
            return bucket
    return 'NAN'

# Tag every row with its asset bucket and a unit COUNT. assign() builds a new
# frame rather than writing columns into a filtered slice.
df_assets = df_assets.assign(
    ASSETS_RANGE=df_assets.apply(asset_range, axis=1),
    COUNT=1,
)
# Drop rows whose value matched none of the known suffixes.
df_assets = df_assets[df_assets['ASSETS_RANGE'] != 'NAN']
# GroupBy.sum() has no `sort` parameter (passing one raises TypeError on
# current pandas); groupby() already returns groups in sorted key order.
counts = df_assets.groupby('ASSETS_RANGE')['COUNT'].sum()
labels = counts.index
values = counts.values
pie = go.Pie(labels=labels, values=values, marker=dict(line=dict(color='#000000', width=1)))
layout = go.Layout(title='Assests of Candidates')
fig = go.Figure(data=[pie], layout=layout)
py.iplot(fig)

Around 69.2% of candidates have assets above one crore rupees, and 29.5% of candidates have assets in the lakhs of rupees.

# Male vs female ratio

In [None]:
# Candidates per gender, shown as a pie chart with black slice outlines.
df_gender = (
    df['GENDER']
    .value_counts()
    .rename_axis('GENDER')
    .reset_index(name='COUNT')
)
pie = go.Pie(
    labels=df_gender['GENDER'],
    values=df_gender['COUNT'],
    marker=dict(line=dict(color='black', width=1)),
)
layout = go.Layout(title='Male vs Female Ratio - All Candidates')
fig = go.Figure(data=[pie], layout=layout)
py.iplot(fig)

There were only 258 female candidates in the 2019 election, which comprises 12.8% of all candidates.

In [None]:
# Winners per gender, shown as a pie chart with black slice outlines.
df_gender_won = (
    df.loc[df['WINNER'] == 1, 'GENDER']
    .value_counts()
    .rename_axis('GENDER')
    .reset_index(name='COUNT')
)
pie = go.Pie(
    labels=df_gender_won['GENDER'],
    values=df_gender_won['COUNT'],
    marker=dict(line=dict(color='black', width=1)),
)
layout = go.Layout(title='Male vs Female Ratio - Winners')
fig = go.Figure(data=[pie], layout=layout)
py.iplot(fig)

The pie chart above shows that only 76 female candidates won the election, while 463 male candidates won the 2019 general election.