# Import Libraries 

In [44]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import csv
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [40]:
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss, confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score

In [42]:
from xgboost import XGBClassifier

# Dataset

In [4]:
allen = pd.read_csv("Dataset.csv")

In [5]:
allen.head()

Unnamed: 0,Name,Party,Constituency,State,Age,Gender,Prev_Contested,Prev_Wins,Assets,Cases,Prev_Party
0,OMALLOOR RAMACHANDRAN,Ambedkarite Party of India,ARANMULA,KERALA,53,M,0,0,195000,0,0
1,SANTHI OMALLOOR,Anna Democratic Human Rights Movement Party of...,ARANMULA,KERALA,36,F,0,0,50000,0,0
2,BIJU MATHEW,Bharatiya Janata Party,ARANMULA,KERALA,46,M,0,0,16905842,3,0
3,VEENA GEORGE,Communist Party of India (Marxist),ARANMULA,KERALA,44,F,1,1,21717808,0,2
4,K. SIVADASAN NAIR,Indian National Congress,ARANMULA,KERALA,72,M,2,1,9212961,1,6


In [6]:
allen.describe()

Unnamed: 0,Age,Prev_Contested,Prev_Wins,Assets,Cases,Prev_Party
count,9.0,9.0,9.0,9.0,9.0,9.0
mean,52.333333,0.444444,0.222222,9649177.0,0.555556,1.777778
std,12.196311,0.726483,0.440959,9141307.0,1.013794,1.855921
min,36.0,0.0,0.0,50000.0,0.0,0.0
25%,44.0,0.0,0.0,290000.0,0.0,0.0
50%,52.0,0.0,0.0,8540864.0,0.0,2.0
75%,63.0,1.0,0.0,16905840.0,1.0,2.0
max,72.0,2.0,1.0,23425000.0,3.0,6.0


In [7]:
allen.info

<bound method DataFrame.info of                     Name                                              Party  \
0  OMALLOOR RAMACHANDRAN                         Ambedkarite Party of India   
1        SANTHI OMALLOOR  Anna Democratic Human Rights Movement Party of...   
2            BIJU MATHEW                             Bharatiya Janata Party   
3           VEENA GEORGE                 Communist Party of India (Marxist)   
4      K. SIVADASAN NAIR                           Indian National Congress   
5           ARJUNAN C. K                                        Independent   
6            G. SUGATHAN                                        Independent   
7      PRASANTH ARANMULA                                        Independent   
8         SIVADASAN NAIR                                        Independent   

  Constituency   State  Age Gender  Prev_Contested  Prev_Wins    Assets  \
0     ARANMULA  KERALA   53      M               0          0    195000   
1     ARANMULA  KERALA   36

# Dataset.csv - Label Encoding 

### Party

In [8]:
l1 = preprocessing.LabelEncoder()
f1 = l1.fit_transform(allen['Party']) 
f1 = pd.DataFrame(data=f1, columns=['Party'])

### Gender

In [9]:
l2 = preprocessing.LabelEncoder()
f2 = l2.fit_transform(allen['Gender']) 
f2 = pd.DataFrame(data=f2, columns=['Gender'])

### Constituency

In [11]:
l3 = preprocessing.LabelEncoder()
f3 = l3.fit_transform(allen['Constituency']) 
f3 = pd.DataFrame(data=f3, columns=['Constituency'])

### State

In [12]:
l4 = preprocessing.LabelEncoder()
f4 = l4.fit_transform(allen['State']) 
f4 = pd.DataFrame(data=f4, columns=['State'])

### Updation

In [13]:
allen['Party'] = f1['Party']
allen['Gender'] = f2['Gender']
allen['Constituency'] = f3['Constituency']
allen['State'] = f4['State']

# Dataset.csv - MinMaxScaler 

In [23]:
ms = MinMaxScaler()
columns = ['Age','Prev_Party','Prev_Contested','Prev_Wins','Assets','Cases']
allen[columns] = ms.fit_transform(allen[columns])

In [24]:
allen.head()

Unnamed: 0,Name,Party,Constituency,State,Age,Gender,Prev_Contested,Prev_Wins,Assets,Cases,Prev_Party
0,OMALLOOR RAMACHANDRAN,0,0,0,0.472222,1,0.0,0.0,0.006203,0.0,0.0
1,SANTHI OMALLOOR,1,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0
2,BIJU MATHEW,2,0,0,0.277778,1,0.0,0.0,0.721106,1.0,0.0
3,VEENA GEORGE,3,0,0,0.222222,0,0.5,1.0,0.926965,0.0,0.333333
4,K. SIVADASAN NAIR,5,0,0,1.0,1,1.0,1.0,0.391998,0.333333,1.0


In [25]:
allen.info

<bound method DataFrame.info of                     Name  Party  Constituency  State       Age  Gender  \
0  OMALLOOR RAMACHANDRAN      0             0      0  0.472222       1   
1        SANTHI OMALLOOR      1             0      0  0.000000       0   
2            BIJU MATHEW      2             0      0  0.277778       1   
3           VEENA GEORGE      3             0      0  0.222222       0   
4      K. SIVADASAN NAIR      5             0      0  1.000000       1   
5           ARJUNAN C. K      4             0      0  0.444444       1   
6            G. SUGATHAN      4             0      0  0.805556       1   
7      PRASANTH ARANMULA      4             0      0  0.111111       1   
8         SIVADASAN NAIR      4             0      0  0.750000       1   

   Prev_Contested  Prev_Wins    Assets     Cases  Prev_Party  
0             0.0        0.0  0.006203  0.000000    0.000000  
1             0.0        0.0  0.000000  0.000000    0.000000  
2             0.0        0.0  0.721106

# Dataset.csv - Index

In [26]:
print(list(l1.inverse_transform([0,1,2,3,4,5])))

['Ambedkarite Party of India', 'Anna Democratic Human Rights Movement Party of India', 'Bharatiya Janata Party', 'Communist Party of India (Marxist)', 'Independent', 'Indian National Congress']


In [27]:
print(list(l2.inverse_transform([0,1])))

['F', 'M']


In [28]:
print(list(l3.inverse_transform([0])))

['ARANMULA']


In [29]:
print(list(l4.inverse_transform([0])))

['KERALA']


# Training

In [37]:
x = allen.drop(['Name','Prev_Wins'],axis=1)
x.head()

Unnamed: 0,Party,Constituency,State,Age,Gender,Prev_Contested,Assets,Cases,Prev_Party
0,0,0,0,0.472222,1,0.0,0.006203,0.0,0.0
1,1,0,0,0.0,0,0.0,0.0,0.0,0.0
2,2,0,0,0.277778,1,0.0,0.721106,1.0,0.0
3,3,0,0,0.222222,0,0.5,0.926965,0.0,0.333333
4,5,0,0,1.0,1,1.0,0.391998,0.333333,1.0


In [38]:
y = allen['Prev_Wins']

In [39]:
xtrain, xtest, ytrain, ytest = train_test_split(x,y,test_size=0.2,random_state=1)

In [47]:
model = XGBClassifier()
model.fit(xtrain,ytrain)
p = model.predict(xtest)
p



array([0., 0.])

### Accuracy

In [48]:
print(accuracy_score(p,ytest))

1.0
