# Import Libraries 

In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import csv
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, log_loss, confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score

# Dataset

In [4]:
allen = pd.read_csv("Dataset.csv")

In [5]:
allen.head()

Unnamed: 0,Name,Party,Constituency,State,Age,Gender,Prev_Contested,Prev_Wins,Assets,Cases,Prev_Party
0,OMALLOOR RAMACHANDRAN,Ambedkarite Party of India,ARANMULA,KERALA,53,M,0,0,195000,0,0
1,SANTHI OMALLOOR,Anna Democratic Human Rights Movement Party of...,ARANMULA,KERALA,36,F,0,0,50000,0,0
2,BIJU MATHEW,Bharatiya Janata Party,ARANMULA,KERALA,46,M,0,0,16905842,3,0
3,VEENA GEORGE,Communist Party of India (Marxist),ARANMULA,KERALA,44,F,1,1,21717808,0,2
4,K. SIVADASAN NAIR,Indian National Congress,ARANMULA,KERALA,72,M,2,1,9212961,1,6


In [6]:
allen.describe()

Unnamed: 0,Age,Prev_Contested,Prev_Wins,Assets,Cases,Prev_Party
count,9.0,9.0,9.0,9.0,9.0,9.0
mean,52.333333,0.444444,0.222222,9649177.0,0.555556,1.777778
std,12.196311,0.726483,0.440959,9141307.0,1.013794,1.855921
min,36.0,0.0,0.0,50000.0,0.0,0.0
25%,44.0,0.0,0.0,290000.0,0.0,0.0
50%,52.0,0.0,0.0,8540864.0,0.0,2.0
75%,63.0,1.0,0.0,16905840.0,1.0,2.0
max,72.0,2.0,1.0,23425000.0,3.0,6.0


In [7]:
allen.info

<bound method DataFrame.info of                     Name                                              Party  \
0  OMALLOOR RAMACHANDRAN                         Ambedkarite Party of India   
1        SANTHI OMALLOOR  Anna Democratic Human Rights Movement Party of...   
2            BIJU MATHEW                             Bharatiya Janata Party   
3           VEENA GEORGE                 Communist Party of India (Marxist)   
4      K. SIVADASAN NAIR                           Indian National Congress   
5           ARJUNAN C. K                                        Independent   
6            G. SUGATHAN                                        Independent   
7      PRASANTH ARANMULA                                        Independent   
8         SIVADASAN NAIR                                        Independent   

  Constituency   State  Age Gender  Prev_Contested  Prev_Wins    Assets  \
0     ARANMULA  KERALA   53      M               0          0    195000   
1     ARANMULA  KERALA   36

# Dataset.csv - Label Encoding 

### Party

In [8]:
l1 = preprocessing.LabelEncoder()
f1 = l1.fit_transform(allen['Party']) 
f1 = pd.DataFrame(data=f1, columns=['Party'])

### Gender

In [9]:
l2 = preprocessing.LabelEncoder()
f2 = l2.fit_transform(allen['Gender']) 
f2 = pd.DataFrame(data=f2, columns=['Gender'])

### Constituency

In [11]:
l3 = preprocessing.LabelEncoder()
f3 = l3.fit_transform(allen['Constituency']) 
f3 = pd.DataFrame(data=f3, columns=['Constituency'])

### State

In [12]:
l4 = preprocessing.LabelEncoder()
f4 = l4.fit_transform(allen['State']) 
f4 = pd.DataFrame(data=f4, columns=['State'])

### Updation

In [13]:
allen['Party'] = f1['Party']
allen['Gender'] = f2['Gender']
allen['Constituency'] = f3['Constituency']
allen['State'] = f4['State']

# Dataset.csv - MinMaxScaler 

In [673]:
ms = MinMaxScaler()
columns = ['Age ','no of experience in years']
allen1[columns] = ms.fit_transform(allen1[columns])

In [674]:
allen1.head()

Unnamed: 0,S.No,company,Age,Gender,Qualification,no of experience in years,Area of work,salary package per month
0,1,0,0.0,1,0,0.0,3,30000
1,2,0,0.136364,0,1,0.047619,2,50000
2,3,0,0.409091,1,1,0.238095,1,120000
3,4,0,0.090909,1,0,0.095238,3,42000
4,5,0,0.0,0,0,0.0,2,35000


# Dataset.csv - Index

In [675]:
print(list(l1.inverse_transform([0,1,2,3,4,5,6,7])))

['ABBB', 'Bashh', 'Link', 'Oxon', 'TSC', 'neuromorphic', 'palm', 'penguin ']


In [676]:
print(list(l2.inverse_transform([0,1,2,3])))

['Predective analysis', 'cloud server management', 'predective analysis', 'programming ']


In [677]:
print(list(l3.inverse_transform([0,1])))

['F', 'M']


In [678]:
print(list(l4.inverse_transform([0,1])))

['B.E', 'M.E']


# Training

In [679]:
x = allen1.drop(['S.No','salary package per month'],axis=1)
x.head()

Unnamed: 0,company,Age,Gender,Qualification,no of experience in years,Area of work
0,0,0.0,1,0,0.0,3
1,0,0.136364,0,1,0.047619,2
2,0,0.409091,1,1,0.238095,1
3,0,0.090909,1,0,0.095238,3
4,0,0.0,0,0,0.0,2


In [680]:
y = allen1['salary package per month']

In [681]:
xtrain, xtest, ytrain, ytest = train_test_split(x,y,test_size=0.48,random_state=1)

In [682]:
model = LinearRegression()
model.fit(xtrain,ytrain)
p = model.predict(xtest)
print(r2_score(ytest, p))

0.8081462969225229


# Test.csv - Creation

### Assignment Q1

In [683]:
companies = list(l1.inverse_transform([0,1,2,3,4,5,6,7]))

In [684]:
allen2 = 0
allen2 = pd.DataFrame(columns=['company','Age ','Gender','Qualification','no of experience in years','Area of work'])

In [685]:
for i in range(0,len(companies)):
    init = {"company": companies[i], 
            "Age ": 35,
            "Gender": "F",
            "Qualification": "B.E",
            "no of experience in years": 12,
            "Area of work": "cloud server management"
    }
    allen2 = allen2.append(init, ignore_index=True)

In [686]:
print(allen2.to_string())

        company Age  Gender Qualification no of experience in years             Area of work
0          ABBB   35      F           B.E                        12  cloud server management
1         Bashh   35      F           B.E                        12  cloud server management
2          Link   35      F           B.E                        12  cloud server management
3          Oxon   35      F           B.E                        12  cloud server management
4           TSC   35      F           B.E                        12  cloud server management
5  neuromorphic   35      F           B.E                        12  cloud server management
6          palm   35      F           B.E                        12  cloud server management
7      penguin    35      F           B.E                        12  cloud server management


# Test.csv - Label Encoding

### Company

In [687]:
l5 = preprocessing.LabelEncoder()
f5 = l5.fit_transform(allen2['company']) 
f5 = pd.DataFrame(data=f5, columns=['company'])

### Area

In [688]:
l6 = preprocessing.LabelEncoder()
f6 = l6.fit_transform(allen2['Area of work']) 
f6 = pd.DataFrame(data=f6, columns=['Area of work'])

### Gender

In [689]:
l7 = preprocessing.LabelEncoder()
f7 = l7.fit_transform(allen2['Gender']) 
f7 = pd.DataFrame(data=f7, columns=['Gender'])

### Qualification

In [690]:
l8 = preprocessing.LabelEncoder()
f8 = l8.fit_transform(allen2['Qualification']) 
f8 = pd.DataFrame(data=f8, columns=['Qualification'])

### Updation

In [691]:
allen2['company'] = f5['company']
allen2['Area of work'] = f6['Area of work']
allen2['Gender'] = f7['Gender']
allen2['Qualification'] = f8['Qualification']

# Test.csv - MinMaxScaler

In [692]:
ms = MinMaxScaler()
columns = ['Age ','no of experience in years']
allen2[columns] = ms.fit_transform(allen2[columns])

# Test.csv - Prediction

In [693]:
allen2.head(8)

Unnamed: 0,company,Age,Gender,Qualification,no of experience in years,Area of work
0,0,0.0,0,0,0.0,0
1,1,0.0,0,0,0.0,0
2,2,0.0,0,0,0.0,0
3,3,0.0,0,0,0.0,0
4,4,0.0,0,0,0.0,0
5,5,0.0,0,0,0.0,0
6,6,0.0,0,0,0.0,0
7,7,0.0,0,0,0.0,0


In [694]:
p2 = model.predict(allen2)

In [695]:
allen2['salary'] = p2
allen2.head(8)

Unnamed: 0,company,Age,Gender,Qualification,no of experience in years,Area of work,salary
0,0,0.0,0,0,0.0,0,69879.291815
1,1,0.0,0,0,0.0,0,63828.111187
2,2,0.0,0,0,0.0,0,57776.93056
3,3,0.0,0,0,0.0,0,51725.749932
4,4,0.0,0,0,0.0,0,45674.569305
5,5,0.0,0,0,0.0,0,39623.388678
6,6,0.0,0,0,0.0,0,33572.20805
7,7,0.0,0,0,0.0,0,27521.027423


# Test.csv - Index

In [696]:
print(l5.inverse_transform(f5['company']))
allen2['company'] = l5.inverse_transform(f5['company'])

['ABBB' 'Bashh' 'Link' 'Oxon' 'TSC' 'neuromorphic' 'palm' 'penguin ']


In [697]:
print(l6.inverse_transform(f6['Area of work']))
allen2['Area of work'] = l6.inverse_transform(f6['Area of work'])

['cloud server management' 'cloud server management'
 'cloud server management' 'cloud server management'
 'cloud server management' 'cloud server management'
 'cloud server management' 'cloud server management']


In [698]:
print(l7.inverse_transform(f7['Gender']))
allen2['Gender'] = l7.inverse_transform(f7['Gender'])

['F' 'F' 'F' 'F' 'F' 'F' 'F' 'F']


In [699]:
print(l8.inverse_transform(f8['Qualification']))
allen2['Qualification'] = l8.inverse_transform(f8['Qualification'])

['B.E' 'B.E' 'B.E' 'B.E' 'B.E' 'B.E' 'B.E' 'B.E']


In [700]:
print(ms.inverse_transform(allen2[columns]))
allen2[columns] = ms.inverse_transform(allen2[columns])

[[35. 12.]
 [35. 12.]
 [35. 12.]
 [35. 12.]
 [35. 12.]
 [35. 12.]
 [35. 12.]
 [35. 12.]]


In [701]:
allen2.head(8)

Unnamed: 0,company,Age,Gender,Qualification,no of experience in years,Area of work,salary
0,ABBB,35.0,F,B.E,12.0,cloud server management,69879.291815
1,Bashh,35.0,F,B.E,12.0,cloud server management,63828.111187
2,Link,35.0,F,B.E,12.0,cloud server management,57776.93056
3,Oxon,35.0,F,B.E,12.0,cloud server management,51725.749932
4,TSC,35.0,F,B.E,12.0,cloud server management,45674.569305
5,neuromorphic,35.0,F,B.E,12.0,cloud server management,39623.388678
6,palm,35.0,F,B.E,12.0,cloud server management,33572.20805
7,penguin,35.0,F,B.E,12.0,cloud server management,27521.027423


# Test.csv - Save

In [702]:
allen2.to_csv('Test.csv')

# Validation.csv - Creation

### Assignment Q2 

In [703]:
allen3 = pd.read_csv("Dataset2.csv")
allen4 = pd.read_csv("Dataset2.csv")
allen3.head(8)

Unnamed: 0,S.No,person,Age,Gender,Qualification,no of experience in years,Area of work,salary package per month
0,,Bobby,22,F,B.E,1,programming,
1,,Rahul,28,M,B.E,5,predictive analysis,
2,,Jhanvi,30,F,M.E,4,cloud server management,
3,,Sanjay,35,M,M.E,9,predictive analysis,
4,,sukruti,29,F,M.E,5,cloud server management,
5,,prachi,27,F,B.E,5,predictive analysis,
6,,Nazim,26,M,B.E,5,cloud server management,
7,,Maithri,24,F,M.E,1,cloud server management,


In [704]:
allen3["company"] = companies[0]
for i in range(1,len(companies)):
    allen4["company"] = companies[i]
    allen3 = pd.concat([allen3, allen4], ignore_index = True)
allen3.head(64)

Unnamed: 0,S.No,person,Age,Gender,Qualification,no of experience in years,Area of work,salary package per month,company
0,,Bobby,22,F,B.E,1,programming,,ABBB
1,,Rahul,28,M,B.E,5,predictive analysis,,ABBB
2,,Jhanvi,30,F,M.E,4,cloud server management,,ABBB
3,,Sanjay,35,M,M.E,9,predictive analysis,,ABBB
4,,sukruti,29,F,M.E,5,cloud server management,,ABBB
...,...,...,...,...,...,...,...,...,...
59,,Sanjay,35,M,M.E,9,predictive analysis,,penguin
60,,sukruti,29,F,M.E,5,cloud server management,,penguin
61,,prachi,27,F,B.E,5,predictive analysis,,penguin
62,,Nazim,26,M,B.E,5,cloud server management,,penguin


# Validation.csv - Label Encoding

### Company

In [705]:
l9 = preprocessing.LabelEncoder()
f9 = l9.fit_transform(allen3['company']) 
f9 = pd.DataFrame(data=f9, columns=['company'])

### Area

In [706]:
l10 = preprocessing.LabelEncoder()
f10 = l10.fit_transform(allen3['Area of work']) 
f10 = pd.DataFrame(data=f10, columns=['Area of work'])

### Gender

In [707]:
l11 = preprocessing.LabelEncoder()
f11 = l11.fit_transform(allen3['Gender']) 
f11 = pd.DataFrame(data=f11, columns=['Gender'])

### Qualification

In [708]:
l12 = preprocessing.LabelEncoder()
f12 = l12.fit_transform(allen3['Qualification']) 
f12 = pd.DataFrame(data=f12, columns=['Qualification'])

### Updation

In [709]:
allen3['company'] = f9['company']
allen3['Area of work'] = f10['Area of work']
allen3['Gender'] = f11['Gender']
allen3['Qualification'] = f12['Qualification']

# Validation.csv - MinMaxScaler

In [710]:
ms = MinMaxScaler()
columns = ['Age ','no of experience in years']
allen3[columns] = ms.fit_transform(allen3[columns])

# Validation.csv - Prediction

In [711]:
allen3 = allen3.drop(['S.No','person','salary package per month'],axis=1)
allen3 = allen3[['company','Age ','Gender','Qualification','no of experience in years','Area of work']]
allen3.head(64)

Unnamed: 0,company,Age,Gender,Qualification,no of experience in years,Area of work
0,0,0.000000,0,0,0.000,2
1,0,0.461538,1,0,0.500,1
2,0,0.615385,0,1,0.375,0
3,0,1.000000,1,1,1.000,1
4,0,0.538462,0,1,0.500,0
...,...,...,...,...,...,...
59,7,1.000000,1,1,1.000,1
60,7,0.538462,0,1,0.500,0
61,7,0.384615,0,0,0.500,1
62,7,0.307692,1,0,0.500,0


In [712]:
p3 = model.predict(allen3)

In [713]:
allen3['salary'] = p3
allen3.head(64)

Unnamed: 0,company,Age,Gender,Qualification,no of experience in years,Area of work,salary
0,0,0.000000,0,0,0.000,2,54824.880146
1,0,0.461538,1,0,0.500,1,208400.793430
2,0,0.615385,0,1,0.375,0,145345.375236
3,0,1.000000,1,1,1.000,1,352574.982362
4,0,0.538462,0,1,0.500,0,214053.480747
...,...,...,...,...,...,...,...
59,7,1.000000,1,1,1.000,1,310216.717970
60,7,0.538462,0,1,0.500,0,171695.216355
61,7,0.384615,0,0,0.500,1,185702.464296
62,7,0.307692,1,0,0.500,0,197819.463614


In [714]:
print(allen3.to_string())

    company      Age   Gender  Qualification  no of experience in years  Area of work         salary
0         0  0.000000       0              0                      0.000             2   54824.880146
1         0  0.461538       1              0                      0.500             1  208400.793430
2         0  0.615385       0              1                      0.375             0  145345.375236
3         0  1.000000       1              1                      1.000             1  352574.982362
4         0  0.538462       0              1                      0.500             0  214053.480747
5         0  0.384615       0              0                      0.500             1  228060.728687
6         0  0.307692       1              0                      0.500             0  240177.728006
7         0  0.153846       0              1                      0.000             0   48344.838040
8         1  0.000000       0              0                      0.000             2   487

# Validation.csv - Index

In [715]:
allen3['company'] = l9.inverse_transform(f9['company'])

In [716]:
allen3['Area of work'] = l10.inverse_transform(f10['Area of work'])

In [717]:
allen3['Gender'] = l11.inverse_transform(f11['Gender'])

In [718]:
allen3['Qualification'] = l12.inverse_transform(f12['Qualification'])

In [719]:
allen3[columns] = ms.inverse_transform(allen3[columns])

In [720]:
allen3.head(64)

Unnamed: 0,company,Age,Gender,Qualification,no of experience in years,Area of work,salary
0,ABBB,22.0,F,B.E,1.0,programming,54824.880146
1,ABBB,28.0,M,B.E,5.0,predictive analysis,208400.793430
2,ABBB,30.0,F,M.E,4.0,cloud server management,145345.375236
3,ABBB,35.0,M,M.E,9.0,predictive analysis,352574.982362
4,ABBB,29.0,F,M.E,5.0,cloud server management,214053.480747
...,...,...,...,...,...,...,...
59,penguin,35.0,M,M.E,9.0,predictive analysis,310216.717970
60,penguin,29.0,F,M.E,5.0,cloud server management,171695.216355
61,penguin,27.0,F,B.E,5.0,predictive analysis,185702.464296
62,penguin,26.0,M,B.E,5.0,cloud server management,197819.463614


In [721]:
print(allen3.to_string())

         company  Age  Gender Qualification  no of experience in years             Area of work         salary
0           ABBB  22.0      F           B.E                        1.0              programming   54824.880146
1           ABBB  28.0      M           B.E                        5.0      predictive analysis  208400.793430
2           ABBB  30.0      F           M.E                        4.0  cloud server management  145345.375236
3           ABBB  35.0      M           M.E                        9.0      predictive analysis  352574.982362
4           ABBB  29.0      F           M.E                        5.0  cloud server management  214053.480747
5           ABBB  27.0      F           B.E                        5.0      predictive analysis  228060.728687
6           ABBB  26.0      M           B.E                        5.0  cloud server management  240177.728006
7           ABBB  24.0      F           M.E                        1.0  cloud server management   48344.838040
8

# Validation.csv - Save

In [722]:
allen3.to_csv('Validation.csv')