# Import Libraries 

In [660]:
import warnings
warnings.filterwarnings('ignore')

In [661]:
import csv
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [662]:
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import accuracy_score, log_loss, confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score

# Dataset

In [663]:
allen1 = pd.read_csv("Dataset1.csv")
allen3 = pd.read_csv("Dataset2.csv")

In [664]:
allen1.head()

Unnamed: 0,S.No,company,Age,Gender,Qualification,no of experience in years,Area of work,salary package per month
0,1,ABBB,21,M,B.E,0,programming,30000
1,2,ABBB,24,F,M.E,1,predective analysis,50000
2,3,ABBB,30,M,M.E,5,cloud server management,120000
3,4,ABBB,23,M,B.E,2,programming,42000
4,5,ABBB,21,F,B.E,0,predective analysis,35000


In [665]:
allen3.head()

Unnamed: 0,S.No,person,Age,Gender,Qualification,no of experience in years,Area of work,salary package per month
0,,Bobby,22,F,B.E,1,programming,
1,,Rahul,28,M,B.E,5,predictive analysis,
2,,Jhanvi,30,F,M.E,4,cloud server management,
3,,Sanjay,35,M,M.E,9,predictive analysis,
4,,sukruti,29,F,M.E,5,cloud server management,


In [666]:
allen1.describe()

Unnamed: 0,S.No,Age,no of experience in years,salary package per month
count,52.0,52.0,52.0,52.0
mean,26.5,27.788462,4.980769,87769.230769
std,15.154757,6.34726,6.027682,97667.024436
min,1.0,21.0,0.0,20000.0
25%,13.75,22.0,0.75,25000.0
50%,26.5,26.0,2.0,39500.0
75%,39.25,31.25,8.25,112500.0
max,52.0,43.0,21.0,500000.0


In [667]:
allen3.describe()

Unnamed: 0,S.No,Age,no of experience in years,salary package per month
count,0.0,8.0,8.0,0.0
mean,,27.625,4.375,
std,,3.961872,2.559994,
min,,22.0,1.0,
25%,,25.5,3.25,
50%,,27.5,5.0,
75%,,29.25,5.0,
max,,35.0,9.0,


# Dataset.csv - Label Encoding 

### Company

In [668]:
l1 = preprocessing.LabelEncoder()
f1 = l1.fit_transform(allen1['company']) 
f1 = pd.DataFrame(data=f1, columns=['company'])

### Area

In [669]:
l2 = preprocessing.LabelEncoder()
f2 = l2.fit_transform(allen1['Area of work']) 
f2 = pd.DataFrame(data=f2, columns=['Area of work'])

### Gender

In [670]:
l3 = preprocessing.LabelEncoder()
f3 = l3.fit_transform(allen1['Gender']) 
f3 = pd.DataFrame(data=f3, columns=['Gender'])

### Qualification

In [671]:
l4 = preprocessing.LabelEncoder()
f4 = l4.fit_transform(allen1['Qualification']) 
f4 = pd.DataFrame(data=f4, columns=['Qualification'])

### Updation

In [672]:
allen1['company'] = f1['company']
allen1['Area of work'] = f2['Area of work']
allen1['Gender'] = f3['Gender']
allen1['Qualification'] = f4['Qualification']

# Dataset.csv - MinMaxScaler 

In [673]:
ms = MinMaxScaler()
columns = ['Age ','no of experience in years']
allen1[columns] = ms.fit_transform(allen1[columns])

In [674]:
allen1.head()

Unnamed: 0,S.No,company,Age,Gender,Qualification,no of experience in years,Area of work,salary package per month
0,1,0,0.0,1,0,0.0,3,30000
1,2,0,0.136364,0,1,0.047619,2,50000
2,3,0,0.409091,1,1,0.238095,1,120000
3,4,0,0.090909,1,0,0.095238,3,42000
4,5,0,0.0,0,0,0.0,2,35000


# Dataset.csv - Index

In [675]:
print(list(l1.inverse_transform([0,1,2,3,4,5,6,7])))

['ABBB', 'Bashh', 'Link', 'Oxon', 'TSC', 'neuromorphic', 'palm', 'penguin ']


In [676]:
print(list(l2.inverse_transform([0,1,2,3])))

['Predective analysis', 'cloud server management', 'predective analysis', 'programming ']


In [677]:
print(list(l3.inverse_transform([0,1])))

['F', 'M']


In [678]:
print(list(l4.inverse_transform([0,1])))

['B.E', 'M.E']


# Training

In [679]:
x = allen1.drop(['S.No','salary package per month'],axis=1)
x.head()

Unnamed: 0,company,Age,Gender,Qualification,no of experience in years,Area of work
0,0,0.0,1,0,0.0,3
1,0,0.136364,0,1,0.047619,2
2,0,0.409091,1,1,0.238095,1
3,0,0.090909,1,0,0.095238,3
4,0,0.0,0,0,0.0,2


In [680]:
y = allen1['salary package per month']

In [681]:
xtrain, xtest, ytrain, ytest = train_test_split(x,y,test_size=0.48,random_state=1)

In [682]:
model = LinearRegression()
model.fit(xtrain,ytrain)
p = model.predict(xtest)
print(r2_score(ytest, p))

0.8081462969225229


# Test.csv - Creation

### Assignment Q1

In [683]:
companies = list(l1.inverse_transform([0,1,2,3,4,5,6,7]))

In [684]:
allen2 = 0
allen2 = pd.DataFrame(columns=['company','Age ','Gender','Qualification','no of experience in years','Area of work'])

In [685]:
for i in range(0,len(companies)):
    init = {"company": companies[i], 
            "Age ": 35,
            "Gender": "F",
            "Qualification": "B.E",
            "no of experience in years": 12,
            "Area of work": "cloud server management"
    }
    allen2 = allen2.append(init, ignore_index=True)

In [686]:
print(allen2.to_string())

        company Age  Gender Qualification no of experience in years             Area of work
0          ABBB   35      F           B.E                        12  cloud server management
1         Bashh   35      F           B.E                        12  cloud server management
2          Link   35      F           B.E                        12  cloud server management
3          Oxon   35      F           B.E                        12  cloud server management
4           TSC   35      F           B.E                        12  cloud server management
5  neuromorphic   35      F           B.E                        12  cloud server management
6          palm   35      F           B.E                        12  cloud server management
7      penguin    35      F           B.E                        12  cloud server management


# Test.csv - Label Encoding

### Company

In [687]:
l5 = preprocessing.LabelEncoder()
f5 = l5.fit_transform(allen2['company']) 
f5 = pd.DataFrame(data=f5, columns=['company'])

### Area

In [688]:
l6 = preprocessing.LabelEncoder()
f6 = l6.fit_transform(allen2['Area of work']) 
f6 = pd.DataFrame(data=f6, columns=['Area of work'])

### Gender

In [689]:
l7 = preprocessing.LabelEncoder()
f7 = l7.fit_transform(allen2['Gender']) 
f7 = pd.DataFrame(data=f7, columns=['Gender'])

### Qualification

In [690]:
l8 = preprocessing.LabelEncoder()
f8 = l8.fit_transform(allen2['Qualification']) 
f8 = pd.DataFrame(data=f8, columns=['Qualification'])

### Updation

In [691]:
allen2['company'] = f5['company']
allen2['Area of work'] = f6['Area of work']
allen2['Gender'] = f7['Gender']
allen2['Qualification'] = f8['Qualification']

# Test.csv - MinMaxScaler

In [692]:
ms = MinMaxScaler()
columns = ['Age ','no of experience in years']
allen2[columns] = ms.fit_transform(allen2[columns])

# Test.csv - Prediction

In [693]:
allen2.head(8)

Unnamed: 0,company,Age,Gender,Qualification,no of experience in years,Area of work
0,0,0.0,0,0,0.0,0
1,1,0.0,0,0,0.0,0
2,2,0.0,0,0,0.0,0
3,3,0.0,0,0,0.0,0
4,4,0.0,0,0,0.0,0
5,5,0.0,0,0,0.0,0
6,6,0.0,0,0,0.0,0
7,7,0.0,0,0,0.0,0


In [694]:
p2 = model.predict(allen2)

In [695]:
allen2['salary'] = p2
allen2.head(8)

Unnamed: 0,company,Age,Gender,Qualification,no of experience in years,Area of work,salary
0,0,0.0,0,0,0.0,0,69879.291815
1,1,0.0,0,0,0.0,0,63828.111187
2,2,0.0,0,0,0.0,0,57776.93056
3,3,0.0,0,0,0.0,0,51725.749932
4,4,0.0,0,0,0.0,0,45674.569305
5,5,0.0,0,0,0.0,0,39623.388678
6,6,0.0,0,0,0.0,0,33572.20805
7,7,0.0,0,0,0.0,0,27521.027423


# Test.csv - Index

In [696]:
print(l5.inverse_transform(f5['company']))
allen2['company'] = l5.inverse_transform(f5['company'])

['ABBB' 'Bashh' 'Link' 'Oxon' 'TSC' 'neuromorphic' 'palm' 'penguin ']


In [697]:
print(l6.inverse_transform(f6['Area of work']))
allen2['Area of work'] = l6.inverse_transform(f6['Area of work'])

['cloud server management' 'cloud server management'
 'cloud server management' 'cloud server management'
 'cloud server management' 'cloud server management'
 'cloud server management' 'cloud server management']


In [698]:
print(l7.inverse_transform(f7['Gender']))
allen2['Gender'] = l7.inverse_transform(f7['Gender'])

['F' 'F' 'F' 'F' 'F' 'F' 'F' 'F']


In [699]:
print(l8.inverse_transform(f8['Qualification']))
allen2['Qualification'] = l8.inverse_transform(f8['Qualification'])

['B.E' 'B.E' 'B.E' 'B.E' 'B.E' 'B.E' 'B.E' 'B.E']


In [700]:
print(ms.inverse_transform(allen2[columns]))
allen2[columns] = ms.inverse_transform(allen2[columns])

[[35. 12.]
 [35. 12.]
 [35. 12.]
 [35. 12.]
 [35. 12.]
 [35. 12.]
 [35. 12.]
 [35. 12.]]


In [701]:
allen2.head(8)

Unnamed: 0,company,Age,Gender,Qualification,no of experience in years,Area of work,salary
0,ABBB,35.0,F,B.E,12.0,cloud server management,69879.291815
1,Bashh,35.0,F,B.E,12.0,cloud server management,63828.111187
2,Link,35.0,F,B.E,12.0,cloud server management,57776.93056
3,Oxon,35.0,F,B.E,12.0,cloud server management,51725.749932
4,TSC,35.0,F,B.E,12.0,cloud server management,45674.569305
5,neuromorphic,35.0,F,B.E,12.0,cloud server management,39623.388678
6,palm,35.0,F,B.E,12.0,cloud server management,33572.20805
7,penguin,35.0,F,B.E,12.0,cloud server management,27521.027423


# Test.csv - Save

In [702]:
allen2.to_csv('Test.csv')