In [None]:
#Data.csv

**Step 1: Importing the libraries**

In [None]:

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split


**Step 2: Importing the dataset**

In [None]:
# Load the raw dataset from the CSV file that sits next to the notebook.
data = pd.read_csv(filepath_or_buffer='Data.csv')


In [None]:
# NOTE: plain assignment binds a second name to the SAME DataFrame object (no copy is made),
# so the column assignments applied to `data` in Step 3 are also visible through `data1`.
data1 = data 

**Step 3: Handling the missing data**

In [None]:
# Preview the first five rows of the raw data.
data.head(n=5)

Unnamed: 0,Country,Age,Salary,Purchased
0,France,44.0,72000.0,No
1,Spain,27.0,48000.0,Yes
2,Germany,30.0,54000.0,No
3,Spain,38.0,61000.0,No
4,Germany,40.0,,Yes


In [None]:
# Count the missing values in every column before imputation.
data.isna().sum()

Country      0
Age          1
Salary       1
Purchased    0
dtype: int64

In [None]:
# Frequency of each country in the raw data.
data['Country'].value_counts()

France     4
Spain      3
Germany    3
Name: Country, dtype: int64

In [None]:
# Frequency of each distinct age (value_counts leaves NaN out by default).
data['Age'].value_counts()

44.0    1
27.0    1
30.0    1
38.0    1
40.0    1
35.0    1
48.0    1
50.0    1
37.0    1
Name: Age, dtype: int64

In [None]:
# Frequency of each distinct salary (value_counts leaves NaN out by default).
data['Salary'].value_counts()

72000.0    1
48000.0    1
54000.0    1
61000.0    1
58000.0    1
52000.0    1
79000.0    1
83000.0    1
67000.0    1
Name: Salary, dtype: int64

In [None]:
# Replace missing ages with the column mean (mean() skips NaN by default).
mean_age = data['Age'].mean()
data['Age'] = data['Age'].fillna(value=mean_age)

In [None]:
# Replace missing salaries with the column mean (mean() skips NaN by default).
mean_salary = data['Salary'].mean()
data['Salary'] = data['Salary'].fillna(value=mean_salary)

In [None]:
# Re-check the per-column missing-value counts after imputation.
data.isna().sum()

Country      0
Age          0
Salary       0
Purchased    0
dtype: int64

**Step 4: Encoding categorical data**

In [None]:
# Category frequencies for the nominal Country column that will be one-hot encoded.
data.Country.value_counts()

France     4
Spain      3
Germany    3
Name: Country, dtype: int64

In [None]:
# Class balance of the Purchased column.
data.Purchased.value_counts()

No     5
Yes    5
Name: Purchased, dtype: int64

In [None]:
# Use one-hot encoding for Country and ordinal (label-style) encoding for Purchased

In [None]:
from sklearn.preprocessing import OrdinalEncoder
from sklearn.preprocessing import OneHotEncoder

In [None]:
# Dense one-hot encoder for the Country column.
# scikit-learn 1.2 renamed the `sparse` argument to `sparse_output` and 1.4
# removed the old name, so try the new keyword first and fall back to the old
# one on releases that do not know it yet.
try:
    OHE = OneHotEncoder(sparse_output=False)
except TypeError:
    OHE = OneHotEncoder(sparse=False)

# Ordinal encoder (maps each category to a numeric code).
OE = OrdinalEncoder()

In [None]:
# One-hot encode Country; the result has one column per category.
data_OHE = OHE.fit_transform(data[['Country']])

# Derive the Country_1..Country_k names from the actual array width instead of
# hard-coding three names, and reuse data's index so the later column-wise
# concat lines the rows up even if the index is not 0..n-1.
ohe_columns = [f'Country_{i}' for i in range(1, data_OHE.shape[1] + 1)]
df_OHE = pd.DataFrame(data_OHE, columns=ohe_columns, index=data.index)
df_OHE

Unnamed: 0,Country_1,Country_2,Country_3
0,1.0,0.0,0.0
1,0.0,0.0,1.0
2,0.0,1.0,0.0
3,0.0,0.0,1.0
4,0.0,1.0,0.0
5,1.0,0.0,0.0
6,0.0,0.0,1.0
7,1.0,0.0,0.0
8,0.0,1.0,0.0
9,1.0,0.0,0.0


In [None]:
# Encode the Purchased labels as numeric codes (a single-column 2-D array).
data_OE = OE.fit_transform(data[['Purchased']])

# Reuse data's index so the later column-wise concat aligns row for row even
# if the index is not the default 0..n-1 range.
df_OE = pd.DataFrame(data_OE, columns=['Purchased'], index=data.index)
df_OE


Unnamed: 0,Purchased
0,0.0
1,1.0
2,0.0
3,0.0
4,1.0
5,1.0
6,0.0
7,1.0
8,0.0
9,1.0


In [None]:
# Keep everything between the first and last columns, then place the one-hot
# columns in front of it and the encoded Purchased column behind it.
middle_columns = data.iloc[:, 1:-1]
data = pd.concat(objs=[df_OHE, middle_columns, df_OE], axis=1)

In [None]:
# Display the combined frame: one-hot Country columns, Age, Salary, and the encoded Purchased column.
data

Unnamed: 0,Country_1,Country_2,Country_3,Age,Salary,Purchased
0,1.0,0.0,0.0,44.0,72000.0,0.0
1,0.0,0.0,1.0,27.0,48000.0,1.0
2,0.0,1.0,0.0,30.0,54000.0,0.0
3,0.0,0.0,1.0,38.0,61000.0,0.0
4,0.0,1.0,0.0,40.0,63777.777778,1.0
5,1.0,0.0,0.0,35.0,58000.0,1.0
6,0.0,0.0,1.0,38.777778,52000.0,0.0
7,1.0,0.0,0.0,48.0,79000.0,1.0
8,0.0,1.0,0.0,50.0,83000.0,0.0
9,1.0,0.0,0.0,37.0,67000.0,1.0


**Step 5: Creating a dummy variable**

In [None]:
# Alternative encoding: let pandas expand the categorical columns into
# indicator (dummy) columns, then show the result.
data1 = pd.get_dummies(data=data1)
data1

Unnamed: 0,Age,Salary,Country_France,Country_Germany,Country_Spain,Purchased_No,Purchased_Yes
0,44.0,72000.0,1,0,0,1,0
1,27.0,48000.0,0,0,1,0,1
2,30.0,54000.0,0,1,0,1,0
3,38.0,61000.0,0,0,1,1,0
4,40.0,63777.777778,0,1,0,0,1
5,35.0,58000.0,1,0,0,0,1
6,38.777778,52000.0,0,0,1,1,0
7,48.0,79000.0,1,0,0,0,1
8,50.0,83000.0,0,1,0,1,0
9,37.0,67000.0,1,0,0,0,1


**Step 6: Splitting the dataset into the training set and test set**

In [None]:
# Feature matrix: every column except the last one.
X = data.iloc[:, :-1]

In [None]:
# Target vector: the encoded Purchased column.
y = data.loc[:, 'Purchased']

In [None]:
# Hold out 10% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.1,
    random_state=10,
)

**Step 7: Feature Scaling**

In [None]:
# Min-max scaler with its default target range spelled out.
mm = MinMaxScaler(feature_range=(0, 1))

In [None]:
# Scale the full feature matrix so the preview cell that follows still receives
# an array with one column per feature.
X = mm.fit_transform(X)

# The split was made before scaling, so the train/test partitions have to be
# scaled as well. Re-fit on the training rows only and reuse those statistics
# for the test rows, so nothing from the test set leaks into the scaler.
# The results are wrapped back into DataFrames to keep the original labels.
X_train = pd.DataFrame(
    mm.fit_transform(X_train), columns=X_train.columns, index=X_train.index
)
X_test = pd.DataFrame(
    mm.transform(X_test), columns=X_test.columns, index=X_test.index
)

In [None]:
# Show the scaled features with their original column names (all but the last column of data).
pd.DataFrame(data=X, columns=data.columns[:-1])

Unnamed: 0,Country_1,Country_2,Country_3,Age,Salary
0,1.0,0.0,0.0,0.73913,0.685714
1,0.0,0.0,1.0,0.0,0.0
2,0.0,1.0,0.0,0.130435,0.171429
3,0.0,0.0,1.0,0.478261,0.371429
4,0.0,1.0,0.0,0.565217,0.450794
5,1.0,0.0,0.0,0.347826,0.285714
6,0.0,0.0,1.0,0.512077,0.114286
7,1.0,0.0,0.0,0.913043,0.885714
8,0.0,1.0,0.0,1.0,1.0
9,1.0,0.0,0.0,0.434783,0.542857
