In [63]:
import pandas as pd
import numpy as np 
from sklearn.preprocessing import OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
%matplotlib inline

In [58]:
# Load the raw table; the relative path is resolved against the kernel's
# current working directory.
data = pd.read_csv(filepath_or_buffer="dataset.csv")
# Bare expression so the notebook renders the frame as this cell's output.
data

Unnamed: 0,No,Embarked,Sex,Age,Sallary
0,100,S,Famale,22.0,2330
1,200,C,Male,45.0,5520
2,100,S,Famale,28.0,3000
3,100,S,Famale,,2500
4,100,C,Famale,40.0,5236
5,200,Q,Male,33.0,3550
6,200,S,Male,22.0,2350
7,200,C,Male,,3850
8,100,S,Famale,32.0,3450
9,100,Q,Famale,28.0,3100


In [66]:
# Frequency of each label in the "Sex" column (labels are spelled as in the data).
data["Sex"].value_counts()

Famale    10
Male       9
Name: Sex, dtype: int64

In [10]:
# Target: the "Sallary" column (name spelled as in the dataset).
y = data["Sallary"]
# Features: the first four columns by position. This relies on the target being
# stored after them -- the frame displayed above lists No, Embarked, Sex, Age,
# Sallary in that order.
X = data.iloc[:, 0:4]

In [12]:
# Display the feature frame to confirm which columns were selected.
X

Unnamed: 0,No,Embarked,Sex,Age
0,100,S,Famale,22.0
1,200,C,Male,45.0
2,100,S,Famale,28.0
3,100,S,Famale,
4,100,C,Famale,40.0
5,200,Q,Male,33.0
6,200,S,Male,22.0
7,200,C,Male,
8,100,S,Famale,32.0
9,100,Q,Famale,28.0


In [13]:
# Unfitted estimator instances shared by both pipeline variants built below.
#
# handle_unknown="ignore": the dataset is tiny (19 rows per the value_counts
# above), so a random train/test split can leave an "Embarked" or "Sex" category
# out of the training fold. With the default ("error") the encoder would raise
# when it meets that category at predict time; with "ignore" the unseen
# category is encoded as all zeros for that feature instead.
o_h_encoder = OneHotEncoder(handle_unknown="ignore")
# SimpleImputer's default strategy is "mean"; it is applied to "Age" below.
imputer = SimpleImputer()
# Ordinary least-squares regressor used as the final step of each pipeline.
model = LinearRegression()

## make_column_transformer and make_pipeline

In [14]:
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline

In [15]:
# Per-column preprocessing; step names are generated automatically.
column_transformer = make_column_transformer(
    (o_h_encoder, ["Embarked", "Sex"]),  # one-hot encode the categorical columns
    (imputer, ["Age"]),                  # fill in missing ages
    remainder="passthrough",             # keep every column not listed above as-is
)

In [16]:
# Chain preprocessing and the regressor. make_pipeline names the steps from the
# lowercased class names (see the repr printed after fitting).
pipe = make_pipeline(
    column_transformer,
    model,
)

In [20]:
# Hold out 20% of the rows for evaluation; random_state pins the shuffle so the
# same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

In [21]:
# Inspect the held-out rows (note that one of them has a missing "Age").
X_test

Unnamed: 0,No,Embarked,Sex,Age
3,100,S,Famale,
15,200,C,Male,29.0
6,200,S,Male,22.0
10,200,C,Male,31.0


In [23]:
# Fit the preprocessing and the regressor on the training fold only; the fitted
# pipeline is returned and rendered as the cell output.
pipe.fit(X=X_train, y=y_train)

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('onehotencoder',
                                                  OneHotEncoder(),
                                                  ['Embarked', 'Sex']),
                                                 ('simpleimputer',
                                                  SimpleImputer(), ['Age'])])),
                ('linearregression', LinearRegression())])

In [30]:
# Predictions of the make_pipeline model for the held-out rows.
# The name `predict` is read by the metric cells and is rebound later by the
# explicit Pipeline variant.
predict = pipe.predict(X_test)

In [33]:
from sklearn.metrics import r2_score
# R^2 on the held-out rows. A negative value means the model fits worse than
# always predicting the mean of y_test.
print(r2_score(y_true=y_test, y_pred=predict))

-291.0735843527004


## ColumnTransformer and Pipeline

In [42]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [50]:
# Same preprocessing as the make_column_transformer variant, but with
# explicitly chosen step names ("Encoder", "imputer").
column_trans = ColumnTransformer(
    transformers=[
        ("Encoder", o_h_encoder, ["Embarked", "Sex"]),  # one-hot encode categoricals
        ("imputer", imputer, ["Age"]),                  # fill in missing ages
    ],
    remainder="passthrough",  # keep every column not listed above as-is
)

In [51]:
# Explicitly named two-step pipeline: preprocessing, then the estimator.
# NOTE(review): the final step is labelled "classifier" but holds `model`, a
# LinearRegression (a regressor); the label is only a step name.
pipeline = Pipeline(
    steps=[
        ("preprocessor", column_trans),
        ("classifier", model),
    ]
)

In [52]:
# Fit the explicitly named pipeline on the training fold; the fitted pipeline
# is returned and rendered as the cell output.
pipeline.fit(X=X_train, y=y_train)

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(remainder='passthrough',
                                   transformers=[('Encoder', OneHotEncoder(),
                                                  ['Embarked', 'Sex']),
                                                 ('imputer', SimpleImputer(),
                                                  ['Age'])])),
                ('classifier', LinearRegression())])

In [54]:
# Predictions of the explicit Pipeline variant for the held-out rows.
# This rebinds `predict`, so the metric cells that follow score this pipeline.
predict = pipeline.predict(X_test)

In [55]:
# R^2 on the held-out rows for the current `predict`. A negative value means
# the model fits worse than always predicting the mean of y_test.
print(r2_score(y_true=y_test, y_pred=predict))

-291.0735843527004


In [56]:
from sklearn.metrics import mean_absolute_error
# Mean absolute error on the held-out rows, in the same units as "Sallary".
print("MAE", mean_absolute_error(y_true=y_test, y_pred=predict))

MAE 5237.853151910936


In [57]:
from sklearn.metrics import mean_squared_error
# Mean squared error on the held-out rows (squared "Sallary" units).
print("MSE", mean_squared_error(y_true=y_test, y_pred=predict))

MSE 40284249.1218462
