In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer

In [2]:
# Toy salary dataset, one row per employee. Row 5 has a missing Experience
# and a missing Education value so the imputers have something to fill.
data=pd.DataFrame(
    dict(
        Experience=[1,2,3,4,5,np.nan,7,8,9,10],
        Education=[
            "Bachelors","Bachelors","Masters","PhD","Bachelors",
            np.nan,"Bachelors","Masters","PhD","Bachelors",
        ],
        Salary=[35000,40000,45000,55000,50000,50000,62000,70000,75000,76000],
    )
)
data


Unnamed: 0,Experience,Education,Salary
0,1.0,Bachelors,35000
1,2.0,Bachelors,40000
2,3.0,Masters,45000
3,4.0,PhD,55000
4,5.0,Bachelors,50000
5,,,50000
6,7.0,Bachelors,62000
7,8.0,Masters,70000
8,9.0,PhD,75000
9,10.0,Bachelors,76000


In [3]:
# Feature matrix (two predictor columns) and regression target.
X=data.loc[:,['Experience','Education']]
y=data.loc[:,'Salary']

In [4]:
# Column groups routed to the two preprocessing branches below.
num_cols=['Experience']
cat_cols=['Education']

# Numeric branch: replace missing values with the column mean, then
# standardise to zero mean / unit variance.
numeric=Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="mean")),
    ("scaler",StandardScaler())])

# Categorical branch: replace missing values with the most frequent level,
# then one-hot encode. handle_unknown="ignore" means a level not seen during
# fit is encoded as all zeros instead of raising.
categorical=Pipeline(steps=[
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("encoder",OneHotEncoder(handle_unknown="ignore"))])

# Jupyter renders only the LAST expression of a cell by default, so a bare
# `numeric` placed mid-cell was never shown; it has been dropped. Inspect
# `numeric` in its own cell if its repr is needed.
categorical

In [5]:
# Send each column group through its own preprocessing pipeline.
preprocess=ColumnTransformer(
    transformers=[
        ('num',numeric,num_cols),
        ('cat',categorical,cat_cols),
    ]
)

# End-to-end estimator: preprocessing followed by a linear regression.
# The variable name `model` is arbitrary (e.g. `pipe` would work equally
# well); later cells call .fit / .predict on whatever name is chosen here.
model=Pipeline(
    steps=[
        ('prep',preprocess),
        ('model',LinearRegression()),
    ]
)

In [6]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across kernel restarts.
X_train,X_test,y_train,y_test=train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
# Fit the whole pipeline (imputers, scaler, encoder, regressor) on the
# training rows only.
model.fit(X_train,y_train)

In [7]:
# Predict salaries for the held-out rows.
y_pred=model.predict(X_test)
# A bare `y_pred` mid-cell is not rendered by Jupyter (only the last
# expression is), so the redundant statement was removed; print shows it.
print("Predicted Values Are:",y_pred)

Predicted Values Are: [78560.10032088 37232.84033489]


In [8]:
#Evaluation
# Compute all three hold-out metrics first, then report them uniformly.
mae,rmse,r2=(
    mean_absolute_error(y_test,y_pred),
    np.sqrt(mean_squared_error(y_test,y_pred)),
    r2_score(y_test,y_pred),
)
# NOTE(review): the "R-Squarred" label is misspelled; it is kept verbatim
# here so the recorded cell output still matches.
for label,value in (
    ("Mean Absolute Error:",mae),
    ("Root Mean Squared Error:",rmse),
    ("R-Squarred",r2),
):
    print(label,round(value,2))

Mean Absolute Error: 3163.63
Root Mean Squared Error: 3188.38
R-Squarred 0.97


In [9]:
# Predict for New Employee
# NOTE: input() blocks "Restart & Run All"; swap the two prompts for
# constants when the notebook must run unattended.

# Experience is a float column in the training data, so accept fractional
# years (e.g. "2.5") instead of crashing inside int().
exp=float(input('Enter the Experience: \t'))
# The encoder matches category strings exactly, so trim stray whitespace.
edu=input('Enter the Education: \t').strip()

# OneHotEncoder(handle_unknown="ignore") encodes an unseen level as all zeros
# without raising, so a typo such as "masters" would silently produce a
# prediction for "no known education level". Surface that to the user.
known_edu=[
    str(level)
    for level in model.named_steps['prep']
    .named_transformers_['cat']
    .named_steps['encoder']
    .categories_[0]
]
if edu not in known_edu:
    print("Warning: Education",repr(edu),"was not seen during training; expected one of",known_edu)

# Single-row frame with the same column names the pipeline was fitted on.
new_employee=pd.DataFrame({
    "Experience":[exp],
    "Education":[edu]
})
predict_salary=model.predict(new_employee)
print("Predicted Final Salary: \t ",round(predict_salary[0],2))

Enter the Experience: 	5
Enter the Education: 	Masters
Predicted Final Salary: 	  55143.99


Build a machine learning pipeline to predict the fuel efficiency (km/l) of a car based on engine size, horsepower, and fuel type.
Also evaluate the model using MAE, RMSE, and R².

In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler,OneHotEncoder
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer

In [2]:
# Toy car dataset, one row per car. EngineSize, HorsePower and FuelType each
# contain one missing value so every imputer in the pipeline is exercised.
data=pd.DataFrame(
    dict(
        EngineSize=[1.0, 1.2, 1.4, 1.6, 1.8, np.nan, 2.0, 2.2, 2.4, 2.6],
        HorsePower=[68, 75, 90, 100, 110, 115, np.nan, 130, 150, 160],
        FuelType=[
            "Petrol", "Diesel", "Petrol", "Diesel", "Petrol",
            "Petrol", np.nan, "Diesel", "Petrol", "Diesel",
        ],
        FuelEfficiency=[20, 22, 19, 25, 18, 21, 23, 26, 17, 24],
    )
)
data

Unnamed: 0,EngineSize,HorsePower,FuelType,FuelEfficiency
0,1.0,68.0,Petrol,20
1,1.2,75.0,Diesel,22
2,1.4,90.0,Petrol,19
3,1.6,100.0,Diesel,25
4,1.8,110.0,Petrol,18
5,,115.0,Petrol,21
6,2.0,,,23
7,2.2,130.0,Diesel,26
8,2.4,150.0,Petrol,17
9,2.6,160.0,Diesel,24


In [3]:
# Feature matrix (three predictor columns) and regression target.
X=data.loc[:, ["EngineSize", "HorsePower", "FuelType"]]
y=data.loc[:, "FuelEfficiency"]

In [4]:
# Numeric feature columns.
num_cols=[
    "EngineSize",
    "HorsePower",
]
# Categorical feature columns.
cat_cols=[
    "FuelType",
]

In [5]:
# Numeric branch: mean-impute missing values, then standardise.
numeric=Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="mean")),
        ("scaler", StandardScaler()),
    ]
)

# Categorical branch: fill missing values with the most frequent level, then
# one-hot encode; levels unseen at fit time are ignored rather than raising.
categorical=Pipeline(
    steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("encoder", OneHotEncoder(handle_unknown="ignore")),
    ]
)

# Apply each branch to its own column group.
preprocess=ColumnTransformer(
    transformers=[
        ("num", numeric, num_cols),
        ("cat", categorical, cat_cols),
    ]
)

# Full estimator: preprocessing followed by a linear regression.
model = Pipeline(
    steps=[
        ("prep", preprocess),
        ("model", LinearRegression()),
    ]
)

In [6]:
# Hold out 20% of the rows for evaluation; fixed seed for a reproducible split.
X_train,X_test,y_train,y_test=train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Fit preprocessing and regressor together on the training rows only.
model.fit(X=X_train,y=y_train)

In [8]:
# Score the held-out rows and echo the raw predictions.
y_pred = model.predict(X_test)
print("Predicted:", y_pred)

Predicted: [18.89758649 25.53470785]


In [9]:
# Report the hold-out metrics, each rounded to two decimals.
# NOTE(review): the test split is tiny, so R2 is very noisy; a negative R2
# means the model did worse than predicting the mean of the test targets.
for label, score in (
    ("MAE:", mean_absolute_error(y_test,y_pred)),
    ("RMSE:", np.sqrt(mean_squared_error(y_test,y_pred))),
    ("R2:", r2_score(y_test,y_pred)),
):
    print(label, round(score,2))

MAE: 2.72
RMSE: 2.84
R2: -0.29
