In [1]:
import pandas as pd
import numpy as np
import math
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.neighbors import KNeighborsRegressor
import warnings as wr
wr.filterwarnings('ignore')

In [2]:
df = pd.read_csv('weight-height.csv')
df.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


In [3]:
df.shape

(8555, 3)

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8555 entries, 0 to 8554
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Gender  8555 non-null   object 
 1   Height  8555 non-null   float64
 2   Weight  8555 non-null   float64
dtypes: float64(2), object(1)
memory usage: 200.6+ KB


In [5]:
df.isnull().sum()

Gender    0
Height    0
Weight    0
dtype: int64

In [6]:
df.Gender.unique()

array(['Male', 'Female'], dtype=object)

In [7]:
le=LabelEncoder()

In [8]:
# Replace the Gender strings with the integer codes assigned by the encoder.
df['Gender'] = le.fit_transform(df['Gender'])

In [9]:
df.head()

Unnamed: 0,Gender,Height,Weight
0,1,73.847017,241.893563
1,1,68.781904,162.310473
2,1,74.110105,212.740856
3,1,71.730978,220.04247
4,1,69.881796,206.349801


In [10]:
df.corr()

Unnamed: 0,Gender,Height,Weight
Gender,1.0,0.682524,0.790208
Height,0.682524,1.0,0.922975
Weight,0.790208,0.922975,1.0


In [11]:
df.describe()

Unnamed: 0,Gender,Height,Weight
count,8555.0,8555.0,8555.0
mean,0.584454,66.809925,165.632735
std,0.492845,3.851454,32.043922
min,0.0,54.616858,65.78
25%,0.0,63.957684,139.876803
50%,1.0,66.985923,168.521567
75%,1.0,69.604427,190.666305
max,1.0,80.45,269.989698


In [12]:
# Feature matrix: every column except the regression target.
x = df.drop(columns='Weight')
x.head()

Unnamed: 0,Gender,Height
0,1,73.847017
1,1,68.781904
2,1,74.110105
3,1,71.730978
4,1,69.881796


In [13]:
# Regression target.
y = df['Weight']

In [14]:
# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.30,
    random_state=42,
)

In [15]:
def watchRegressorResult(x, y, model, c, m):
    """Print colour-coded fit metrics for an already-fitted regressor.

    The report always contains ``model.score(x, y)``, the r2 score, the mean
    squared error and the mean absolute error of ``model.predict(x)`` against
    ``y``. When both ``c`` and ``m`` are truthy it is prefixed with the
    model's ``coef_`` and ``intercept_``.

    Parameters
    ----------
    x, y : features and true targets to evaluate on.
    model : fitted estimator exposing ``score`` and ``predict``; it must also
        expose ``coef_`` and ``intercept_`` when ``c`` and ``m`` are both
        truthy (an AttributeError is raised otherwise).
    c, m : flags; the slope/intercept prefix is printed only when BOTH are
        truthy. Presumably named after y = m*x + c -- TODO confirm.

    Returns
    -------
    None. The report is written to stdout as a single line.
    """
    # ANSI SGR escapes: bright blue, bright green, bright red, reset-all.
    blue, green, red, reset = "\033[94m", "\033[92m", "\033[91m", "\033[0m"

    score = model.score(x, y)
    predicted = model.predict(x)
    r2 = r2_score(y, predicted)
    mse = mean_squared_error(y, predicted)
    mae = mean_absolute_error(y, predicted)

    # The line ends with a reset so the red colour does not leak into
    # whatever is printed next.
    report = (
        f"{blue}Score is {score}, r2_score is {r2}{reset}, "
        f"{red} MSE is {mse},  MAE is {mae}{reset}"
    )
    if c and m:
        # The prefix starts in blue; the previous "\033[9m" was the
        # strikethrough code and crossed out the text up to the first reset.
        report = (
            f"{blue}Slope is {model.coef_} and "
            f"{green}Intercept is {model.intercept_}, {reset}" + report
        )
    print(report)


## OLS

In [16]:
lreg=LinearRegression()

In [17]:
lreg.fit(xtrain,ytrain)

In [18]:
watchRegressorResult(xtrain,ytrain,lreg,True,True)

[9mSlop is[94m [19.54151693  5.9562333 ] and [92mIntercept is -243.79306041624102[92m, [94mScore is 0.8973793060969246, [94mr2_score is 0.8973793060969246[0m, [91m MSE is 105.23806184862872[91m, [91m MAE is 8.07654225567556[91m


In [19]:
watchRegressorResult(xtest,ytest,lreg,True,True)

[9mSlop is[94m [19.54151693  5.9562333 ] and [92mIntercept is -243.79306041624102[92m, [94mScore is 0.9059112424422658, [94mr2_score is 0.9059112424422658[0m, [91m MSE is 96.83734437830613[91m, [91m MAE is 7.870097130738321[91m


## KNN Regressor

In [20]:
# Rule-of-thumb neighbourhood size: the square root of the number of
# training rows, rounded down to a whole number.
length = len(xtrain)
sqrtLength = math.sqrt(length)
k = int(sqrtLength)  # truncation equals floor for a non-negative value

In [21]:
kreg=KNeighborsRegressor(n_neighbors=k)

In [22]:
kreg.fit(xtrain,ytrain)

In [23]:
watchRegressorResult(xtrain,ytrain,kreg,False,False)

[94mScore is 0.8966819338674217, [94mr2_score is 0.8966819338674217[0m, [91m MSE is 105.95322074132991[91m, [91m MAE is 8.089729511191214[91m


In [24]:
watchRegressorResult(xtest,ytest,kreg,False,False)

[94mScore is 0.9038175148962713, [94mr2_score is 0.9038175148962713[0m, [91m MSE is 98.99223536282582[91m, [91m MAE is 7.9548602221273494[91m


## Remark: Both the Linear and KNN regressors give about the same score (test R² ≈ 0.906 vs. 0.904)