# Imports

In [134]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split as tts
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error

from sklearn.neighbors import KNeighborsRegressor
from sklearn.neighbors import KNeighborsClassifier


# Step 01: Regression

In [135]:
df = pd.read_csv('weight-height.csv')
df.head()

Unnamed: 0,Gender,Height,Weight
0,Male,73.847017,241.893563
1,Male,68.781904,162.310473
2,Male,74.110105,212.740856
3,Male,71.730978,220.04247
4,Male,69.881796,206.349801


In [136]:
df_enc = df.copy()
df_lr = df.copy()
df_knnr = df.copy()
df_knnC = df.copy()

In [137]:
df.isnull().sum()

Gender    0
Height    0
Weight    0
dtype: int64

In [138]:
df.Gender.value_counts()

Male      5000
Female    3555
Name: Gender, dtype: int64

In [139]:
le = LabelEncoder()
df_enc.Gender = le.fit_transform(df_enc.Gender)

In [140]:
df_enc.Gender.value_counts()

1    5000
0    3555
Name: Gender, dtype: int64

2. Separate x(Gender, Height) and y (y=Weight)
3. Train = 70%, Test = 30%
4. Apply Linear Regression
5. Evaluate the Model (Testing and training Accuracy, MSE for testing)
6. Apply KNN Regressor: https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsRegressor.html
7. Evaluate the Model (Testing and training Accuracy, MSE for testing)
8. Compare KNN & Linear regression with the KNN model and Linear regression as well.


Work from here..

2. Separate x(Gender, Height) and y (y=Weight)

In [141]:
x = df_enc.drop('Weight', axis=1)
y = df_enc[['Weight']]
x.head()

Unnamed: 0,Gender,Height
0,1,73.847017
1,1,68.781904
2,1,74.110105
3,1,71.730978
4,1,69.881796


In [142]:
y.head()

Unnamed: 0,Weight
0,241.893563
1,162.310473
2,212.740856
3,220.04247
4,206.349801


3. Train = 70%, Test = 30%

In [143]:
xtrain, xtest, ytrain, ytest = tts(x,y,test_size=0.30, random_state=1)

4. Apply Linear Regression

In [144]:
reg = LinearRegression()

In [145]:
reg.fit(xtrain, ytrain)

LinearRegression()

In [146]:
reg.predict([[1, 241.893563]]) #Checking Prediction for our data



array([[1218.62307606]])

In [147]:
reg.coef_ #For our 2 variable m1 and m2 are calculated

array([[19.34359322,  5.96887438]])

In [148]:
reg.intercept_ #Y Intercept

array([-244.55280903])

In [149]:
reg.predict(xtest) # Prediction for testing data

array([[143.83013436],
       [197.53417989],
       [184.63515112],
       ...,
       [160.7078525 ],
       [186.82852694],
       [201.4226542 ]])

5. Evaluate the Model (Testing and training Accuracy, MSE for testing)

In [150]:
#testing MSE
test_mse = mean_squared_error(ytest, reg.predict(xtest)) #ytest is actual y, reg.predict(xtest) is actually predicted y
test_mse

99.6725099807003

In [151]:
#trainin MSE
train_mse = mean_squared_error(ytrain, reg.predict(xtrain))
train_mse

104.03843190905906

6. Apply KNN Regressor: https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsRegressor.html

In [152]:
neigh = KNeighborsRegressor(n_neighbors=3)
neigh.fit(xtrain, ytrain)

KNeighborsRegressor(n_neighbors=3)

In [153]:
neigh.predict([[1, 241.893563]]) #Checking Prediction for our data



array([[231.2480519]])

7. Evaluate the Model (Testing and training Accuracy, MSE for testing)

In [154]:
#testing MSE
knn_test_mse = mean_squared_error(ytest, neigh.predict(xtest)) #ytest is actual y, reg.predict(xtest) is actually predicted y
knn_test_mse

138.90129829085723

In [155]:
#trainin MSE
knn_train_mse = mean_squared_error(ytrain, neigh.predict(xtrain))
knn_train_mse

68.70923738312098

8. Compare KNN & Linear regression with the KNN model and Linear regression as well.

In [156]:
reg.score(xtrain, ytrain) # Training accuracy

0.8972135459668117

In [157]:
reg.score(xtest, ytest) # Testing accuracy

0.9059959607091161

In [158]:
neigh.score(xtrain, ytrain)# Training accuracy

0.9321175959657978

In [159]:
neigh.score(xtest, ytest)# Testing accuracy

0.8689981509985368

In [160]:
if(reg.score(xtrain, ytrain)>neigh.score(xtrain, ytrain)):
    print('Based on training data linear regression is more accurate than KNN Regression')
else:
    print('Based on training data KNN Regression is more accurate than linear regression')


Based on training data KNN Regression is more accurate than linear regression


In [161]:
if(reg.score(xtest, ytest)>neigh.score(xtest, ytest)):
    print('Based on testing data linear regression is more accurate than KNN Regression')
else:
    print('Based on testing data KNN Regression is more accurate than linear regression')

Based on testing data linear regression is more accurate than KNN Regression


# Step 02: Classification

In [162]:
df_enc.head()

Unnamed: 0,Gender,Height,Weight
0,1,73.847017,241.893563
1,1,68.781904,162.310473
2,1,74.110105,212.740856
3,1,71.730978,220.04247
4,1,69.881796,206.349801


2. Separate x and (y=Gender)
3. Train = 70%, Test = 30%
4. Apply KNN Classifier 
5. Evaluate the Model by only Accuracy.
6. Apply KNN Classifier: https://scikit-learn.org/stable/modules/generated/sklearn.neighbors.KNeighborsClassifier.html

Work from here..

2. Separate x and (y=Gender)

In [163]:
knn_c_x = df_enc.drop('Gender', axis=1)
knn_c_y = df_enc[['Gender']]
knn_c_x.head()

Unnamed: 0,Height,Weight
0,73.847017,241.893563
1,68.781904,162.310473
2,74.110105,212.740856
3,71.730978,220.04247
4,69.881796,206.349801


In [168]:
knn_c_y.head()

Unnamed: 0,Gender
0,1
1,1
2,1
3,1
4,1


3. Train = 70%, Test = 30%

In [169]:
knn_c_xtrain, knn_c_xtest, knn_c_ytrain, knn_c_ytest = tts(knn_c_x,knn_c_y,test_size=0.30, random_state=1)

4. Apply KNN Classifier 

In [170]:
knn_c_neigh = KNeighborsClassifier(n_neighbors=5)
knn_c_neigh.fit(knn_c_xtrain, knn_c_ytrain)

  return self._fit(X, y)


KNeighborsClassifier()

In [173]:
knn_c_neigh.predict([[73.847017, 241.893563]])



array([1])

Evaluate the Model by only Accuracy.

In [174]:
knn_c_neigh.score(knn_c_xtrain, knn_c_ytrain)# Training accuracy

0.9263527054108216

In [175]:
knn_c_neigh.score(knn_c_xtest, knn_c_ytest)# Testing accuracy

0.9146864043630697

In [177]:
print('Based On KNN Classifier on our training data, our model performance is', knn_c_neigh.score(knn_c_xtrain, knn_c_ytrain)*100,'%')

Based On KNN Classifier on our training data, our model performance is 92.63527054108216 %


In [178]:
print('Based On KNN Classifier on our testing data, our model performance is', knn_c_neigh.score(knn_c_xtest, knn_c_ytest)*100,'%')

Based On KNN Classifier on our testing data, our model performance is 91.46864043630697 %
