# Supervised Learning Model Evaluation Lab

Complete the exercises below to solidify your knowledge and understanding of supervised learning model evaluation.

In [3]:
import pandas as pd
from sklearn.model_selection import train_test_split

## Regression Model Evaluation

In [2]:
from sklearn.datasets import load_boston

data = load_boston()

X = pd.DataFrame(data["data"], columns=data["feature_names"])
y = pd.DataFrame(data["target"], columns=['MEDV'])

data = pd.concat([X, y], axis=1)

## 1. Split this data set into training (80%) and testing (20%) sets.

The `MEDV` field represents the median value of owner-occupied homes (in $1000's) and is the target variable that we will want to predict.

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20)

## 2. Train a `LinearRegression` model on this data set and generate predictions on both the training and the testing set.

In [6]:
from sklearn.linear_model import LinearRegression
reg = LinearRegression().fit(X_train, y_train)
pred_train=reg.predict(X_train)
pred_test=reg.predict(X_test)

## 3. Calculate and print R-squared for both the training and the testing set.

In [15]:
from sklearn.metrics import r2_score
r2train = r2_score(y_train, pred_train)
r2test = r2_score(y_test, pred_test)
print(f"r2train: {r2train}\nr2test: {r2test}")

r2train: 0.74564304327671
r2test: 0.7176766319861383


## 4. Calculate and print mean squared error for both the training and the testing set.

In [13]:
from sklearn.metrics import mean_squared_error
rmsetrain = mean_squared_error(y_train, pred_train)
rmsetest = mean_squared_error(y_test, pred_test)
print(f"rmse train: {rmsetrain}\nrmse test: {rmsetest}")

rmse train: 20.062600619548196
rmse test: 29.883158112070234


## 5. Calculate and print mean absolute error for both the training and the testing set.

In [14]:
from sklearn.metrics import mean_absolute_error
msetrain = mean_absolute_error(y_train, pred_train)
msetest = mean_absolute_error(y_test, pred_test)
print(f"mse train: {msetrain}\nmse test: {msetest}")

mse train: 3.168507898447291
mse test: 3.6112450193522636


## Classification Model Evaluation

In [20]:
from sklearn.datasets import load_iris

data2 = load_iris()

X2 = pd.DataFrame(data2["data"], columns=data2["feature_names"])
y2 = pd.DataFrame(data2["target"], columns=["class"])
data2 = pd.concat([X2, y2], axis=1)

## 6. Split this data set into training (80%) and testing (20%) sets.

The `class` field represents the type of flower and is the target variable that we will want to predict.

In [24]:
X_train2, X_test2, y_train2, y_test2 = train_test_split(X2, y2, test_size=0.20)

## 7. Train a `LogisticRegression` model on this data set and generate predictions on both the training and the testing set.

In [26]:
from sklearn.linear_model import LogisticRegression
log = LogisticRegression().fit(X_train2, y_train2)
pred_train2=log.predict(X_train2)
pred_test2=log.predict(X_test2)

  y = column_or_1d(y, warn=True)
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


## 8. Calculate and print the accuracy score for both the training and the testing set.

In [28]:
from sklearn.metrics import accuracy_score
acc_train = accuracy_score(y_train2, pred_train2)
acc_test = accuracy_score(y_test2, pred_test2)
print (f'Acc train{acc_train}')
print (f'Acc test {acc_test}')

Acc train0.9833333333333333
Acc test 0.9666666666666667


## 9. Calculate and print the balanced accuracy score for both the training and the testing set.

In [30]:
from sklearn.metrics import balanced_accuracy_score
bal_acc_train = balanced_accuracy_score(y_train2, pred_train2)
bal_acc_test = balanced_accuracy_score(y_test2, pred_test2)
print (f'Acc train {bal_acc_train}')
print (f'Acc test {bal_acc_test}')

Acc train0.9803921568627452
Acc test 0.9791666666666666


## 10. Calculate and print the precision score for both the training and the testing set.

In [40]:
from sklearn.metrics import precision_score
prescore_train = precision_score(y_train2, pred_train2,pos_label='positive',average='macro')
prescore_test = precision_score(y_test2, pred_test2,pos_label='positive',average='macro')
print (f'Precission score train {prescore_train}')
print (f'Precission score test {prescore_test}')


Precission score train 0.9858156028368795
Precission score test 0.9444444444444445


## 11. Calculate and print the recall score for both the training and the testing set.

In [42]:
from sklearn.metrics import recall_score
recallscore_train = recall_score(y_train2,pred_train2,average='macro')
recallscore_test = recall_score(y_test2,pred_test2,average='macro')
print(f"Recall score train {recallscore_train}")
print(f"Recall score test {recallscore_test}")

Recall score train 0.9803921568627452
Recall score test 0.9791666666666666


## 12. Calculate and print the F1 score for both the training and the testing set.

In [43]:
from sklearn.metrics import f1_score
f1_score_train_macro = f1_score(y_train2,pred_train2,average='macro')
f1_score_test_macro = f1_score(y_test2,pred_test2,average='macro')
f1_score_train_micro = f1_score(y_train2,pred_train2,average='micro')
f1_score_test_micro = f1_score(y_test2,pred_test2,average='micro')
f1_score_train_weighted = f1_score(y_train2,pred_train2,average='weighted')
f1_score_test_weighted = f1_score(y_test2,pred_test2,average='weighted')

print(f"F1 score train macro {f1_score_train_macro}")
print(f"F1 score test macro {f1_score_train_macro}")
print(f"F1 score train micro {f1_score_train_micro}")
print(f"F1 score test micro {f1_score_train_micro}")
print(f"F1 score train weighted {f1_score_train_weighted}")
print(f"F1 score test weighted {f1_score_train_weighted}")

F1 score train macro 0.9826526130873957
F1 score test macro 0.9826526130873957
F1 score train micro 0.9833333333333333
F1 score test micro 0.9833333333333333
F1 score train weighted 0.983261967501098
F1 score test weighted 0.983261967501098


## 13. Generate confusion matrices for both the training and the testing set.

In [44]:
from sklearn.metrics import confusion_matrix

In [45]:
confusion_matrix(y_train2,pred_train2)

array([[41,  0,  0],
       [ 0, 32,  2],
       [ 0,  0, 45]])

## Bonus: For each of the data sets in this lab, try training with some of the other models you have learned about, recalculate the evaluation metrics, and compare to determine which models perform best on each data set.