In [1]:
import pandas as pd
import numpy as np
import random
import sklearn.metrics as metrics

In [2]:
df = pd.read_excel('Example_ModelEval.xlsx')
df.head()

Unnamed: 0,Hours Studied,Exam Score,Predicted Score,Actual Pass?,Predicted Pass?
0,1,68,79.03,N,Y
1,1,78,79.03,N,Y
2,1,75,79.03,N,Y
3,2,83,82.11,Y,Y
4,2,80,82.11,Y,Y


In [3]:
df1 = df.copy()
df1 = df1.assign(Absolute_Error = np.abs(df1['Predicted Score'] - df1['Exam Score']))
df1 = df1.assign(Relative_Error = np.abs(df1['Predicted Score'] - df1['Exam Score'])/df1['Exam Score'])
df1 = df1.assign(Square_Error = np.square(df1['Predicted Score'] - df1['Exam Score']))
df1

Unnamed: 0,Hours Studied,Exam Score,Predicted Score,Actual Pass?,Predicted Pass?,Absolute_Error,Relative_Error,Square_Error
0,1,68,79.03,N,Y,11.03,0.162206,121.6609
1,1,78,79.03,N,Y,1.03,0.013205,1.0609
2,1,75,79.03,N,Y,4.03,0.053733,16.2409
3,2,83,82.11,Y,Y,0.89,0.010723,0.7921
4,2,80,82.11,Y,Y,2.11,0.026375,4.4521
5,2,78,82.11,N,Y,4.11,0.052692,16.8921
6,2,89,82.11,Y,Y,6.89,0.077416,47.4721
7,2,93,82.11,Y,Y,10.89,0.117097,118.5921
8,3,90,85.19,Y,Y,4.81,0.053444,23.1361
9,3,91,85.19,Y,Y,5.81,0.063846,33.7561


In [4]:
print("Evaluation of model prediction of exam scores (Manually):")
print("Relative Error:",df1['Relative_Error'].sum())
print("Absolute Error:",df1['Absolute_Error'].sum())
print("MAE:",df1['Absolute_Error'].sum()/df1.shape[0])
print("MSE:", df1['Square_Error'].sum()/df1.shape[0])
print("RMSE:", np.sqrt(df1['Square_Error'].sum()/df.shape[0]))

Evaluation of model prediction of exam scores (Manually):
Relative Error: 0.8368377426829996
Absolute Error: 69.81
MAE: 4.654
MSE: 32.276046666666666
RMSE: 5.681201164073198


In [5]:
mae = metrics.mean_absolute_error(df1['Exam Score'], df1['Predicted Score'])
mse = metrics.mean_squared_error(df1['Predicted Score'], df1['Exam Score'])
rmse = np.sqrt(mse) # or mse**(0.5)  
r2 = metrics.r2_score(df1['Predicted Score'], df1['Exam Score'])

print("Evaluation of model prediction of exam scores:")
print("MAE:",mae)
print("MSE:", mse)
print("RMSE:", rmse)
print("R-Squared:", r2)

Evaluation of model prediction of exam scores:
MAE: 4.654
MSE: 32.276046666666666
RMSE: 5.681201164073198
R-Squared: -0.31083664184285786


In [6]:
print("Evaluation of model prediction of Pass?:")
print("Accuracy:",metrics.accuracy_score(df1['Actual Pass?'], df1['Predicted Pass?']))
print("Recall:", metrics.recall_score(df1['Actual Pass?'], df1['Predicted Pass?'], pos_label='Y'))
print("Precision:", metrics.precision_score(df1['Actual Pass?'], df1['Predicted Pass?'],pos_label='Y'))
print("F1:", metrics.f1_score(df1['Actual Pass?'], df1['Predicted Pass?'],pos_label='Y'))

Evaluation of model prediction of Pass?:
Accuracy: 0.7333333333333333
Recall: 1.0
Precision: 0.7333333333333333
F1: 0.846153846153846


---- 

## Addinng more data randomly

In [22]:
df2 = df.copy()
for _ in range(1,15):
    df2 = df2.append({'Hours Studied':random.randint(1, 6), 'Exam Score':round(random.uniform(df['Exam Score'].min()+10, df['Exam Score'].max()-10), 1), \
               'Predicted Score': round(random.uniform(df['Exam Score'].min()+10, df['Exam Score'].max()-10), 1),\
               'Actual Pass?':random.choices(['Y','N'],weights=[0.89,0.11])[0], 'Predicted Pass?':random.choices(['Y','N'],weights=[0.89,0.11])[0]},ignore_index = True)
df2.tail()

Unnamed: 0,Hours Studied,Exam Score,Predicted Score,Actual Pass?,Predicted Pass?
24,3,81.4,82.8,Y,Y
25,4,80.5,78.6,Y,Y
26,3,83.0,80.2,Y,Y
27,4,79.6,78.6,Y,Y
28,4,80.4,78.1,Y,N


In [23]:
mae = metrics.mean_absolute_error(df2['Exam Score'], df2['Predicted Score'])
mse = metrics.mean_squared_error(df2['Predicted Score'], df2['Exam Score'])
rmse = np.sqrt(mse) # or mse**(0.5)  
r2 = metrics.r2_score(df2['Predicted Score'], df2['Exam Score'])

print("- Evaluation of model prediction of exam scores:")
print("MAE:",mae)
print("MSE:", mse)
print("RMSE:", rmse)
print("R-Squared:", r2)

print("")

print("- Evaluation of model prediction of Pass?:")
print("Accuracy:",metrics.accuracy_score(df2['Actual Pass?'], df2['Predicted Pass?']))
print("Recall:", metrics.recall_score(df2['Actual Pass?'], df2['Predicted Pass?'], pos_label='Y'))
print("Precision:", metrics.precision_score(df2['Actual Pass?'], df2['Predicted Pass?'],pos_label='Y'))
print("F1:", metrics.f1_score(df2['Actual Pass?'], df2['Predicted Pass?'],pos_label='Y'))

- Evaluation of model prediction of exam scores:
MAE: 3.4003448275862067
MSE: 19.365541379310343
RMSE: 4.400629657141162
R-Squared: 0.006454226692451326

- Evaluation of model prediction of Pass?:
Accuracy: 0.8275862068965517
Recall: 0.9583333333333334
Precision: 0.8518518518518519
F1: 0.9019607843137256


In [24]:
df2

Unnamed: 0,Hours Studied,Exam Score,Predicted Score,Actual Pass?,Predicted Pass?
0,1,68.0,79.03,N,Y
1,1,78.0,79.03,N,Y
2,1,75.0,79.03,N,Y
3,2,83.0,82.11,Y,Y
4,2,80.0,82.11,Y,Y
5,2,78.0,82.11,N,Y
6,2,89.0,82.11,Y,Y
7,2,93.0,82.11,Y,Y
8,3,90.0,85.19,Y,Y
9,3,91.0,85.19,Y,Y
