This repository has been archived by the owner on Nov 14, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
average_pred_reg.py
106 lines (92 loc) · 3.61 KB
/
average_pred_reg.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
import dateutil
from sklearn.metrics import confusion_matrix
from matplotlib import pyplot as plt
from sklearn.utils.multiclass import unique_labels
def plot_confusion_matrix(y_true, y_pred, classes,
                          normalize=False,
                          title=None,
                          cmap=plt.cm.Blues):
    """Print and plot the confusion matrix of ``y_pred`` against ``y_true``.

    Normalization can be applied by setting `normalize=True`.

    Parameters
    ----------
    y_true : array-like
        Ground-truth labels.
    y_pred : array-like
        Predicted labels.
    classes : ndarray
        Display names, indexable by the label values that appear in the data.
    normalize : bool
        If True, normalize each row of the matrix to sum to 1.
    title : str or None
        Plot title; a default is chosen based on `normalize`.
    cmap : matplotlib colormap
        Colormap for the matrix image.

    Returns
    -------
    matplotlib.axes.Axes
        The axes holding the rendered confusion matrix.
    """
    if not title:
        if normalize:
            title = 'Normalized confusion matrix'
        else:
            title = 'Confusion matrix, without normalization'

    # Compute confusion matrix
    cm = confusion_matrix(y_true, y_pred)
    # Only use the labels that appear in the data
    classes = classes[unique_labels(y_true, y_pred)]
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')
    print(cm)

    # Overall accuracy = diagonal (correctly classified) over all entries.
    # Generalized from a hard-coded 3x3 expansion so any number of classes
    # works; identical result for the original 3-class case.
    # NOTE(review): when normalize=True this is computed on the normalized
    # matrix (matching the original statement order) — confirm that is the
    # intended quantity.
    acc = np.trace(cm) / cm.sum()
    print(acc)

    fig, ax = plt.subplots()
    im = ax.imshow(cm, interpolation='nearest', cmap=cmap)
    ax.figure.colorbar(im, ax=ax)
    # We want to show all ticks...
    ax.set(xticks=np.arange(cm.shape[1]),
           yticks=np.arange(cm.shape[0]),
           # ... and label them with the respective list entries
           xticklabels=classes, yticklabels=classes,
           title=title,
           ylabel='True label',
           xlabel='Predicted label')
    # Rotate the tick labels and set their alignment.
    plt.setp(ax.get_xticklabels(), rotation=45, ha="right",
             rotation_mode="anchor")

    # Loop over data dimensions and create text annotations.
    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, format(cm[i, j], fmt),
                    ha="center", va="center",
                    color="white" if cm[i, j] > thresh else "black")
    fig.tight_layout()
    return ax
# ---------------------------------------------------------------------------
# Baseline regressor: for each combined-feature id, predict the mean CCI seen
# for that id in the training split; ids never seen in training fall back to
# a constant prediction of 4. Reports MSE and MAE on the validation split.
# ---------------------------------------------------------------------------
df = pd.read_csv('/home/giandbt/Documents/hack/team_garbage/data/clean_datav6.csv')
df = df.sample(frac=1).reset_index(drop=True)

y_train = df['cci'].values
X_train = df['comb_ids'].values
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, train_size=0.9, random_state=90)

# Per-id mean of the training targets, built in a single pass.
# (The original rescanned all of X_train once per distinct id: O(classes * n).)
cci_sums = {}
cci_counts = {}
for feature, target in zip(X_train, y_train):
    cci_sums[feature] = cci_sums.get(feature, 0) + target
    cci_counts[feature] = cci_counts.get(feature, 0) + 1
predictions = {feature: cci_sums[feature] / cci_counts[feature]
               for feature in cci_sums}

# Predict the per-id mean on the validation split; unseen ids default to 4.
y_preds = []
for feature in X_val:
    try:
        y_preds.append(predictions[feature])
    except KeyError:
        # Narrowed from a bare `except:` — only a missing id should trigger
        # the fallback; any other error should surface, not be swallowed.
        y_preds.append(4)

# Accumulate squared and absolute errors over the validation set.
size_val = len(X_val)
summation = 0        # sum of squared differences
summation_mae = 0    # sum of absolute differences
for pred, actual in zip(y_preds, y_val):
    difference = abs(pred - actual)
    summation += difference ** 2
    summation_mae += difference
MSE = summation / size_val
MAE = summation_mae / size_val
print("The Mean Square Error is: ", MSE)
print("The Mean Absolute Error is: ", MAE)