In [1]:
import pandas as pd
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

In [2]:
def comparerer(truth, predictions):
    """Tally a binary confusion matrix and the accuracy of ``predictions``.

    The two inputs are walked in parallel *by position* and every element is
    coerced with ``int()`` before comparison. A truth value of ``1`` is the
    positive class; any other value is counted as negative.

    Args:
        truth: Sized iterable of ground-truth labels (list, pandas Series, ...).
        predictions: Sized iterable of predicted labels, same length as ``truth``.

    Returns:
        A 5-tuple ``(pos_correct, pos_wrong, neg_correct, neg_wrong, accuracy)``:

        * ``pos_correct`` - truth is 1 and the prediction matches (true positives)
        * ``pos_wrong``   - truth is 1 and the prediction differs (false negatives)
        * ``neg_correct`` - truth is not 1 and the prediction matches (true negatives)
        * ``neg_wrong``   - truth is not 1 and the prediction differs (false positives)
        * ``accuracy``    - percentage of matching pairs (0-100)

        If the lengths differ, a message and both lengths are printed and all
        five values are 0. For empty input the accuracy is reported as 0.0.
    """
    if len(predictions) != len(truth):
        print('Lengths don\'t match!')
        print(len(predictions))
        print(len(truth))
        # Same arity as the success path, so callers that unpack five values
        # do not crash with a ValueError on top of the mismatch.
        return 0, 0, 0, 0, 0

    pos_correct = 0
    pos_wrong = 0
    neg_correct = 0
    neg_wrong = 0

    # zip() pairs elements positionally; indexing with [i] would be label-based
    # on a pandas Series and fail (or misalign) when the index is not 0..n-1.
    for actual, predicted in zip(truth, predictions):
        actual = int(actual)
        predicted = int(predicted)
        if predicted == actual:
            if actual == 1:
                pos_correct += 1
            else:
                neg_correct += 1
        else:
            if actual == 1:
                pos_wrong += 1
            else:
                neg_wrong += 1

    total = len(truth)
    # Guard the division so two empty inputs do not raise ZeroDivisionError.
    accuracy = (100 * (pos_correct + neg_correct)) / total if total else 0.0

    return pos_correct, pos_wrong, neg_correct, neg_wrong, accuracy



In [36]:
# Load the model's predictions; lines=True reads the file as one JSON record per line.
subject = pd.read_json('../data/predictions/music_test.json', lines=True)

# Binarise the predicted sentiment: 'positive' -> 1, anything else -> 0.
pred_y = [1 if sentiment == 'positive' else 0 for sentiment in subject['sentiment']]

len(pred_y)

10000

In [37]:
# pred_y holds 0/1 flags, so its sum is the number of predicted positives.
n_predicted_positives = sum(pred_y)
print(f'There are {n_predicted_positives} predicted positives')

There are 5954 predicted positives


In [38]:
# The ground-truth CSV has no header row, so name the columns explicitly.
# Assigning .columns raises if the file does not have exactly two columns.
subject_true = pd.read_csv('../data/interim/music_test.csv', header=None)
subject_true.columns = ['text', 'label']

In [39]:
# Summing the label column counts the positives, assuming labels are 0/1 - TODO confirm.
n_actual_positives = subject_true["label"].sum()
print(f'There are {n_actual_positives} actual positives')

There are 5945 actual positives


In [40]:
# Tally the confusion matrix against the ground truth:
#   pc / pw - truth-positive rows predicted correctly / incorrectly
#   nc / nw - truth-negative rows predicted correctly / incorrectly
#   acc     - share of matching rows, as a percentage
pc, pw, nc, nw, acc = comparerer(subject_true['label'], pred_y)

In [41]:
# Matches are the two "correct" tallies; mismatches are the two "wrong" ones.
n_correct = pc + nc
n_incorrect = pw + nw
print(f'There are {n_correct} correct')
print(f'There are {n_incorrect} incorrect')

# nw counts truth-negative rows that were mispredicted, i.e. false positives;
# pw counts truth-positive rows that were mispredicted, i.e. false negatives.
print(f'\nAccuracy:        {acc}%')
print(f'True positives:  {pc}')
print(f'True negatives:  {nc}')
print(f'False positives: {nw}')
print(f'False negatives: {pw}')


There are 9377 correct
There are 623 incorrect

Accuracy:        93.77%
True positives:  5638
True negatives:  3739
False positives: 316
False negatives: 307


In [46]:
# Score the predictions with scikit-learn, using the same arguments for all three metrics.
# NOTE(review): average='weighted' averages the per-class scores by class support,
# so these are not positive-class-only figures (weighted recall equals accuracy) -
# confirm this is the intended definition for the report.
y_true = subject_true['label']
averaging = {'average': 'weighted'}

f1 = f1_score(y_true, pred_y, **averaging)
print(f'F1:              {round(f1, 3)}')

p = precision_score(y_true, pred_y, **averaging)
print(f'Precision:       {round(p,3)}')

r = recall_score(y_true, pred_y, **averaging)
print(f'Recall:          {round(r,3)}')

F1:              0.938
Precision:       0.938
Recall:          0.938


In [50]:
# Pair every column name with its value so the two can never drift out of order.
music_base_metrics = {
    'domain': 'music',
    'trial_type': 'ba',
    'add_data': '00000',
    'correctly_predicted': pc + nc,
    'incorrectly_predicted': pw + nw,
    'total_predicted_positives': sum(pred_y),
    'ground_truth_positives': subject_true["label"].sum(),
    'TP': pc,
    'TN': nc,
    'FP': nw,
    'FN': pw,
    'accuracy': acc,
    'precision': p,
    'recall': r,
    'f1': f1,
}

columns = list(music_base_metrics)
data = {'music_base': list(music_base_metrics.values())}

# orient='index' turns each dict key into a row label: one row named 'music_base'.
df2 = pd.DataFrame.from_dict(data, orient='index', columns=columns)

In [51]:
# Show the one-row metrics table as the cell output.
df2

Unnamed: 0,domain,trial_type,add_data,correctly_predicted,incorrectly_predicted,total_predicted_positives,ground_truth_positives,TP,TN,FP,FN,accuracy,precision,recall,f1
music_base,music,ba,0,9377,623,5954,5945,5638,3739,316,307,93.77,0.937679,0.9377,0.937689


In [52]:
# Persist the metrics row for the report.
# NOTE(review): index=False omits the row label ('music_base') from the file -
# confirm that is intended if rows from several trials are combined later.
df2.to_csv('../report/metrics/music_test_metrics.csv', index=False)