In [1]:
import csv
from collections import defaultdict

In [2]:
genres = ['Drama', 'Comedy', 'Horror']

def calculate_metrics(filename):
    results = defaultdict(list)
    
    with open(filename) as f:
        reader = csv.DictReader(f)
        for row in reader:
            actual_genres = eval(row['actual genres'])
            for genre in genres:
                correct = row['correct?']
                
                if genre in actual_genres:
                    results['positive_'+genre].append(int(correct))
                else:
                    results['negative_'+genre].append(int(correct))
                    
                if row['predicted'] == genre:
                    results['predicted_'+genre].append(int(correct))
                    
    for genre in genres:
        tp = sum(results['positive_'+genre])
        fp = len(results['predicted_'+genre]) - tp
        fn = sum(results['positive_'+genre]) - tp
        tn = len(results['negative_'+genre]) - fp
        
        accuracy = (tp + tn) / (tp + tn + fp + fn)
        precision = tp / (tp + fp) if (tp + fp) > 0 else 0
        recall = tp / (tp + fn) if (tp + fn) > 0 else 0
        f1 = 2 * precision * recall / (precision + recall) if (precision and recall) else 0
        
        print(f'{genre} Accuracy: {accuracy:.3f}')
        print(f'{genre} Precision: {precision:.3f}') 
        print(f'{genre} Recall: {recall:.3f}')
        print(f'{genre} F1: {f1:.3f}\n')

In [3]:
print('Model 1:')
calculate_metrics('../data/prediction_model_01.csv')

Model 1:
Drama Accuracy: 1.000
Drama Precision: 1.000
Drama Recall: 1.000
Drama F1: 1.000

Comedy Accuracy: 0.984
Comedy Precision: 0.681
Comedy Recall: 1.000
Comedy F1: 0.810

Horror Accuracy: 0.964
Horror Precision: 0.313
Horror Recall: 1.000
Horror F1: 0.477



In [4]:
print('Model 2:')
calculate_metrics('../data/prediction_model_02.csv') 

Model 2:
Drama Accuracy: 0.837
Drama Precision: 0.730
Drama Recall: 1.000
Drama F1: 0.844

Comedy Accuracy: 1.031
Comedy Precision: 1.167
Comedy Recall: 1.000
Comedy F1: 1.077

Horror Accuracy: 0.994
Horror Precision: 0.932
Horror Recall: 1.000
Horror F1: 0.965



In [5]:
print('Model 3:')
calculate_metrics('../data/prediction_model_03.csv')

Model 3:
Drama Accuracy: 0.498
Drama Precision: 0.498
Drama Recall: 1.000
Drama F1: 0.665

Comedy Accuracy: 1.155
Comedy Precision: 0.000
Comedy Recall: 1.000
Comedy F1: 0.000

Horror Accuracy: 1.032
Horror Precision: 0.000
Horror Recall: 1.000
Horror F1: 0.000



After analyzing the accuracy, precision, recall and F1 scores of Models 1, 2, and 3, it is evident that Model 1 produces the best genre predictions overall.

For Drama, Model 1 achieved a perfect score in accuracy, precision, recall and F1, which means that all the predictions made were accurate with no false positives or negatives.

For Comedy, Model 1 had a decent accuracy of 0.984 and the best F1 score of 0.810. In contrast, Model 2 had lower precision and was over-predicting comedy, resulting in more false positives.

For Horror, Model 1 again had the best F1 score of 0.477, but with a lower precision. However, the recall was perfect, which means that all actual horrors were correctly predicted. On the other hand, Model 2 had higher recall but lower precision and F1 scores, making it less accurate for comedies.

Model 3 had poor accuracy and F1 scores of 0 for Comedy and Horror, which means it failed to predict those genres at all.