In [12]:
import json
from random import randrange
from sklearn.metrics import classification_report

In [13]:
with open('../../data/lyrics.json', 'r', encoding='utf8') as f:
    dataset = json.load(f)
       
import pandas as pd
labeled_songs = pd.read_csv("labeled_songs.csv")
    
import ast
data = []
with open("songs_to_label.txt", "r", encoding='utf8') as inFile:
    data = ast.literal_eval(inFile.read())

In [14]:
## defining two dictionaries for bidirectional mapping between int & class
class_to_int = {'neutral' : 0, 'liebevoll' : 1, 'gewalttätig' : 2, 'rassistisch' : 3,
                   'homophob' : 4, 'frauenfeindlich' : 5, 'freundlich' : 6, 'positiv' : 7, 'traurig' : 8}
int_to_class = {v: k for k, v in class_to_int.items()}

In [15]:
## function to get the highest scoring class for a song
def get_highest_class(song):
    max_class = 0
    highest_score = 0
    for key, value in song['total_class_score'].items():
        if highest_score < value:
            max_class = class_to_int[key]
            highest_score = value
    return max_class

## find a song in the dataset by its genius_track_id
def find_song(dataset, idx):
    for song in dataset:
        if song['genius_track_id'] == idx:
            return song
    print("not found")

In [16]:
## randomly sample n songs for each class for a total of 9*n (9 classes)
import random

total = 0
n = 10
attempts = 0
random.seed(237)

samples = []
for i in range(9):
    elist = []
    samples.append(elist)
while total < 9*n and attempts < 1000:
    idx = random.randrange(len(dataset))
    class_number = get_highest_class(dataset[idx])
    if (not dataset[idx] in samples[class_number]) and (len(samples[class_number]) < n):
        samples[class_number].append(dataset[idx])
        total += 1
    attempts += 1

In [17]:
## get all the songs, that were manually labeled, into a list for the evaluation
## also sets a new attribute 'top_class' for each song for the highest scoring class
songs_list = []
for idx in labeled_songs['id']:
    song = find_song(dataset, idx)
    top_class = get_highest_class(song)
    song['top_class'] = top_class
    songs_list.append(song)

In [18]:
## creating the different prediction & groundtruth (our labels) lists.

classification_prediction = []
sentiment_prediction = []
toxicity_prediction = []
classification_truth = []
sentiment_truth = []
toxicity_truth = []

for idx, song in enumerate(songs_list):
    classification_prediction.append(song['top_class'])
    sentiment = song['sentiment_value']
    if sentiment < 0:
        sentiment = -1
    else:
        sentiment = 1
    sentiment_prediction.append(sentiment)
    toxicity = song['toxicity_value']
    if toxicity >= 0.5:
        toxicity = 1
    else:
        toxicity = -1
    toxicity_prediction.append(toxicity)
    classification_truth.append(labeled_songs.iloc[idx]['classifier'])
    sentiment_truth.append(labeled_songs.iloc[idx]['sentiment'])
    toxicity_truth.append(labeled_songs.iloc[idx]['toxicity'])

In [19]:
## we're interested in the macro-avg
print("classifier:")
print(classification_report(classification_truth, classification_prediction))
print("sentiment:")
print(classification_report(sentiment_truth, sentiment_prediction))
print("toxicity:")
print(classification_report(toxicity_truth, toxicity_prediction))

classifier:
              precision    recall  f1-score   support

           0       0.22      0.22      0.22         9
           1       0.70      0.78      0.74         9
           2       0.67      0.40      0.50        15
           3       0.00      0.00      0.00         1
           5       0.33      0.60      0.43         5
           6       0.00      0.00      0.00         0
           7       0.36      0.50      0.42         8
           8       1.00      0.48      0.65        21

    accuracy                           0.47        68
   macro avg       0.41      0.37      0.37        68
weighted avg       0.65      0.47      0.52        68

sentiment:
              precision    recall  f1-score   support

          -1       0.63      0.98      0.77        42
           1       0.67      0.08      0.14        26

    accuracy                           0.63        68
   macro avg       0.65      0.53      0.45        68
weighted avg       0.64      0.63      0.53        68


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [20]:
## calculating the number of songs labeled per each class
## just some general info, not necessary for the evaluation

class_scores = {0 : 0, 1 : 0, 2 : 0, 3 : 0,
                   4 : 0, 5 : 0, 6 : 0, 7 : 0, 8 : 0}

for song in dataset:
    max_class = 0
    highest_score = 0
    for key, value in song['total_class_score'].items():
        if highest_score < value:
            max_class = class_to_int[key]
            highest_score = value
    class_scores[max_class] += 1

## convert
class_scores = {int_to_class[k]: v for k, v in class_scores.items()}