# Notebook 5 - Model Explanation
This notebook uses `eli5` to highlight the words that contribute most to each predicted genre, to aid model explainability.

In [36]:
import numpy as np
import pandas as pd
from ast import literal_eval
import pickle
import eli5
from IPython.display import display, HTML

import warnings
warnings.filterwarnings("ignore")

In [6]:
# Load the trained model and the fitted TF-IDF vectorizer from disk.
# Context managers close each file handle as soon as loading finishes,
# instead of leaving it open until garbage collection.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
with open('templates/model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
with open('templates/vectorizer.pkl', 'rb') as vectorizer_file:
    vectorizer = pickle.load(vectorizer_file)

In [34]:
# Show the overall word weights for each genre classifier.
# Each entry holds the two target labels for one genre; the entry at
# position k is paired with model.estimators_[k] below.
class_names = [
    ["Not Country", "Country"],
    ["Not Pop", "Pop"],
    ["Not R&B", "R&B"],
    ["Not Rap", "Rap"],
    ["Not Rock", "Rock"],
]

for genre_idx, genre_labels in enumerate(class_names):
    # Top 10 weighted features for this genre's estimator.
    weights_view = eli5.show_weights(
        model.estimators_[genre_idx],
        top=10,
        vec=vectorizer,
        target_names=genre_labels,
    )
    display(HTML(weights_view.data))

Weight?,Feature
+12.854,thorns
+5.585,mmh
+3.980,hanging
+3.662,everyones
+3.319,planned
+3.181,whiskey
+3.125,dreaming
… 8796 more positive …,… 8796 more positive …
… 18857 more negative …,… 18857 more negative …
-3.123,also


Weight?,Feature
+7.181,mm
+5.665,hangin
+4.747,manipulate
+4.426,sang
+4.146,stress
… 10663 more positive …,… 10663 more positive …
… 22732 more negative …,… 22732 more negative …
-3.796,perfеct
-3.798,likecause
-3.817,hang


Weight?,Feature
+4.164,mm
+3.119,club
+3.027,issues
… 8712 more positive …,… 8712 more positive …
… 23333 more negative …,… 23333 more negative …
-2.977,bossed
-3.026,drowning
-3.116,stuntin
-3.139,sing
-3.430,someday


Weight?,Feature
+2.980,closed
+2.908,rap
+2.579,bossed
+2.473,uh
+2.391,bruno
+2.330,closer
+2.301,stressed
+2.287,bronx
+2.258,loner
+2.239,flexin


Weight?,Feature
+4.816,blocked
+3.415,beating
+3.113,colder
… 8093 more positive …,… 8093 more positive …
… 22601 more negative …,… 22601 more negative …
-3.109,block
-3.257,laa
-3.539,misunderstood
-3.597,uh
-4.975,thе


In [37]:
# Read the preprocessed dataset; the "tokens" and "genre" columns are
# parsed with literal_eval while the file is loaded.
column_parsers = {"tokens": literal_eval, "genre": literal_eval}
preproc_df = pd.read_csv(
    "data/genre_prepped.csv.gz",
    compression="gzip",
    converters=column_parsers,
)

In [54]:
# Show word contributions for a single prediction (one song).
# The first song is used as the test case; its expected genre is country,
# so the 5 outputs should be: country, not pop, not R&B, not rap, and not rock.
first_song_lyrics = preproc_df["lyrics_clean"].iloc[0]

for genre_idx, genre_labels in enumerate(class_names):
    prediction_view = eli5.show_prediction(
        model.estimators_[genre_idx],
        first_song_lyrics,
        vec=vectorizer,
        target_names=genre_labels,
    )
    display(HTML(prediction_view.data))

Contribution?,Feature
1.571,Highlighted in text (sum)
-0.571,<BIAS>


Contribution?,Feature
0.723,<BIAS>
0.599,Highlighted in text (sum)


Contribution?,Feature
0.773,<BIAS>
0.227,Highlighted in text (sum)


Contribution?,Feature
1.046,<BIAS>
-0.02,Highlighted in text (sum)


Contribution?,Feature
0.863,Highlighted in text (sum)
0.137,<BIAS>
