In [11]:
from sklearn.ensemble import RandomForestClassifier as rcf
import pandas as pd
from TrackManager import TrackManager
from sklearn.model_selection import train_test_split
from sklearn import metrics


In [52]:
df = pd.read_csv('rapnotrap.csv')
df2 = pd.read_csv('songs.csv')
df3 = pd.read_csv('no_cap.csv')

In [71]:
df = pd.read_csv('songs_for_training.csv')

In [72]:
X_train, X_test, y_train, y_test = train_test_split(df.iloc[:, 1:-1], df.iloc[:, -1], test_size=0.20, shuffle=True, random_state=20)

In [73]:
def rcf_classifier(n, criterion, max_leaf_nodes):
    classifier = rcf(n_estimators=n, criterion=criterion,
                     n_jobs=-1, max_leaf_nodes=max_leaf_nodes)
    classifier.fit(X_train, y_train)
    return classifier


In [74]:

for n in range(10, 100, 10):
    classifier = rcf_classifier(n, 'gini', 2)
    y_pred = classifier.predict(X_test)
    print("Classification_report with " + str(n) + " estimators: ")
    print('_____________________________________________')
    print(str(metrics.classification_report(y_test, y_pred)))
    




Classification_report with 10 estimators: 
_____________________________________________
              precision    recall  f1-score   support

     not_rap       0.80      0.81      0.80        58
         rap       0.79      0.77      0.78        53

    accuracy                           0.79       111
   macro avg       0.79      0.79      0.79       111
weighted avg       0.79      0.79      0.79       111

Classification_report with 20 estimators: 
_____________________________________________
              precision    recall  f1-score   support

     not_rap       0.78      0.91      0.84        58
         rap       0.88      0.72      0.79        53

    accuracy                           0.82       111
   macro avg       0.83      0.82      0.82       111
weighted avg       0.83      0.82      0.82       111

Classification_report with 30 estimators: 
_____________________________________________
              precision    recall  f1-score   support

     not_rap       0.76 

In [75]:
for n in range(2, 32, 2):
    classifier = rcf_classifier(30, 'gini', n)
    y_pred = classifier.predict(X_test)
    print("Classification_report with " + str(n) + " max_leaf_nodes: ")
    print('_____________________________________________')
    print(str(metrics.classification_report(y_test, y_pred)))

Classification_report with 2 max_leaf_nodes: 
_____________________________________________
              precision    recall  f1-score   support

     not_rap       0.70      0.98      0.81        58
         rap       0.97      0.53      0.68        53

    accuracy                           0.77       111
   macro avg       0.83      0.76      0.75       111
weighted avg       0.82      0.77      0.75       111

Classification_report with 4 max_leaf_nodes: 
_____________________________________________
              precision    recall  f1-score   support

     not_rap       0.82      0.86      0.84        58
         rap       0.84      0.79      0.82        53

    accuracy                           0.83       111
   macro avg       0.83      0.83      0.83       111
weighted avg       0.83      0.83      0.83       111

Classification_report with 6 max_leaf_nodes: 
_____________________________________________
              precision    recall  f1-score   support

     not_rap   

In [76]:
for criterion in ['gini', 'entropy', 'log_loss']:
    classifier = rcf_classifier(30, criterion, 6)
    y_pred = classifier.predict(X_test)
    print("Classification_report with " + criterion + " criterion: ")
    print('_____________________________________________')
    print(str(metrics.classification_report(y_test, y_pred)))


Classification_report with gini criterion: 
_____________________________________________
              precision    recall  f1-score   support

     not_rap       0.82      0.88      0.85        58
         rap       0.86      0.79      0.82        53

    accuracy                           0.84       111
   macro avg       0.84      0.84      0.84       111
weighted avg       0.84      0.84      0.84       111

Classification_report with entropy criterion: 
_____________________________________________
              precision    recall  f1-score   support

     not_rap       0.82      0.88      0.85        58
         rap       0.86      0.79      0.82        53

    accuracy                           0.84       111
   macro avg       0.84      0.84      0.84       111
weighted avg       0.84      0.84      0.84       111

Classification_report with log_loss criterion: 
_____________________________________________
              precision    recall  f1-score   support

     not_rap  

In [77]:
x = df.iloc[:, 1:-1] 
y = df.iloc[:, -1]
prod_classifier = rcf(n_estimators=30, criterion='log_loss',
                      n_jobs=-1, max_leaf_nodes=6)
prod_classifier.fit(x, y)

In [78]:
currents = pd.read_csv('currents_info.csv')
t_m = TrackManager()
names = t_m.get_names(currents.iloc[:,0])

In [79]:
predictions = prod_classifier.predict(currents.iloc[:, 1:])
for i in range(len(names)):
    print('Prediction for ' + names[i] + ' is ' + predictions[i])


Prediction for SHOOT! is not_rap
Prediction for Gone Girl is not_rap
Prediction for Yuck is not_rap
Prediction for Open Arms (feat. Travis Scott) is not_rap
Prediction for Kill Bill is not_rap
Prediction for Used (feat. Don Toliver) is not_rap
Prediction for we all try is rap
Prediction for Snooze is not_rap
Prediction for Used To Know Me is not_rap
Prediction for Sweet Life is not_rap
Prediction for Walk Em Down (Don't Kill Civilians) [with 21 Savage & feat. Mustafa] is rap
Prediction for Pilot Jones is not_rap
Prediction for Don't Get Chipped is not_rap
Prediction for JoHn Muir is rap
Prediction for TorcH is not_rap


In [92]:
pop_mix = pd.read_csv('pop_mixfeb2.csv')
predictions = prod_classifier.predict(pop_mix.iloc[:, 1:-1])
labels = pop_mix.iloc[:,0].to_list()
for i in range(len(predictions)):
    if predictions[i] == 'rap':
        print(labels[i] + ' was labeled as ' + predictions[i])

You Right was labeled as rap
Kool was labeled as rap
