In [2]:
# Importando as bibliotecas necessárias
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import accuracy_score, precision_score, confusion_matrix, matthews_corrcoef
from sklearn.model_selection import train_test_split

In [3]:
# Lendo a planilha com o pandas
df = pd.read_excel('excel_generos_1_72_.xlsx')

# definimos quais vão ser as colunas que vamos avaliar
variables = ["danceability",
             "energy", 
             "key", 
             "loudness", 
             "mode", 
             "speechiness", 
             "acousticness", 
             "instrumentalness", 
             "liveness",
             "valence"
            ]

X = df[variables]
y = df["category_id"]

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.1)

In [5]:
# Início Random Forest
# Criando o modelo de Random Forest
rf = RandomForestClassifier(n_estimators = 500)

# Treinando o modelo
rf.fit(X_train, y_train)

# Fazendo previsões com o modelo treinado
y_pred = rf.predict(X_test)

# Avaliando o modelo
accuracy = accuracy_score(y_test, y_pred) * 100
precision = precision_score(y_test, y_pred, average='micro') * 100
confusion = confusion_matrix(y_test, y_pred)
mcc = matthews_corrcoef(y_test, y_pred) * 100
error_rate = 1 - accuracy

print('Acurácia:', accuracy)
print('Precisão:', precision)
print('Matriz de confusão:')
print(confusion)
print('MCC:', mcc)
print('Taxa de erro:', error_rate)

# Fim Random Forest

Acurácia: 31.947958077340076
Precisão: 31.947958077340076
Matriz de confusão:
[[ 5  0  0 ...  0  0  7]
 [ 1 10  1 ...  0  0  0]
 [ 0  0 12 ...  0  0  0]
 ...
 [ 0  0  0 ...  5  5  0]
 [ 0  0  0 ...  1 20  0]
 [ 8  0  1 ...  0  0  3]]
MCC: 30.974005419387566
Taxa de erro: -30.947958077340076


In [9]:
#testes de músicas com features que existem na base

# 16
print(rf.predict([[0.818, 0.913, 4, -3.06, 0, 0.0426, 0.093, 0.0000369, 0.161, 0.536]]))

#23
print(rf.predict([[0.567, 0.371, 7, -10.779, 0, 0.0265, 0.773, 0, 0.114, 0.461]]))

# 28
print(rf.predict([[0.335, 0.0146, 4, -23.818, 0, 0.0441, 0.995, 0.883, 0.0838, 0.0754]]))

# 36
print(rf.predict([[0.765, 0.539, 5, -6.645, 0, 0.0638, 0.466, 0.0124, 0.195, 0.671]]))

#48
print(rf.predict([[0.344, 0.9, 10, -5.997, 1, 0.158, 0.000229, 0.000965, 0.316, 0.251]]))

print(rf.predict([[0.345, 0.9, 11, -5.998, 2, 0.159, 0.000230, 0.000966, 0.317, 0.252]]))

#Teste de múscia que não existe na base

# aleatória criada por mim
print(rf.predict([[0.1800, 0.500, 2, -3.047, 0, 0.0539, 0.730, 0, 0.761, 200]]))



[16]
[23]
[28]
[36]
[48]
[48]
[40]




In [105]:
# Lendo a planilha com dados de quais múscias os users ouviram
df_spotify_dataset_users_tracks = pd.read_csv('excel_features_156fd148c44f2bce755b09bf95985ee8_.csv', on_bad_lines='skip')

In [109]:
for idx, music in df_spotify_dataset_users_tracks.iterrows():
    print(music["id"])
    print(rf.predict([[music["danceability"], 
                       music["energy"], 
                       music["key"], 
                       music["loudness"], 
                       music["mode"],
                       music["speechiness"],
                       music["acousticness"], 
                       music["instrumentalness"], 
                       music["liveness"], 
                       music["valence"]]]))
    print("--------------")

0WZVGXO8FYpK8v1IDxlOyE
[7]
--------------
00cxhG668jV6gU6VK2FUVI
[7]
--------------
4eDYMhIin1pSLIG96f1aD0
[7]
--------------
40qjDdULzkFRlivTqTggLH
[7]
--------------
2SHnUyZq0zwmvRIl4WY77G
[7]
--------------
2sduRP2wX0dG7waoyGPOGf




[4]
--------------
1xMLthTaWJieT9YGV1hyS5
[7]
--------------
1gNDbq3ufi6VZJJuQWJiPe
[7]
--------------
1u6LOWtdWA2wwa3CryIBsS
[7]
--------------
2ee46eHIDrDT28dQWAhr21
[7]
--------------
5FU5b1b0hVR016VdeJcYRd
[7]
--------------
0yrKF6Fj0dxkltqmBMvcQU
[7]
--------------
6PtXobrqImYfnpIxNsJApa
[7]
--------------
6VRhkROS2SZHGlp0pxndbJ




[4]
--------------
640AR2YmgcKoJpaqTnCE4R




[7]
--------------
22DnBQIIXFPJNRGuNjWlSg
[7]
--------------
1SOfxwVz2KBMzatpfTHqIR
[7]
--------------
3BHXrvIfVQwyjwOccHlyN7
[6]
--------------
5PZI5xTcMq8FsCcEroth7S
[7]
--------------
7xQYVjs4wZNdCwO0EeAWMC
[7]
--------------
6fdHO3OJsu1Lwq0XSFmVCk
[7]
--------------
1trC8L8YpawkU553ymy2zC
[6]
--------------
4BSWjP0LLUv6rTBsUgjHUG
[7]
--------------
18zQdV6BhfeoVU4OLnEO7p




[7]
--------------
2Tz46GlN7rb3xs2JHKJstD
[7]
--------------
4wtR6HB3XekEengMX17cpc
[7]
--------------
5DKHdWT3u5mOu3dm8A8P1V
[7]
--------------
60wwxj6Dd9NJlirf84wr2c
[6]
--------------
3bxtgzAeAZPdRtKoPL5GG2
[7]
--------------
2ToIksTPpJ4csKPEOdUEyM
[1]
--------------
3Tcs2zdJsXSSFxO6liZbkS
[4]
--------------
5faJXkNwcNiY7qsIliiAfy




[7]
--------------
7pqgMEKsDMOHUdFQ7n0N9K
[1]
--------------
6YKnt1usFogwmxS6hYc5Jd
[6]
--------------
1gEfBDVdhmcDtV1LPQazkN
[7]
--------------
2VEZx7NWsZ1D0eJ4uv5Fym
[7]
--------------
5aPjlZVzYFxXzO6VN9XzeL
[7]
--------------
5B89IGusnn7ALpum6QnA02
[2]
--------------
4YLuaFFnWTJHu3tLiTPLRf
[7]
--------------
5gMzdeAupiOHDo4H8GMK4c
[7]
--------------
6UEJtAnGHnK5tVhoPHD6tU




[4]
--------------
4ZsPPvNEpvqWinxqlI68m7
[7]
--------------
5jayakmKQm7hDyoBhSoDKx
[7]
--------------
2OnqLGs0xZvr8ZNzWJELGo
[7]
--------------
6vseQZb17iPCXjCIzUkDAL
[7]
--------------
4ZK4vl7z7dr1aTGRTlgvgB
[7]
--------------
5kwAIpUBwhmiBpsJlcMgO1
[7]
--------------
0BomfsUcw03fQPKK668Etn
[6]
--------------




                                 user_id  "artistname"  \
350375  c6af8cb99f488aa2def073d68c7da58c  Tye Tribbett   
350376  c6af8cb99f488aa2def073d68c7da58c         Elena   

                   "trackname"    "playlistname"  
350375                Champion  Liked from Radio  
350376  Hot Girls - Radio Edit  Liked from Radio  


2
