# Sistema de recomendação

Neste notebook iremos fazer o sistema de recomendação

### Bibliotecas

- pandas

### Instalação e importação de dependências

In [1]:
pip install scikit-learn


Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

Nesta etapa buscamos os `.csv` criados no notebook `03-process-data.ipynb` para fazermos as recomendações.

In [3]:
tracks_df = pd.read_csv("tracks.csv")
features_df = pd.read_csv("features.csv")


In [4]:
tracks_df.head()


Unnamed: 0,artist_name,track_name,track_url,danceability,energy,key,loudness,mode,speechiness,acousticness,...,analysis_url,duration_ms,time_signature,artist_popularity,track_popularity,genres,artists_song,genres_list,subjectivity,polarity
0,Shakira,Chantaje (feat. Maluma),6mICuAdrwEjh6Y6lroV2Kg,0.852,0.773,8,-2.921,0,0.0776,0.187,...,https://api.spotify.com/v1/audio-analysis/6mIC...,195840,4,94,76,colombian_pop dance_pop latin_pop,ShakiraChantaje (feat. Maluma),"['colombian_pop', 'dance_pop', 'latin_pop']",low,Neutral
1,Ricky Martin,Vente Pa' Ca (feat. Maluma),7DM4BPaS7uofFul3ywMe46,0.663,0.92,11,-4.07,0,0.226,0.00431,...,https://api.spotify.com/v1/audio-analysis/7DM4...,259196,4,76,70,latin_pop mexican_pop puerto_rican_pop,Ricky MartinVente Pa' Ca (feat. Maluma),"['latin_pop', 'mexican_pop', 'puerto_rican_pop']",low,Neutral
2,CNCO,Reggaetón Lento (Bailemos),3AEZUABDXNtecAOSC1qTfo,0.761,0.838,4,-3.073,0,0.0502,0.4,...,https://api.spotify.com/v1/audio-analysis/3AEZ...,222560,4,72,71,boy_band latin_pop reggaeton,CNCOReggaetón Lento (Bailemos),"['boy_band', 'latin_pop', 'reggaeton']",low,Neutral
3,"J Balvin, Pharrell Williams, BIA, Sky",Safari,6rQSrBHf7HlZjtcMZ4S4bO,0.508,0.687,0,-4.361,1,0.326,0.551,...,https://api.spotify.com/v1/audio-analysis/6rQS...,205600,4,89,0,reggaeton reggaeton_colombiano urbano_latino,"J Balvin, Pharrell Williams, BIA, SkySafari","['reggaeton', 'reggaeton_colombiano', 'urbano_...",low,Neutral
4,Daddy Yankee,Shaky Shaky,58IL315gMSTD37DOZPJ2hf,0.899,0.626,6,-4.228,0,0.292,0.076,...,https://api.spotify.com/v1/audio-analysis/58IL...,234320,4,90,0,latin_hip_hop reggaeton urbano_latino,Daddy YankeeShaky Shaky,"['latin_hip_hop', 'reggaeton', 'urbano_latino']",high,Negative


In [5]:
features_df.head()


Unnamed: 0.1,Unnamed: 0,genre|21st_century_classical,genre|432hz,genre|48g,genre|5th_wave_emo,genre|8d,genre|_brasileira,genre|_hip_hop,genre|_house,genre|_roll,...,key|5,key|6,key|7,key|8,key|9,key|10,key|11,mode|0,mode|1,id
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.5,0.0,6mICuAdrwEjh6Y6lroV2Kg
1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.5,0.0,7DM4BPaS7uofFul3ywMe46
2,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,3AEZUABDXNtecAOSC1qTfo
3,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,6rQSrBHf7HlZjtcMZ4S4bO
4,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.5,0.0,58IL315gMSTD37DOZPJ2hf


Além desses dois `.csv`, também precisamos de uma playlist de teste.

### Importar e pré processar playlist de teste

In [6]:
def drop_duplicates(df):
    df["artists_song"] = df.apply(lambda row: str(
        row["artist_name"]) + str(row["track_name"]), axis=1)
    return df.drop_duplicates("artists_song")


def select_columns(df):
    return df[["artist_name", "id", "track_name", "danceability", "energy", "key", "loudness", "mode",
               "speechiness", "acousticness", "instrumentalness", "liveness", "valence", "tempo", "artist_popularity", "genres", "track_popularity"]]


def process_genres(df):
    df["genres_list"] = df["genres"].apply(lambda x: str(x).split(" "))
    return df


def process_playlist(df):
    df = drop_duplicates(df)
    df = select_columns(df)
    df = process_genres(df)

    return df


In [7]:
playlist_df = pd.read_csv("playlist.csv")
playlist_df = process_playlist(playlist_df)
playlist_df.head()


Unnamed: 0,artist_name,id,track_name,danceability,energy,key,loudness,mode,speechiness,acousticness,instrumentalness,liveness,valence,tempo,artist_popularity,genres,track_popularity,genres_list
0,DENNIS,0fABszUFNbNq9IW503Gj8v,Tá OK,0.863,0.96,11,-1.458,1,0.0492,0.497,0.00526,0.0853,0.586,130.118,72,funk_carioca funk_das antigas sertanejo_univer...,92,"[funk_carioca, funk_das, antigas, sertanejo_un..."
1,AgroPlay,4uqJelb9THHmJ3OCohg4ZJ,Nosso Quadro,0.691,0.708,9,-4.821,1,0.195,0.338,0.0,0.311,0.602,160.018,73,agronejo,85,[agronejo]
2,Veigh,4hKLzFvNwHF6dPosGT30ed,Novo Balanço,0.836,0.499,3,-5.82,0,0.0563,0.669,0.0,0.134,0.65,124.05,80,trap_brasileiro,90,[trap_brasileiro]
3,Israel & Rodolffo,3PH1nUysW7ybo3Yu8sqlPN,Seu Brilho Sumiu - Ao Vivo,0.625,0.916,6,-3.704,1,0.0461,0.309,0.0,0.906,0.746,153.664,76,agronejo arrocha sertanejo sertanejo_universit...,87,"[agronejo, arrocha, sertanejo, sertanejo_unive..."
4,Simone Mendes,51pxwIxDVCbFHW8oyJFQXD,Erro Gostoso - Ao Vivo,0.588,0.893,6,-2.743,1,0.0866,0.176,0.0,0.803,0.626,153.778,73,sertanejo,90,[sertanejo]


In [8]:
def generate_playlist(features_df, playlist_df):
    # Encontra features na playlist
    features_in_playlist = features_df[
        features_df["id"].isin(playlist_df["id"].values)
    ]

    # Encontra features que não estão na playlist
    features_not_in_playlist = features_df[
        ~features_df["id"].isin(playlist_df["id"].values)
    ]
    features_in_playlist = features_in_playlist.drop(columns="id")

    return features_in_playlist.sum(axis=0), features_not_in_playlist


In [9]:
# Gera as features
features_in_playlist, features_not_in_playlist = generate_playlist(
    features_df, playlist_df
)


In [10]:
features_not_in_playlist.head()

Unnamed: 0.1,Unnamed: 0,genre|21st_century_classical,genre|432hz,genre|48g,genre|5th_wave_emo,genre|8d,genre|_brasileira,genre|_hip_hop,genre|_house,genre|_roll,...,key|5,key|6,key|7,key|8,key|9,key|10,key|11,mode|0,mode|1,id
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.5,0.0,0.0,0.0,0.5,0.0,6mICuAdrwEjh6Y6lroV2Kg
1,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.5,0.0,7DM4BPaS7uofFul3ywMe46
2,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,0.0,3AEZUABDXNtecAOSC1qTfo
3,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.5,6rQSrBHf7HlZjtcMZ4S4bO
4,4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.5,0.0,0.0,0.0,0.0,0.0,0.5,0.0,58IL315gMSTD37DOZPJ2hf


In [11]:
features_in_playlist.head()

Unnamed: 0                      2748386.0
genre|21st_century_classical          0.0
genre|432hz                           0.0
genre|48g                             0.0
genre|5th_wave_emo                    0.0
dtype: float64

In [12]:
def get_recommendations(tracks_df, features_in_playlist, features_not_in_playlist):
    non_playlist_df = tracks_df[
        tracks_df["id"].isin(features_not_in_playlist["id"].values)
    ]

    non_playlist_df["sim"] = cosine_similarity(
        features_not_in_playlist.drop("id", axis=1).values,
        features_in_playlist.values.reshape(1, -1),
    )[:, 0]

    predict_playlist = non_playlist_df.sort_values("sim", ascending=False).head(50)

    return predict_playlist

In [13]:
recommended_playlist = get_recommendations(
    tracks_df, features_in_playlist, features_not_in_playlist
)
recommended_playlist.head()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  non_playlist_df["sim"] = cosine_similarity(


Unnamed: 0,artist_name,track_name,track_url,danceability,energy,key,loudness,mode,speechiness,acousticness,...,duration_ms,time_signature,artist_popularity,track_popularity,genres,artists_song,genres_list,subjectivity,polarity,sim
291687,Clayton & Romário,Se Eu Pedir Cê Volta / Ela É Demais - Ao Vivo,0Igd14iSpQI7M7EqBlJ7N4,0.592,0.895,0,-2.658,1,0.0666,0.419,...,165824,4,72,58,arrocha sertanejo sertanejo_pop sertanejo_univ...,Clayton & RomárioSe Eu Pedir Cê Volta / Ela É ...,"['arrocha', 'sertanejo', 'sertanejo_pop', 'ser...",low,Neutral,1.0
294194,João Bosco & Vinicius,"Chora, Me Liga - Ao Vivo",1NpD7cQH8hGMZrufOQQ0Xk,0.682,0.905,0,-3.825,1,0.112,0.257,...,181147,4,64,60,arrocha sertanejo sertanejo_pop sertanejo_univ...,"João Bosco & ViniciusChora, Me Liga - Ao Vivo","['arrocha', 'sertanejo', 'sertanejo_pop', 'ser...",low,Neutral,1.0
284906,Luan Santana,Sinais - Ao Vivo,7EgRqkvNThCaCJB0lhyZH5,0.638,0.886,0,-4.714,1,0.0321,0.348,...,233280,4,78,49,arrocha sertanejo sertanejo_pop sertanejo_univ...,Luan SantanaSinais - Ao Vivo,"['arrocha', 'sertanejo', 'sertanejo_pop', 'ser...",low,Neutral,1.0
329167,Fernando & Sorocaba,Madri - Ao Vivo,4IPopPf6xn1mx57yuNgjkN,0.772,0.497,7,-7.548,1,0.0316,0.591,...,240333,4,64,55,arrocha sertanejo sertanejo_pop sertanejo_univ...,Fernando & SorocabaMadri - Ao Vivo,"['arrocha', 'sertanejo', 'sertanejo_pop', 'ser...",low,Neutral,1.0
326894,Gustavo Mioto,Anti-Amor - Ao Vivo,7JdZZfozlODQ08ATf8xIZ6,0.643,0.541,1,-8.786,1,0.0777,0.463,...,165360,4,77,0,arrocha sertanejo sertanejo_pop sertanejo_univ...,Gustavo MiotoAnti-Amor - Ao Vivo,"['arrocha', 'sertanejo', 'sertanejo_pop', 'ser...",low,Neutral,1.0


In [14]:
recommended_playlist[["artist_name","track_name"]][:50]

Unnamed: 0,artist_name,track_name
291687,Clayton & Romário,Se Eu Pedir Cê Volta / Ela É Demais - Ao Vivo
294194,João Bosco & Vinicius,"Chora, Me Liga - Ao Vivo"
284906,Luan Santana,Sinais - Ao Vivo
329167,Fernando & Sorocaba,Madri - Ao Vivo
326894,Gustavo Mioto,Anti-Amor - Ao Vivo
291046,Zé Neto & Cristiano,Estado Decadente - Ao Vivo
294139,Diego & Victor Hugo,To Te Filmando (Sorria) / Meu Bem Querer - Ao ...
294086,João Bosco & Vinicius,Endereço
291477,Diego & Victor Hugo,Lugar que Tem Banheira - Ao Vivo
294208,Zé Neto & Cristiano,Amigo Taxista - Ao Vivo


In [15]:
recommended_playlist.to_csv("recommended_playlist.csv", index=False)