## Model Test AI Song Capstone
***
This Python notebook tests the validity of the model used by the song recommender system.

In [1]:
#importing libraries
import sweetviz as sv
import numpy as np
import pandas as pd
import plotly.graph_objs as go
import plotly.express as px
import matplotlib.pyplot as plt
pd.options.plotting.backend = "plotly"
from sklearn.metrics.pairwise import cosine_similarity, euclidean_distances

In [2]:
# Read the songs dataset from the CSV next to the notebook and show its (rows, columns) shape.
songs_df = pd.read_csv('SongsByGenre.csv')
songs_df.shape

(10782, 19)

In [None]:
# Inspect the dtype pandas inferred for each column of the loaded frame.
songs_df.dtypes

In [None]:
# Preview the first rows of the loaded frame.
songs_df.head()

In [3]:
# Split the ';'-separated `artists` string into one 0/1 indicator column per
# artist and append those columns to the frame.
# NOTE: this rebinds `songs_df` to a frame built from itself, so re-running the
# cell appends the indicator columns a second time.
x = songs_df['artists'].str.get_dummies(sep=';')
songs_df = pd.concat([songs_df, x], axis='columns')

In [4]:
# Split the ';'-separated `genre` string into one 0/1 indicator column per
# genre and append those columns to the frame.
# NOTE: this rebinds `songs_df` to a frame built from itself, so re-running the
# cell appends the indicator columns a second time.
x = songs_df['genre'].str.get_dummies(sep=';')
songs_df = pd.concat([songs_df, x], axis='columns')

In [5]:
# One-hot encoding helper for nominal (unordered categorical) columns
def map_nominal_dummies(df, column_name):
    """Return ``df`` with indicator columns for ``column_name`` appended.

    One indicator column is created per distinct value of ``df[column_name]``
    and named with ``column_name`` as its prefix. The source column is kept,
    and the frame passed in is not modified; a new frame is returned.
    """
    indicator_columns = pd.get_dummies(df[column_name], prefix=column_name)
    return pd.concat([df, indicator_columns], axis='columns')

In [6]:
# Append one indicator column per distinct album (column names are prefixed with 'album').
songs_df = map_nominal_dummies(songs_df,'album')

In [7]:
# Lookup tables from the stringified numeric codes to readable labels.
mode_labels = {'0': 'Minor', '1': 'Major'}
key_labels = {
    '0': 'C', '1': 'C#/Db', '2': 'D', '3': 'D#/Eb',
    '4': 'E', '5': 'F', '6': 'F#/Gb', '7': 'G',
    '8': 'G#/Ab', '9': 'A', '10': 'A#/Bb', '11': 'B',
}

# The codes are cast to strings first so they match the string keys above;
# any value that is not a key of its table is left as it is.
for code_column in ('key', 'mode'):
    songs_df[code_column] = songs_df[code_column].astype("str")

# Column-wise replacement: each table is applied only to its own column.
songs_df.replace(to_replace={'mode': mode_labels, 'key': key_labels}, inplace=True)

In [8]:
# One-hot encode the relabelled `key` column first, then `mode`, so the new
# indicator columns are appended in that order.
for nominal_column in ('key', 'mode'):
    songs_df = map_nominal_dummies(songs_df, nominal_column)

In [13]:
# Remove the identifier and the raw categorical columns now that their
# indicator columns exist. Done in place, so this cell fails if it is run a
# second time (the columns are already gone).
songs_df.drop(
    columns=['id', 'album', 'genre', 'artists', 'key', 'mode'],
    inplace=True,
)

In [None]:
# Summary statistics for the columns that describe() selects by default; kept
# in a variable because a later cell reads its column labels.
describe_df = songs_df.describe()
describe_df

In [None]:
# Labels of the columns that made it into the describe() summary.
num_columns = describe_df.columns

In [None]:
# Correlate only the numeric columns. The frame still carries the string
# `name` column at this point, and a bare DataFrame.corr() raises on it with
# pandas >= 2.0 (older versions silently skipped non-numeric columns).
corr_map = songs_df.select_dtypes(include='number').corr()

# Take the axis labels from the correlation matrix itself so x, y and z are
# guaranteed to describe the same columns in the same order.
corr_labels = corr_map.columns

heat_map = go.Figure(
    go.Heatmap(x=corr_labels, y=corr_labels, z=corr_map, colorscale='YlorRd')
)
# Size the figure proportionally to the number of plotted features; the
# updated figure is the cell's last expression, so it is also displayed.
heat_map.update_layout(
    title='Corelation Between All Numerical Features',
    width=60 * len(corr_labels),
    height=35 * len(corr_labels),
)

In [None]:
# Save the heatmap to SongCorrelation.html; auto_open=True also asks plotly to open the file once written.
heat_map.write_html("SongCorrelation.html",auto_open=True)

In [9]:
# Normalise song titles to lower case so later lookups can compare against a
# lower-cased query string.
songs_df['name'] = songs_df['name'].str.lower()

In [10]:
def recommend_by_similarity(df, song_name, n, scale=False):
    """Return the ``n`` rows of ``df`` most similar to ``song_name``.

    Every column except ``name`` is used as a feature. The first row whose
    ``name`` equals ``song_name.lower()`` is the query, and each row of ``df``
    (the query row included) is scored by cosine similarity against it.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a ``name`` column and otherwise numeric feature columns.
        Only ``song_name`` is lower-cased for the lookup, so ``name`` must
        already hold lower-case titles for a match to be found.
    song_name : str
        Title of the query song (any casing).
    n : int
        Number of rows to return.
    scale : bool, default False
        When True, min-max scale every feature column to [0, 1] before
        scoring. Cosine similarity on raw features is dominated by whichever
        columns have the largest magnitudes, which can push almost every
        score to ~1.0; scaling puts the features on a comparable footing.
        The default keeps the original unscaled behaviour.

    Returns
    -------
    pandas.DataFrame
        A copy of ``df`` with an added ``Similarity`` column, sorted by it in
        descending order and truncated to the first ``n`` rows.

    Raises
    ------
    IndexError
        If no row of ``df`` has a ``name`` equal to ``song_name.lower()``.
    """
    # Locate the query by row *position*. The index label of the matching row
    # is only a valid offset into the feature matrix when the frame has a
    # default 0..N-1 index, so the label must not be used here.
    matches = np.flatnonzero((df['name'] == song_name.lower()).to_numpy())
    if matches.size == 0:
        raise IndexError(f"song {song_name!r} not found in the 'name' column")
    query_pos = matches[0]

    similar_df = df.copy()
    features = similar_df.loc[:, similar_df.columns != 'name'].to_numpy(dtype=float)

    if scale:
        col_min = features.min(axis=0)
        col_span = features.max(axis=0) - col_min
        # Constant columns have zero span; divide by 1 so they map to 0
        # instead of producing NaN.
        col_span[col_span == 0] = 1.0
        features = (features - col_min) / col_span

    # cosine_similarity returns shape (rows, 1); take column 0 explicitly so a
    # one-row frame still yields a 1-D result.
    similar_df['Similarity'] = cosine_similarity(features, features[query_pos, None])[:, 0]
    similar_df.sort_values(by='Similarity', ascending=False, inplace=True)
    return similar_df.head(n)

In [16]:
# Example query: request the 20 closest rows for one song and show only the title and score columns.
recommend_by_similarity(songs_df,'Lovers on the Sun (feat. Sam Martin)',20)[['name','Similarity']]

Unnamed: 0,name,Similarity
1760,lovers on the sun (feat. sam martin),1.0
1246,if i had a gun...,1.0
6880,strange comfort,1.0
6206,express 999,1.0
3701,wieder winter,1.0
2232,burning angel,1.0
685,foetus of a new day kicking,1.0
8526,pick me up,1.0
8644,find dem flaw,1.0
7805,feel so close - radio edit,1.0
