# Music Recommendation

In [1]:
#import libraries
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
import nltk
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
#load the scraped and cleaned data
df = pd.read_csv("Boomplay Scraped songs.csv")
df

Unnamed: 0.1,Unnamed: 0,song_name,artist_name,a_age,time
0,0,Like Ice Spice,Blaqbonez,1996-01-29,02:03
1,1,Defender,Frankie Jay,,02:04
2,2,Ngozi,Crayon,,03:41
3,3,Sharpally,Young Jonn,1995-02-16,02:43
4,4,Lonely At The Top,Asake,1995-01-13,02:37
...,...,...,...,...,...
99,99,DOG EAT DOG II,ODUMODUBLVCK,,04:00
100,100,Bad Influence,Omah Lay,1997-05-19,02:18
101,101,Para Boi,Seyi Vibez,1990-07-12,02:37
102,102,All My Life,Lil Durk,1992-10-19,03:43


In [3]:
#Copy dataset to avoid tampering with the original
df_new = df.copy()


#convert timestamp to string
df_new['time']=df_new['time'].astype('str')
df_new['a_age']=df_new['a_age'].astype('str')

In [4]:
#check
df_new.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 104 entries, 0 to 103
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Unnamed: 0   104 non-null    int64 
 1   song_name    104 non-null    object
 2   artist_name  104 non-null    object
 3   a_age        104 non-null    object
 4   time         104 non-null    object
dtypes: int64(1), object(4)
memory usage: 4.2+ KB


In [5]:
#Create tags
df_new['tags'] = df_new['song_name']+df_new['artist_name']+df_new['a_age']+df_new['time']
df_new['song_name'] = df_new['song_name'].apply(lambda x:x.strip())
df_new

Unnamed: 0.1,Unnamed: 0,song_name,artist_name,a_age,time,tags
0,0,Like Ice Spice,Blaqbonez,1996-01-29,02:03,Like Ice SpiceBlaqbonez1996-01-2902:03
1,1,Defender,Frankie Jay,,02:04,DefenderFrankie Jaynan02:04
2,2,Ngozi,Crayon,,03:41,Ngozi Crayonnan03:41
3,3,Sharpally,Young Jonn,1995-02-16,02:43,SharpallyYoung Jonn1995-02-1602:43
4,4,Lonely At The Top,Asake,1995-01-13,02:37,Lonely At The TopAsake1995-01-1302:37
...,...,...,...,...,...,...
99,99,DOG EAT DOG II,ODUMODUBLVCK,,04:00,DOG EAT DOG II ODUMODUBLVCKnan04:00
100,100,Bad Influence,Omah Lay,1997-05-19,02:18,Bad InfluenceOmah Lay1997-05-1902:18
101,101,Para Boi,Seyi Vibez,1990-07-12,02:37,Para BoiSeyi Vibez1990-07-1202:37
102,102,All My Life,Lil Durk,1992-10-19,03:43,All My Life Lil Durk1992-10-1903:43


In [6]:
#vectorize the tags
cv = CountVectorizer(max_features=5000, stop_words='english')

In [7]:
#Reducing or stemming words to it's roots
ps = nltk.stem.PorterStemmer()


def stem(obj):
    y = []
    for i in obj.split():
        y.append(ps.stem(i))
    return " ".join(y)

In [8]:
df_new['tags'] = df_new['tags'].apply(stem)

In [9]:
vector = cv.fit_transform(df_new['tags']).toarray()
vector

array([[0, 1, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=int64)

In [10]:
#finding the cosine similarities in the vector using cosine siimilarities

similarity = cosine_similarity(vector)

In [11]:
similarity.shape

(104, 104)

In [12]:
df_new['tags']

0      like ice spiceblaqbonez1996-01-2902:03
1                  defenderfranki jaynan02:04
2                        ngozi crayonnan03:41
3          sharpallyyoung jonn1995-02-1602:43
4         lone at the topasake1995-01-1302:37
                        ...                  
99        dog eat dog ii odumodublvcknan04:00
100      bad influenceomah lay1997-05-1902:18
101         para boiseyi vibez1990-07-1202:37
102       all my life lil durk1992-10-1903:43
103       amaz godmerci chinwo1991-09-0504:56
Name: tags, Length: 104, dtype: object

In [13]:
#reduce all the song title to lower case
df_new['song_name'] = df_new['song_name'].apply(lambda x:x.lower())

In [21]:
def recommend(key):
    
    key = key.lower()
    index = df_new[df_new['song_name'] == key].index[0]     #extracts an index where the song is present
    sim_row = similarity[index]
    song_ls = sorted(list(enumerate(sim_row)), reverse= True, key = lambda x:x[1])[1:15] #getting the list on [index, similarity records]
    
        #getting the list of song titles
    for i in song_ls:
        print(df_new.iloc[i[0]].song_name)
        
    #return songs

# FINAL OUTPUT TEST


In [22]:
#input your Nigerian songs
recommend('i told them')

cheat on me
on form
common person
for my hand
tested, approved & trusted
normal
city boys
big 7
karma
dejavu
wonder
hollow
hushpuppi
spending


# Ask for Recommendations

In [None]:

while True:
    L=[]
    try:
        # Prompt the user for input
        input_ = input("Enter your song:", '\n')
        L.append(input_)
        #print(input_)
        #input_ = f'"{input_}"'
        for i in L:
            recommend(i)
        break
        
    except Exception as e:
        # Handle any exceptions that occur during the processing
        print("Not in our database")
        print("Please try again.")

print("Exiting the program.")



Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our database
Please try again.
Not in our d