In [38]:
import implicit
import numpy as np
import pandas as pd
import requests
import scipy.sparse as sparse
from bs4 import BeautifulSoup as bs
from sklearn.preprocessing import MinMaxScaler

# Load the cleaned 0404 log.
clean = pd.read_csv('/Users/aidanairuser/Desktop/OPENWHYD/0404_log_clean.csv')

# Drop the 'timestamp' column.
clean_df = clean.drop(labels=['timestamp'], axis='columns')

# This cleaned 0404 log has 22M rows, whereas the original log had 25M rows.

In [40]:
# Collapse the log into one row per (user, song) pair together with its play
# count, i.e. the 'user - song - number of plays' layout.
flipped = (
    clean_df
    .groupby(['user', 'song'])['song']
    .count()
    .rename('song_count')
    .reset_index()
    .rename(columns={'user': 'user_id'})
)

# Switch 'song' from object (string) dtype to a categorical, then expose each
# category's integer code as a new 'song_id' column.
flipped['song'] = flipped['song'].astype('category')
flipped['song_id'] = flipped['song'].cat.codes

In [41]:
# Configure the ALS (alternating least squares) model; it is fitted in a later cell.
model = implicit.als.AlternatingLeastSquares(
    factors=20,
    regularization=0.1,
    iterations=20,
)


In [70]:

# NB: this is the step that trains the engine; once it has run, the model is
# ready to generate a fresh batch of suggestions.

# The implicit library works on sparse item-user / user-item matrices for ALS,
# so both orientations are built here with scipy.
play_values = flipped['song_count'].astype(float)
song_idx = flipped['song_id']
user_idx = flipped['user_id']

sparse_song_user = sparse.csr_matrix((play_values, (song_idx, user_idx)))
sparse_user_song = sparse.csr_matrix((play_values, (user_idx, song_idx)))

# The algorithm's confidence measure needs an ALPHA value; 15 is used here.
# Confidence is computed as: SONG_USER matrix x alpha.
alpha_val = 15
data_conf = (sparse_song_user * alpha_val).astype('double')

# The model is fitted on this confidence matrix.
# NOTE(review): this passes the item-user (song x user) orientation; implicit
# releases from 0.5 onward expect user-item instead -- confirm against the
# installed version.
model.fit(data_conf)

100%|██████████| 20.0/20 [02:58<00:00,  8.14s/it]


In [71]:
# With the model fitted, read its learned latent factors and compute the
# Euclidean norm of every song (item) vector.
user_vecs = model.user_factors
song_vecs = model.item_factors

squared_song_vecs = np.square(song_vecs)
song_norms = np.sqrt(squared_song_vecs.sum(axis=1))


In [72]:
# Inputs: a user ID, the USER_SONG matrix, the user and song factor matrices,
# and the number of items to return.

def recommend(user_id, sparse_user_song, user_vecs, song_vecs, num_items=10):
    """Return the best-scoring songs that `user_id` has not played yet.

    Parameters
    ----------
    user_id : int
        Row index of the user in both `sparse_user_song` and `user_vecs`.
    sparse_user_song : scipy.sparse matrix
        User x song play-count matrix.
    user_vecs, song_vecs : scipy.sparse matrix or numpy.ndarray
        Latent factors, one row per user / per song. Pass both as sparse
        matrices or both as dense arrays.
    num_items : int, optional
        How many recommendations to return (default 10).

    Returns
    -------
    pandas.DataFrame
        Columns 'song' and 'score', highest score first. Scores are the
        min-max scaled predictions; songs already played are forced to 0.

    Notes
    -----
    Song labels come from the notebook-level `flipped` frame, whose 'song'
    column must be categorical with `song_id` equal to its category codes.
    """
    user_interactions = sparse_user_song[user_id, :].toarray()

    # After the +1 shift, never-played songs hold exactly 1 and played songs
    # hold more than 1; zeroing the latter leaves a 1/0 "not yet played" mask.
    user_interactions = user_interactions.reshape(-1) + 1
    user_interactions[user_interactions > 1] = 0

    # Predicted preference for every song: user factors . song factors.
    rec_vector = user_vecs[user_id, :].dot(song_vecs.T)
    if sparse.issparse(rec_vector):
        # Only a sparse product needs densifying; a dense one is used as is.
        rec_vector = rec_vector.toarray()
    rec_vector = np.asarray(rec_vector)

    min_max = MinMaxScaler()
    rec_vector_scaled = min_max.fit_transform(rec_vector.reshape(-1, 1))[:, 0]

    # Multiplying by the mask removes already-played songs from the ranking.
    recommend_vector = user_interactions * rec_vector_scaled

    item_idx = np.argsort(recommend_vector)[::-1][:num_items]

    # song_id is the category code of flipped['song'], so the label is a
    # direct positional lookup instead of a full scan of `flipped` per item.
    song_labels = flipped['song'].cat.categories

    songs = [song_labels[idx] for idx in item_idx]
    scores = [recommend_vector[idx] for idx in item_idx]

    return pd.DataFrame({'song': songs, 'score': scores})

In [75]:

# Pick the user to create recommendations for.
user_id = 1

# Wrap the trained user and song factors as CSR matrices so they can be passed
# to recommend(). This rebinds user_vecs / song_vecs, which previously held the
# model's raw factor arrays.
song_vecs = sparse.csr_matrix(model.item_factors)
user_vecs = sparse.csr_matrix(model.user_factors)

recommendations = recommend(user_id, sparse_user_song, user_vecs, song_vecs)

print(recommendations)

          song     score
0  mfnzpfgR_AE  1.000000
1  4ZHwu0uut3k  0.919856
2  bpOSxM0rNPM  0.915817
3  fiore9Z5iUg  0.911038
4  o47HCjB3Plc  0.909622
5  QqkYwkjpo_o  0.906726
6  JaAWdljhD5o  0.899376
7  BQeMxWjpr-Y  0.897428
8  9mnoiRqh0dQ  0.897018
9  n0FOPTYJPXw  0.896480


In [76]:
# Run through the recommendations again, this time with a YouTube page
# scrape / lookup so the tracks can actually be listened to.

rec_songs_only = recommendations.song
happy_listener = user_id

print ("l'auditeur content c'est le", happy_listener)

# Loop-invariant, so built once rather than on every iteration.
yt = "https://www.youtube.com/watch?v="
request_timeout_s = 10   # seconds; without a timeout requests.get may block indefinitely

for item in rec_songs_only:

    url = yt + item.strip()

    try:
        page = requests.get(url, timeout=request_timeout_s)
    except requests.RequestException as err:
        # Best-effort lookup: report the failure and carry on with the next song.
        print ("lookup failed for", url, "-", err)
        continue

    soup = bs(page.text, 'html.parser')

    # Distinct loop names so the outer `item` is not rebound by the inner loop.
    for title_container in soup.find_all('h1', {'class': 'watch-title-container'}):
        for post in title_container.find_all('span', {'class': 'watch-title'}):
            # get_text() also copes with nested tags, where .string is None.
            print (post.get_text().strip())
            print (url)


l'auditeur content c'est le 1
Annihilator - Brain Dance [HD/1080p]
https://www.youtube.com/watch?v=mfnzpfgR_AE
Tom Odell - Another Love (Zwette Edit)
https://www.youtube.com/watch?v=4ZHwu0uut3k
Arctic Monkeys - Do I Wanna Know? (Official Video)
https://www.youtube.com/watch?v=bpOSxM0rNPM
Lilly Wood & The Prick and Robin Schulz - Prayer In C (Robin Schulz Remix) (Official)
https://www.youtube.com/watch?v=fiore9Z5iUg
Missy Elliot - The Rain
https://www.youtube.com/watch?v=o47HCjB3Plc
Six Days - DJ Shadow
https://www.youtube.com/watch?v=QqkYwkjpo_o
SAIL - AWOLNATION (Unofficial Video)
https://www.youtube.com/watch?v=JaAWdljhD5o
Coldplay - Midnight
https://www.youtube.com/watch?v=BQeMxWjpr-Y
Milky Chance - Down By The River (FlicFlac Edit)
https://www.youtube.com/watch?v=9mnoiRqh0dQ
Disclosure - F For You ft. Mary J. Blige (Official Video)
https://www.youtube.com/watch?v=n0FOPTYJPXw
