## Import

In [1]:
import pandas as pd
import numpy as np
import random
import networkx as nx
import matplotlib.pyplot as plt
from itertools import chain
import pandas as pd
from pandas import ExcelWriter
from pandas import ExcelFile

## Functions

In [2]:
def hamming(a,b):
    """Return the Hamming distance between two sequences.

    The comparison runs over the indices of ``a``: the result is the number
    of positions ``i`` in ``range(len(a))`` where ``a[i] == b[i]`` is false.

    Parameters
    ----------
    a, b : indexable sequences
        ``b`` must be at least as long as ``a``; any elements of ``b`` past
        ``len(a)`` are ignored.

    Returns
    -------
    int
        Number of mismatching positions (0 when ``a`` is empty).

    Raises
    ------
    IndexError
        For list-like inputs, when ``b`` is shorter than ``a``.
    """
    # `not (x == y)` rather than `x != y` so the count is defined purely in
    # terms of equality, exactly like a "decrement on match" counter.
    return sum(1 for i in range(len(a)) if not a[i] == b[i])

def convert_range(mylist,min_x,max_x):
    """Linearly rescale the values of ``mylist`` onto ``[min_x, max_x]``.

    The smallest input value maps to ``min_x`` and the largest to ``max_x``.

    Parameters
    ----------
    mylist : sequence of numbers
        Values to rescale.
    min_x, max_x : number
        Lower and upper bound of the target range.

    Returns
    -------
    list
        Rescaled values, in the same order as ``mylist``. An empty input
        gives an empty list. If every input value is identical the
        normalisation is undefined, so every element is mapped to ``min_x``.
    """
    if len(mylist) == 0:
        return []

    # Compute the extrema once instead of once per element.
    lowest = min(mylist)
    spread = max(mylist) - lowest
    if spread == 0:
        # Constant input: avoid dividing by zero.
        return [min_x for _ in mylist]

    # The width of the target range is max_x - min_x (not max_x - 1, which is
    # only right when min_x happens to be 1).
    target_width = max_x - min_x
    return [(x - lowest) / spread * target_width + min_x for x in mylist]

def create_feature_ids(columns):
    """Build one positional feature id per column: ``'f_0'``, ``'f_1'``, ...

    Only the number of entries in ``columns`` matters; their values are not
    used. Returns a new list of strings.
    """
    return ['f_' + str(position) for position, _ in enumerate(columns)]

In [7]:
def create_songs_dict(df):
    """Map each row label of ``df`` to that row's values after the first column.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per song. The first column is skipped; the remaining columns
        are returned as the song's feature list.

    Returns
    -------
    dict
        ``{index_label: [value, ...]}``. If an index label occurs more than
        once, the last such row wins.
    """
    # .iloc makes the "skip the first column" slice explicitly positional,
    # independent of the type of the column labels.
    return {index: row.iloc[1:].to_list() for index, row in df.iterrows()}

In [8]:
def create_user(user_id,init_id, songs_dict,n_songs_rated):
    """Simulate the song ratings of one user, starting from one initial song.

    Every song gets a similarity score to the initial song (Hamming distance
    on the feature lists, inverted and rescaled to 1..10). Songs are then
    sampled without replacement with probability proportional to that score,
    and each selected song receives a rating drawn from a normal distribution
    centred on its score.

    Parameters
    ----------
    user_id : hashable
        Identifier written into every returned rating row.
    init_id : hashable
        Key in ``songs_dict`` of the user's initial song; it is always part of
        the user's rated songs.
    songs_dict : dict
        ``{song_id: feature_list}``.
    n_songs_rated : int
        Number of songs to sample. The initial song may be among the sampled
        ones, so the user ends up with ``n_songs_rated`` or
        ``n_songs_rated + 1`` rated songs.

    Returns
    -------
    list of [song_id, user_id, rating]
        ``rating`` is an int clamped to 1..10. The initial song comes first,
        followed by the sampled songs in sampling order.

    Raises
    ------
    KeyError
        If ``init_id`` is not a key of ``songs_dict``.
    ValueError
        Raised by ``np.random.choice`` when ``n_songs_rated`` exceeds the
        number of songs (sampling is without replacement).
    """
    init_song = songs_dict[init_id]
    ids = list(songs_dict.keys())

    hamming_distances = [hamming(init_song, song) for song in songs_dict.values()]

    # Invert distances so that closer songs score higher, then rescale to 1..10.
    max_distance = max(hamming_distances)
    hamming_similarity = np.array(
        convert_range([max_distance - h for h in hamming_distances], 1, 10)
    )
    hamming_similarity_dict = dict(zip(ids, hamming_similarity))

    # Sampling probabilities, proportional to similarity.
    p = hamming_similarity / hamming_similarity.sum()

    # Scalar spread of the scores; keeping it a scalar makes every rating draw
    # a plain float instead of a 1-element array.
    hamming_std = np.std(hamming_similarity)

    # .tolist() converts the sampled numpy values back to native Python objects.
    sampled = np.random.choice(
        np.array(ids), n_songs_rated, p=p, replace=False
    ).tolist()
    # Order-preserving de-duplication (the initial song may be sampled again);
    # unlike set(), the order does not depend on string hashing.
    user_songs = list(dict.fromkeys([init_id] + sampled))

    user_songs_rating = []
    for song_id in user_songs:
        raw_rating = np.random.normal(
            loc=hamming_similarity_dict[song_id], scale=hamming_std
        )
        # Round to the nearest integer (int() alone would truncate toward
        # zero), then clamp to the valid rating range.
        rating = min(10, max(1, int(round(raw_rating))))
        user_songs_rating.append([song_id, user_id, rating])
    return user_songs_rating




In [9]:
def create_files(song_database,user_names,init_song_list,n_songs_rated_per_user,
                 output_dir="PGM-final-project-master/data"):
    """Generate the song, user and rating CSV files from a song-feature workbook.

    Three files are written into ``output_dir``:

    * ``music_data2.csv`` - the songs, with columns ``song_id``, ``name``
      ("Artist - Song") and one ``f_<i>`` column per feature;
    * ``user_data2.csv``  - one row per user (``user_id``, ``name``);
    * ``ratings2.csv``    - simulated ratings (``song_id``, ``user_id``,
      ``rating``) produced by ``create_user``.

    Parameters
    ----------
    song_database : str
        Path of the Excel file to read. It must contain ``Artist`` and
        ``Song`` columns. The code assumes that, once ``Artist`` is dropped,
        the first column is the song id, the second is ``Song`` and all
        remaining columns are features.
    user_names : list of str
        Display names of the users; ids are ``'U00' + str(position)``.
    init_song_list : list
        Initial song id for each user, matched to ``user_names`` by position.
    n_songs_rated_per_user : int
        Number of songs sampled for every user.
    output_dir : str, optional
        Directory receiving the CSV files. Defaults to the project's data
        folder.

    Raises
    ------
    IndexError
        If ``init_song_list`` has more entries than ``user_names``.
    """
    df = pd.read_excel(song_database)

    # Fold the artist into the song title in one vectorised step, then drop
    # the now redundant artist column.
    df['Song'] = df['Artist'] + ' - ' + df['Song']
    df = df.drop('Artist',axis=1)

    feature_ids = create_feature_ids(df.columns[2:])
    df.columns = ['song_id','name']+feature_ids
    df.to_csv(output_dir + "/music_data2.csv",header=True, index=False)
    df = df.set_index('song_id')

    user_data = [['U00'+str(ind), name] for ind, name in enumerate(user_names)]
    user_dataframe = pd.DataFrame(user_data, columns = ['user_id','name'])

    # CREATE USER_DATA CSV FILE
    user_dataframe.to_csv(output_dir + "/user_data2.csv",header=True, index=False)
    songs_dict = create_songs_dict(df)

    all_user_ratings = []
    for i, s_id in enumerate(init_song_list):
        u_id = user_data[i][0]
        all_user_ratings.extend(
            create_user(u_id,s_id,songs_dict,n_songs_rated_per_user)
        )

    ratings_dataframe = pd.DataFrame(all_user_ratings, columns = ['song_id','user_id','rating'])
    ratings_dataframe.to_csv(output_dir + "/ratings2.csv",header=True, index=False)

## CREATE THE CSV FILES

In [10]:
# Path of the original song-features Excel workbook read by create_files.
songs_database = "PGM-final-project-master/data/song_features.xlsx"

# Display names of the simulated users; create_files assigns the ids
# 'U000', 'U001', ... in this order.
user_names = ['Federico', 'Alejandra','Mario','Giulia']

# One initial song id per user, matched to user_names by position. Each user's
# rated songs are sampled according to their similarity to this song.
init_song_list = ['S01','E03','J02','R07']
# Write the song, user and rating CSV files; 12 is the number of songs sampled
# per user (n_songs_rated_per_user).
# NOTE(review): no random seed is set in the cells visible here, so the
# generated ratings presumably differ from run to run - confirm if intended.
create_files(songs_database,user_names,init_song_list,12)