# Demo Synth Data

In [1]:
# import libraries

import warnings
warnings.filterwarnings("ignore",category=DeprecationWarning)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform

In [2]:
# user labels (id__0 ... id__1999) and the interest tags every user rates

names = ['id__%d' % i for i in range(2000)]

tags = [
    'r&b', 'rock', 'jazz', 'techno', 'pop', 'indie',
    'cinema', 'theater', 'beers', 'wine', 'party', 'trips',
    'running', 'gym', 'golf', 'basket', 'football', 'yoga',
]

In [3]:
# synth data

# Seed NumPy's global generator so the synthetic ratings (and everything derived
# from them further down) come out the same after Restart & Run All.
np.random.seed(42)

# one row per user, one integer rating in 0..5 per tag
data = np.random.randint(0, 6, (len(names), len(tags)))
s_data = pd.DataFrame(data, columns=tags, index=names)

# 0/1 flag per user; drawn as a flat vector instead of an (n, 1) column array
s_data['plan'] = np.random.randint(0, 2, len(names))

# running integer id; 'plan' and 'id' stay the last two columns because the
# distance cells select the rating columns with .iloc[:, :-2]
s_data['id'] = range(len(names))
s_data.shape

(2000, 20)

In [4]:
# distance metrics that can be passed to pdist (squareform only expands the
# condensed result into a square matrix)
# NOTE(review): 'wminkowski' and 'kulsinski' appear to have been removed from
# recent SciPy releases - confirm against the installed version before using them.

'''
'euclidean', 'minkowski', 'cityblock', 'seuclidean', 'sqeuclidean', 'cosine', 'correlation'

'hamming', 'jaccard', 'chebyshev', 'canberra', 'braycurtis', 'mahalanobis', 'yule'

'matching', 'dice', 'kulsinski', 'rogerstanimoto', 'russellrao', 'sokalmichener'

'sokalsneath', 'wminkowski'
'''

# metric used by every distance computation below
metric = 'euclidean'

In [None]:
# square pairwise-distance matrix over the rating columns only
# (the last two columns, 'plan' and 'id', are sliced off)
squareform(pdist(s_data.iloc[:, :-2], metric=metric))

In [None]:
# similarity matrix

# Users are compared on their tag ratings only, so BOTH trailing columns
# ('plan' and 'id') must be dropped. The previous slice (:-1) removed only 'id',
# which let the 0/1 'plan' flag leak into the distances and disagreed with the
# other distance computations in this notebook (all of which use :-2).
# Distances d >= 0 are mapped to similarities 1 / (1 + d) in (0, 1].
similar = pd.DataFrame(1 / (1 + squareform(pdist(s_data.iloc[:, :-2], metric))),
                       index=s_data.index, columns=s_data.index)

similar.head()

In [None]:
# rows and columns of `similar` are both labelled with s_data.index, so this should be square
similar.shape

# New User

In [None]:
# new fixed user

# one random integer rating in 0..5 per tag (single row)
n_rating = np.random.randint(0, 6, (1, len(tags)))

# tag -> rating mapping for the new user
n_user = dict(zip(tags, n_rating[0]))

# Label follows the same 'id__<n>' format (double underscore) as the existing
# users and as new_user(); the previous value 'id_2001' broke that convention.
n_user['id'] = 'id__2001'
# the new user starts without a plan
n_user['plan'] = 0

n_user

In [None]:
# new user introduced into system

# DataFrame.append was removed in pandas 2.0, so build a one-row frame that is
# already labelled with the user's id and concatenate it. Columns are aligned by
# name, so the existing column order of s_data is kept.
# The guard makes the cell safe to re-run: the user is added at most once.
if n_user['id'] not in s_data.index:
    s_data = pd.concat([s_data, pd.DataFrame([n_user], index=[n_user['id']])])

# keep the list of labels in sync with the frame's index
names = list(s_data.index)

In [None]:
# similarity for new user

# recompute the similarity matrix now that the new user is part of s_data;
# only the rating columns are used ('plan' and 'id' are sliced off)
similar = pd.DataFrame(1 / (1 + squareform(pdist(s_data.iloc[:, :-2], metric))),
                       index=s_data.index, columns=s_data.index)

# every user's similarity to the new user, most similar first
similarities = similar[n_user['id']].sort_values(ascending=False)

# .ix was removed in pandas 1.0: look the 'plan' flag up by label with .loc and
# filter with a boolean mask instead of a per-row Python loop. The order of
# `similarities` (most similar first) is preserved.
has_plan = s_data.loc[similarities.index, 'plan'] == 1
closer_users = list(similarities.index[has_plan.to_numpy()])

# closer users with plan
closer_users[:10]

In [None]:
# full rows of the ten most similar users that have a plan;
# closer_users holds index labels, so .loc replaces the removed .ix indexer
s_data.loc[closer_users[:10]]

# New User Input

In [28]:
# function for new users

def new_user(df, rb, rock, jazz, techno, pop, indie, cinema, theater, beers, wine,
             party, trips, running, gym, golf, basket, football, yoga, metric):
    """Return the users with a plan that are most similar to a new user.

    A new row is built from the given ratings (with ``plan`` = 0) and added to a
    copy of ``df``; the caller's frame is not modified. Pairwise distances are
    computed with ``pdist`` over every column except the last two, converted to
    similarities as ``1 / (1 + distance)``, and the users are ranked by their
    similarity to the new user.

    Parameters
    ----------
    df : pandas.DataFrame
        Existing users, one row per user. All columns except the last two are
        used as features; a ``plan`` column is required.
        (The notebook passes a frame whose last two columns are 'plan' and 'id'.)
    rb, rock, ..., yoga : int
        The new user's rating for each tag, in the order of the tag list below.
    metric : str
        Distance metric name forwarded to ``scipy.spatial.distance.pdist``.

    Returns
    -------
    pandas.DataFrame
        Up to ten rows of the extended frame with ``plan == 1``, ordered from
        most to least similar to the new user.
    """
    tags = ['r&b', 'rock', 'jazz', 'techno', 'pop', 'indie',
            'cinema', 'theater', 'beers', 'wine', 'party', 'trips',
            'running', 'gym', 'golf', 'basket', 'football', 'yoga']

    rating = [rb, rock, jazz, techno, pop, indie, cinema, theater, beers, wine,
              party, trips, running, gym, golf, basket, football, yoga]

    n_user = dict(zip(tags, rating))
    # Derive the label from the frame that was passed in. The previous code read
    # the notebook-global s_data here, which ignored `df` and raised NameError
    # whenever that global was missing.
    new_id = 'id__' + str(len(df) + 1)
    n_user['id'] = new_id
    n_user['plan'] = 0

    # DataFrame.append was removed in pandas 2.0: concatenate a one-row frame
    # that already carries the new label. Columns are aligned by name, so the
    # column order of `df` is kept. `df` is rebound locally, not mutated.
    df = pd.concat([df, pd.DataFrame([n_user], index=[new_id])])

    # features = every column except the last two
    features = df.iloc[:, :-2].to_numpy()
    similar = pd.DataFrame(1 / (1 + squareform(pdist(features, metric))),
                           index=df.index, columns=df.index)

    # similarity of every user to the new user, most similar first
    similarities = similar[new_id].sort_values(ascending=False)

    # .ix was removed in pandas 1.0; look up the plan flag by label and filter
    # with a mask, which keeps the similarity ordering
    has_plan = (df.loc[similarities.index, 'plan'] == 1).to_numpy()
    closer_users = similarities.index[has_plan]

    return df.loc[closer_users[:10]]
    
    
    
    

# Input

In [6]:
# Read the r&b rating. The synthetic ratings are integers in 0..5, so a value
# outside that range is rejected instead of silently skewing the distances.
print('Rate r&b from 0 to 5:')
rb = int(input())
if not 0 <= rb <= 5:
    raise ValueError('r&b rating must be between 0 and 5, got {}'.format(rb))

Rate r&b from 0 to 5:
5


In [7]:
# Read the rock rating. The synthetic ratings are integers in 0..5, so a value
# outside that range is rejected instead of silently skewing the distances.
print('Rate rock from 0 to 5:')
rock = int(input())
if not 0 <= rock <= 5:
    raise ValueError('rock rating must be between 0 and 5, got {}'.format(rock))

Rate rock from 0 to 5:
5


In [8]:
# Read the jazz rating. The synthetic ratings are integers in 0..5, so a value
# outside that range is rejected instead of silently skewing the distances.
print('Rate jazz from 0 to 5:')
jazz = int(input())
if not 0 <= jazz <= 5:
    raise ValueError('jazz rating must be between 0 and 5, got {}'.format(jazz))

Rate jazz from 0 to 5:
0


In [9]:
# Read the techno rating. The synthetic ratings are integers in 0..5, so a value
# outside that range is rejected instead of silently skewing the distances.
print('Rate techno from 0 to 5:')
techno = int(input())
if not 0 <= techno <= 5:
    raise ValueError('techno rating must be between 0 and 5, got {}'.format(techno))

Rate techno from 0 to 5:
0


In [10]:
# Read the pop rating. The synthetic ratings are integers in 0..5, so a value
# outside that range is rejected instead of silently skewing the distances.
print('Rate pop from 0 to 5:')
pop = int(input())
if not 0 <= pop <= 5:
    raise ValueError('pop rating must be between 0 and 5, got {}'.format(pop))

Rate pop from 0 to 5:
3


In [11]:
# Read the indie rating. The synthetic ratings are integers in 0..5, so a value
# outside that range is rejected instead of silently skewing the distances.
print('Rate indie from 0 to 5:')
indie = int(input())
if not 0 <= indie <= 5:
    raise ValueError('indie rating must be between 0 and 5, got {}'.format(indie))

Rate indie from 0 to 5:
4


In [12]:
# Read the cinema rating. The synthetic ratings are integers in 0..5, so a value
# outside that range is rejected instead of silently skewing the distances.
print('Rate cinema from 0 to 5:')
cinema = int(input())
if not 0 <= cinema <= 5:
    raise ValueError('cinema rating must be between 0 and 5, got {}'.format(cinema))

Rate cinema from 0 to 5:
1


In [13]:
# Read the theater rating. The synthetic ratings are integers in 0..5, so a value
# outside that range is rejected instead of silently skewing the distances.
print('Rate theater from 0 to 5:')
theater = int(input())
if not 0 <= theater <= 5:
    raise ValueError('theater rating must be between 0 and 5, got {}'.format(theater))

Rate theater from 0 to 5:
2


In [14]:
# Read the beers rating. The synthetic ratings are integers in 0..5, so a value
# outside that range is rejected instead of silently skewing the distances.
print('Rate beers from 0 to 5:')
beers = int(input())
if not 0 <= beers <= 5:
    raise ValueError('beers rating must be between 0 and 5, got {}'.format(beers))

Rate beers from 0 to 5:
1


In [17]:
# Read the wine rating. The synthetic ratings are integers in 0..5, so a value
# outside that range is rejected instead of silently skewing the distances.
print('Rate wine from 0 to 5:')
wine = int(input())
if not 0 <= wine <= 5:
    raise ValueError('wine rating must be between 0 and 5, got {}'.format(wine))

Rate wine from 0 to 5:
0


In [18]:
# Read the party rating. The synthetic ratings are integers in 0..5, so a value
# outside that range is rejected instead of silently skewing the distances.
print('Rate party from 0 to 5:')
party = int(input())
if not 0 <= party <= 5:
    raise ValueError('party rating must be between 0 and 5, got {}'.format(party))

Rate party from 0 to 5:
1


In [19]:
# Read the trips rating. The synthetic ratings are integers in 0..5, so a value
# outside that range is rejected instead of silently skewing the distances.
print('Rate trips from 0 to 5:')
trips = int(input())
if not 0 <= trips <= 5:
    raise ValueError('trips rating must be between 0 and 5, got {}'.format(trips))

Rate trips from 0 to 5:
1


In [20]:
# Read the running rating. The synthetic ratings are integers in 0..5, so a value
# outside that range is rejected instead of silently skewing the distances.
print('Rate running from 0 to 5:')
running = int(input())
if not 0 <= running <= 5:
    raise ValueError('running rating must be between 0 and 5, got {}'.format(running))

Rate running from 0 to 5:
5


In [21]:
# Read the gym rating. The synthetic ratings are integers in 0..5, so a value
# outside that range is rejected instead of silently skewing the distances.
print('Rate gym from 0 to 5:')
gym = int(input())
if not 0 <= gym <= 5:
    raise ValueError('gym rating must be between 0 and 5, got {}'.format(gym))

Rate gym from 0 to 5:
1


In [22]:
# Read the golf rating. The synthetic ratings are integers in 0..5, so a value
# outside that range is rejected instead of silently skewing the distances.
print('Rate golf from 0 to 5:')
golf = int(input())
if not 0 <= golf <= 5:
    raise ValueError('golf rating must be between 0 and 5, got {}'.format(golf))

Rate golf from 0 to 5:
2


In [23]:
# Read the basket rating. The synthetic ratings are integers in 0..5, so a value
# outside that range is rejected instead of silently skewing the distances.
print('Rate basket from 0 to 5:')
basket = int(input())
if not 0 <= basket <= 5:
    raise ValueError('basket rating must be between 0 and 5, got {}'.format(basket))

Rate basket from 0 to 5:
3


In [24]:
# Read the football rating. The synthetic ratings are integers in 0..5, so a value
# outside that range is rejected instead of silently skewing the distances.
print('Rate football from 0 to 5:')
football = int(input())
if not 0 <= football <= 5:
    raise ValueError('football rating must be between 0 and 5, got {}'.format(football))

Rate football from 0 to 5:
4


In [26]:
# Read the yoga rating. The synthetic ratings are integers in 0..5, so a value
# outside that range is rejected instead of silently skewing the distances.
print('Rate yoga from 0 to 5:')
yoga = int(input())
if not 0 <= yoga <= 5:
    raise ValueError('yoga rating must be between 0 and 5, got {}'.format(yoga))

Rate yoga from 0 to 5:
0


In [None]:
# distance metrics that can be passed to pdist (squareform only expands the
# condensed result into a square matrix)
# NOTE(review): 'wminkowski' and 'kulsinski' appear to have been removed from
# recent SciPy releases - confirm against the installed version before using them.

'''
'euclidean', 'minkowski', 'cityblock', 'seuclidean', 'sqeuclidean', 'cosine', 'correlation'

'hamming', 'jaccard', 'chebyshev', 'canberra', 'braycurtis', 'mahalanobis', 'yule'

'matching', 'dice', 'kulsinski', 'rogerstanimoto', 'russellrao', 'sokalmichener'

'sokalsneath', 'wminkowski'
'''

# metric handed to new_user() for the interactive example
metric = 'euclidean'

In [29]:
# rank the existing users against the ratings entered above; every argument is
# passed by keyword so each rating is visibly bound to its tag
plans = new_user(
    df=s_data,
    rb=rb, rock=rock, jazz=jazz, techno=techno, pop=pop, indie=indie,
    cinema=cinema, theater=theater, beers=beers, wine=wine, party=party, trips=trips,
    running=running, gym=gym, golf=golf, basket=basket, football=football, yoga=yoga,
    metric=metric,
)

In [30]:
# show the rows returned by new_user(): up to ten users with plan == 1,
# taken in order of decreasing similarity to the ratings entered above
display(plans)

Unnamed: 0,r&b,rock,jazz,techno,pop,indie,cinema,theater,beers,wine,party,trips,running,gym,golf,basket,football,yoga,plan,id
id__1543,5,3,1,1,5,1,3,4,2,0,0,1,4,1,3,3,2,0,1,1543
id__779,4,3,0,2,2,3,1,4,1,2,1,3,5,2,0,2,5,3,1,779
id__1889,3,5,3,0,0,1,0,2,3,0,0,1,5,1,2,1,4,0,1,1889
id__1843,3,4,0,1,3,3,2,5,1,0,2,4,3,4,2,4,3,1,1,1843
id__1909,4,3,2,1,2,5,1,2,2,2,1,1,4,0,0,3,0,2,1,1909
id__651,5,3,1,1,2,5,5,3,1,2,1,1,4,2,0,2,2,2,1,651
id__1972,4,3,1,3,1,4,4,3,0,1,0,0,2,2,3,2,4,0,1,1972
id__1955,3,5,3,0,3,4,3,2,1,1,3,4,5,2,3,0,2,1,1,1955
id__1705,5,3,2,2,3,0,1,3,3,3,2,1,4,1,1,3,3,1,1,1705
id__702,3,3,1,1,4,5,2,1,2,3,3,1,5,4,3,3,5,3,1,702
