# Demo Synth Data

In [1]:
# import libraries

import warnings
warnings.filterwarnings("ignore",category=DeprecationWarning)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform

In [2]:
# names and tags

names=['id__'+str(i) for i in range(2000)]

tags=['r&b', 'rock', 'jazz', 'techno', 'pop', 'indie',
      'cinema', 'theater', 'beers', 'wine', 'party', 'trips',
      'running', 'gym', 'golf', 'basket', 'football', 'yoga']

In [3]:
# synth data

data=np.random.randint(0, 6, (len(names), len(tags)))
s_data=pd.DataFrame(data, columns=tags, index=names)
s_data['plan']=np.random.randint(0, 2, (len(names), 1))
s_data['id']=[i for i in range(len(names))]
s_data.shape

(2000, 20)

In [4]:
# metrics for squareform

'''
'euclidean', 'minkowski', 'cityblock', 'seuclidean', 'sqeuclidean', 'cosine', 'correlation'

'hamming', 'jaccard', 'chebyshev', 'canberra', 'braycurtis', 'mahalanobis', 'yule'

'matching', 'dice', 'kulsinski', 'rogerstanimoto', 'russellrao', 'sokalmichener'

'sokalsneath', 'wminkowski'
'''



metric='euclidean'  

In [5]:
squareform(pdist(s_data.iloc[:, :-1], metric))

array([[ 0.        ,  8.94427191,  9.64365076, ...,  9.64365076,
        11.26942767,  9.53939201],
       [ 8.94427191,  0.        , 10.14889157, ...,  9.43398113,
        12.52996409, 10.81665383],
       [ 9.64365076, 10.14889157,  0.        , ..., 10.19803903,
        10.39230485, 11.3137085 ],
       ...,
       [ 9.64365076,  9.43398113, 10.19803903, ...,  0.        ,
        11.13552873, 10.09950494],
       [11.26942767, 12.52996409, 10.39230485, ..., 11.13552873,
         0.        , 10.29563014],
       [ 9.53939201, 10.81665383, 11.3137085 , ..., 10.09950494,
        10.29563014,  0.        ]])

In [6]:
# similarity matrix

similar=pd.DataFrame(1/(1 + squareform(pdist(s_data.iloc[:, :-1], metric))), 
                         index=s_data.index, columns=s_data.index)

similar.head()

Unnamed: 0,id__0,id__1,id__2,id__3,id__4,id__5,id__6,id__7,id__8,id__9,...,id__1990,id__1991,id__1992,id__1993,id__1994,id__1995,id__1996,id__1997,id__1998,id__1999
id__0,1.0,0.10056,0.093953,0.088913,0.089301,0.094414,0.086688,0.084959,0.085638,0.083972,...,0.085638,0.118818,0.1,0.105426,0.090909,0.096331,0.104141,0.093953,0.081503,0.094882
id__1,0.10056,1.0,0.089695,0.074798,0.082403,0.087779,0.074798,0.072025,0.091747,0.085983,...,0.08302,0.118818,0.110348,0.090094,0.089301,0.078448,0.085638,0.095841,0.07391,0.084626
id__2,0.093953,0.089695,1.0,0.06764,0.1,0.093953,0.106762,0.093953,0.08092,0.084959,...,0.098908,0.105426,0.098375,0.09683,0.1,0.101131,0.103517,0.089301,0.087779,0.08121
id__3,0.088913,0.074798,0.06764,1.0,0.074798,0.088152,0.077421,0.071824,0.0821,0.080634,...,0.069545,0.086333,0.06901,0.10745,0.073054,0.085297,0.088913,0.08302,0.084297,0.072025
id__4,0.089301,0.082403,0.1,0.074798,1.0,0.08853,0.095841,0.097337,0.08007,0.0821,...,0.08121,0.079792,0.077171,0.086333,0.10056,0.094414,0.094414,0.083972,0.081503,0.076678


In [7]:
similar.shape

(2000, 2000)

# New User

In [8]:
# new fixed user

n_rating=np.random.randint(0, 6, (1, len(tags)))
n_user={k:v for k, v in list(zip(tags, n_rating[0]))}
n_user['id']='id_2001'
n_user['plan']=0

n_user

{'r&b': 1,
 'rock': 1,
 'jazz': 4,
 'techno': 4,
 'pop': 4,
 'indie': 1,
 'cinema': 4,
 'theater': 0,
 'beers': 1,
 'wine': 3,
 'party': 3,
 'trips': 1,
 'running': 5,
 'gym': 0,
 'golf': 1,
 'basket': 3,
 'football': 1,
 'yoga': 1,
 'id': 'id_2001',
 'plan': 0}

In [9]:
# new user introduced into system

s_data=s_data.append(n_user, ignore_index=True)
names.append(n_user['id'])
s_data.index=names

In [10]:
# similarity for new user

similar = pd.DataFrame(1/(1 + squareform(pdist(s_data.iloc[:, :-1], metric))), 
                         index=s_data.index, columns=s_data.index)


similarities = similar[n_user['id']].sort_values(ascending=False)



closer_users=[]
for e in similarities.index:
    if s_data.ix[e].plan==1:
        closer_users.append(e)
   
    
# closer users with plan
closer_users[:10]

['id__26',
 'id__438',
 'id__19',
 'id__1308',
 'id__630',
 'id__334',
 'id__813',
 'id__1292',
 'id__1145',
 'id__619']

In [11]:
s_data.ix[closer_users[:10]]

Unnamed: 0,r&b,rock,jazz,techno,pop,indie,cinema,theater,beers,wine,party,trips,running,gym,golf,basket,football,yoga,plan,id
id__26,0,3,5,4,4,1,5,2,1,0,3,1,5,0,2,4,2,1,1,26
id__438,1,4,1,3,5,1,3,1,1,2,5,2,5,1,2,3,0,1,1,438
id__19,4,4,5,5,4,4,4,0,1,3,3,1,5,1,0,4,1,0,1,19
id__1308,2,3,3,5,5,2,5,0,2,2,0,1,3,0,1,1,3,0,1,1308
id__630,0,3,4,3,3,0,1,0,0,3,5,2,2,0,2,3,1,3,1,630
id__334,0,1,1,1,3,2,4,3,0,2,4,2,5,0,2,5,0,1,1,334
id__813,3,2,2,2,5,2,3,2,0,5,2,3,2,0,1,3,1,2,1,813
id__1292,4,0,5,3,4,1,2,0,0,3,2,1,3,2,1,5,2,4,1,1292
id__1145,2,2,1,4,5,0,4,2,1,1,5,1,2,2,0,2,1,2,1,1145
id__619,2,2,2,3,2,1,4,2,2,2,2,1,2,0,3,3,2,4,1,619


# New User Input

In [12]:
# function for new users

def new_user(df, rb, rock, jazz, techno, pop, indie, cinema, theater, beers, wine,
             party, trips, running, gym, golf, basket, football, yoga, metric):
    
    
    tags=['r&b', 'rock', 'jazz', 'techno', 'pop', 'indie',
          'cinema', 'theater', 'beers', 'wine', 'party', 'trips',
          'running', 'gym', 'golf', 'basket', 'football', 'yoga']
    
    
    rating=[rb, rock, jazz, techno, pop, indie, cinema, theater, beers, wine,
             party, trips, running, gym, golf, basket, football, yoga, metric]
    
    
    
    n_user={k:v for k, v in list(zip(tags, n_rating[0]))}
    n_user['id']='id__'+str(len(s_data.id)+1)
    n_user['plan']=0
    
    
    names=list(df.index)
    df=df.append(n_user, ignore_index=True)
    names.append(n_user['id'])
    df.index=names
    
    
    
    similar = pd.DataFrame(1/(1 + squareform(pdist(df.iloc[:, :-1], metric))), 
                         index=df.index, columns=df.index)


    similarities = similar[n_user['id']].sort_values(ascending=False)



    closer_users=[]
    for e in similarities.index:
        if df.ix[e].plan==1:
            closer_users.append(e)
    
    
    
    return df.ix[closer_users[:10]]
    
    
    
    

# Input

In [13]:
print('Rate r&b from 0 to 5:')
rb = int(input())

Rate r&b from 0 to 5:
4


In [14]:
print('Rate rock from 0 to 5:')
rock = int(input())

Rate rock from 0 to 5:
5


In [15]:
print('Rate jazz from 0 to 5:')
jazz = int(input())

Rate jazz from 0 to 5:
5


In [16]:
print('Rate techno from 0 to 5:')
techno = int(input())

Rate techno from 0 to 5:
0


In [17]:
print('Rate pop from 0 to 5:')
pop = int(input())

Rate pop from 0 to 5:
0


In [18]:
print('Rate indie from 0 to 5:')
indie = int(input())

Rate indie from 0 to 5:
0


In [19]:
print('Rate cinema from 0 to 5:')
cinema = int(input())

Rate cinema from 0 to 5:
3


In [20]:
print('Rate theater from 0 to 5:')
theater = int(input())

Rate theater from 0 to 5:
5


In [21]:
print('Rate beers from 0 to 5:')
beers = int(input())

Rate beers from 0 to 5:
5


In [22]:
print('Rate wine from 0 to 5:')
wine = int(input())

Rate wine from 0 to 5:
2


In [23]:
print('Rate party from 0 to 5:')
party = int(input())

Rate party from 0 to 5:
4


In [24]:
print('Rate trips from 0 to 5:')
trips = int(input())

Rate trips from 0 to 5:
1


In [25]:
print('Rate running from 0 to 5:')
running = int(input())

Rate running from 0 to 5:
0


In [26]:
print('Rate gym from 0 to 5:')
gym = int(input())

Rate gym from 0 to 5:
0


In [27]:
print('Rate golf from 0 to 5:')
golf = int(input())

Rate golf from 0 to 5:
0


In [28]:
print('Rate basket from 0 to 5:')
basket = int(input())

Rate basket from 0 to 5:
2


In [29]:
print('Rate football from 0 to 5:')
football = int(input())

Rate football from 0 to 5:
0


In [30]:
print('Rate yoga from 0 to 5:')
yoga = int(input())

Rate yoga from 0 to 5:
1


In [31]:
# metrics for squareform

'''
'euclidean', 'minkowski', 'cityblock', 'seuclidean', 'sqeuclidean', 'cosine', 'correlation'

'hamming', 'jaccard', 'chebyshev', 'canberra', 'braycurtis', 'mahalanobis', 'yule'

'matching', 'dice', 'kulsinski', 'rogerstanimoto', 'russellrao', 'sokalmichener'

'sokalsneath', 'wminkowski'
'''



metric='cosine'  

In [32]:
plans=new_user(s_data, rb, rock, jazz, techno, pop, indie, cinema, theater, beers, wine,
               party, trips, running, gym, golf, basket, football, yoga, metric)

In [33]:
display(plans)

Unnamed: 0,r&b,rock,jazz,techno,pop,indie,cinema,theater,beers,wine,party,trips,running,gym,golf,basket,football,yoga,plan,id
id__26,0,3,5,4,4,1,5,2,1,0,3,1,5,0,2,4,2,1,1,26
id__19,4,4,5,5,4,4,4,0,1,3,3,1,5,1,0,4,1,0,1,19
id__438,1,4,1,3,5,1,3,1,1,2,5,2,5,1,2,3,0,1,1,438
id__782,2,1,5,4,3,1,5,0,4,2,4,0,4,1,3,1,2,5,1,782
id__1308,2,3,3,5,5,2,5,0,2,2,0,1,3,0,1,1,3,0,1,1308
id__303,1,2,3,4,3,4,5,1,2,4,4,1,4,5,0,5,1,2,1,303
id__1388,3,1,3,5,2,1,5,2,2,1,4,5,5,1,3,4,0,0,1,1388
id__1334,1,1,5,5,2,2,5,3,0,2,4,3,5,1,5,4,5,0,1,1334
id__163,2,2,3,3,4,4,4,0,5,4,2,2,3,1,2,3,3,1,1,163
id__1292,4,0,5,3,4,1,2,0,0,3,2,1,3,2,1,5,2,4,1,1292
