# Demo Synth Data

In [1]:
# import libraries

import warnings
warnings.filterwarnings("ignore",category=DeprecationWarning)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial.distance import pdist, squareform

In [2]:
# names and tags

names=['id__'+str(i) for i in range(2000)]

tags=['r&b', 'rock', 'jazz', 'techno', 'pop', 'indie',
      'cinema', 'theater', 'beers', 'wine', 'party', 'trips',
      'running', 'gym', 'golf', 'basket', 'football', 'yoga']

In [3]:
# synth data

data=np.random.randint(0, 6, (len(names), len(tags)))
s_data=pd.DataFrame(data, columns=tags, index=names)
s_data['plan']=np.random.randint(0, 2, (len(names), 1))
s_data['id']=[i for i in range(len(names))]
s_data.shape

(2000, 20)

In [4]:
# metrics for squareform

'''
'euclidean', 'minkowski', 'cityblock', 'seuclidean', 'sqeuclidean', 'cosine', 'correlation'

'hamming', 'jaccard', 'chebyshev', 'canberra', 'braycurtis', 'mahalanobis', 'yule'

'matching', 'dice', 'kulsinski', 'rogerstanimoto', 'russellrao', 'sokalmichener'

'sokalsneath', 'wminkowski'
'''



metric='euclidean'  

In [5]:
squareform(pdist(s_data.iloc[:, :-2], metric))

array([[ 0.        , 13.34166406, 10.90871211, ..., 12.28820573,
         9.69535971,  8.        ],
       [13.34166406,  0.        , 13.37908816, ..., 10.04987562,
        11.83215957, 10.        ],
       [10.90871211, 13.37908816,  0.        , ...,  8.83176087,
         8.66025404,  9.8488578 ],
       ...,
       [12.28820573, 10.04987562,  8.83176087, ...,  0.        ,
         9.        , 10.14889157],
       [ 9.69535971, 11.83215957,  8.66025404, ...,  9.        ,
         0.        ,  9.16515139],
       [ 8.        , 10.        ,  9.8488578 , ..., 10.14889157,
         9.16515139,  0.        ]])

In [6]:
# similarity matrix

similar=pd.DataFrame(1/(1 + squareform(pdist(s_data.iloc[:, :-1], metric))), 
                         index=s_data.index, columns=s_data.index)

similar.head()

Unnamed: 0,id__0,id__1,id__2,id__3,id__4,id__5,id__6,id__7,id__8,id__9,...,id__1990,id__1991,id__1992,id__1993,id__1994,id__1995,id__1996,id__1997,id__1998,id__1999
id__0,1.0,0.069727,0.083651,0.090909,0.09261,0.089695,0.093051,0.078187,0.093498,0.069365,...,0.090499,0.090909,0.082709,0.077674,0.085983,0.095358,0.10056,0.075025,0.093498,0.110348
id__1,0.069727,1.0,0.069365,0.075025,0.065735,0.106088,0.088152,0.074798,0.0818,0.095358,...,0.098908,0.083651,0.066196,0.074798,0.074798,0.08121,0.076435,0.090094,0.077929,0.090499
id__2,0.083651,0.069365,1.0,0.109601,0.108152,0.0821,0.086688,0.083972,0.076923,0.078448,...,0.069545,0.097337,0.074798,0.085983,0.09683,0.089301,0.090094,0.101711,0.102904,0.092176
id__3,0.090909,0.075025,0.109601,1.0,0.0818,0.081503,0.079792,0.078187,0.089301,0.075486,...,0.078187,0.082403,0.07391,0.068144,0.092176,0.104141,0.08007,0.099449,0.096331,0.106088
id__4,0.09261,0.065735,0.108152,0.0818,1.0,0.083972,0.093953,0.092176,0.0818,0.080634,...,0.076678,0.089301,0.086688,0.081503,0.113504,0.106762,0.104141,0.09261,0.09261,0.073054


In [7]:
similar.shape

(2000, 2000)

# New User

In [8]:
# new fixed user

n_rating=np.random.randint(0, 6, (1, len(tags)))
n_user={k:v for k, v in list(zip(tags, n_rating[0]))}
n_user['id']='id_2001'
n_user['plan']=0

n_user

{'r&b': 5,
 'rock': 0,
 'jazz': 4,
 'techno': 2,
 'pop': 3,
 'indie': 0,
 'cinema': 3,
 'theater': 3,
 'beers': 2,
 'wine': 1,
 'party': 1,
 'trips': 0,
 'running': 0,
 'gym': 0,
 'golf': 1,
 'basket': 1,
 'football': 1,
 'yoga': 1,
 'id': 'id_2001',
 'plan': 0}

In [9]:
# new user introduced into system

s_data=s_data.append(n_user, ignore_index=True)
names.append(n_user['id'])
s_data.index=names

In [10]:
# similarity for new user

similar = pd.DataFrame(1/(1 + squareform(pdist(s_data.iloc[:, :-2], metric))), 
                         index=s_data.index, columns=s_data.index)


similarities = similar[n_user['id']].sort_values(ascending=False)



closer_users=[]
for e in similarities.index:
    if s_data.ix[e].plan==1:
        closer_users.append(e)
   
    
# closer users with plan
closer_users[:10]

['id__1648',
 'id__716',
 'id__762',
 'id__1057',
 'id__1412',
 'id__321',
 'id__876',
 'id__239',
 'id__1275',
 'id__31']

In [11]:
s_data.ix[closer_users[:10]]

Unnamed: 0,r&b,rock,jazz,techno,pop,indie,cinema,theater,beers,wine,party,trips,running,gym,golf,basket,football,yoga,plan,id
id__1648,2,1,4,0,3,0,2,3,2,3,1,2,0,0,0,1,0,3,1,1648
id__716,4,2,3,0,4,2,3,3,4,1,2,1,1,2,1,0,2,0,1,716
id__762,4,2,3,1,0,1,2,3,4,1,2,0,1,1,3,2,2,2,1,762
id__1057,3,2,2,1,3,0,5,3,0,0,3,1,2,1,1,0,1,0,1,1057
id__1412,5,1,4,0,4,1,3,1,1,2,2,1,0,3,4,0,0,0,1,1412
id__321,4,1,3,1,3,2,1,4,1,2,2,0,2,3,0,2,0,3,1,321
id__876,3,2,2,4,2,2,4,3,1,3,1,1,1,0,4,0,0,2,1,876
id__239,5,1,4,2,3,1,2,4,5,0,0,3,1,4,0,1,2,1,1,239
id__1275,2,0,4,3,3,1,3,5,0,3,0,1,1,3,3,3,1,0,1,1275
id__31,5,2,2,3,3,0,1,2,1,1,0,3,4,1,2,0,1,1,1,31


# New User Input

In [12]:
# function for new users

def new_user(df, rb, rock, jazz, techno, pop, indie, cinema, theater, beers, wine,
             party, trips, running, gym, golf, basket, football, yoga, metric):
    
    
    tags=['r&b', 'rock', 'jazz', 'techno', 'pop', 'indie',
          'cinema', 'theater', 'beers', 'wine', 'party', 'trips',
          'running', 'gym', 'golf', 'basket', 'football', 'yoga']
    
    
    rating=[rb, rock, jazz, techno, pop, indie, cinema, theater, beers, wine,
             party, trips, running, gym, golf, basket, football, yoga]
    
    
    
    n_user={k:v for k, v in list(zip(tags, rating))}
    n_user['id']='id__'+str(len(s_data.id)+1)
    n_user['plan']=0
    
    
    names=list(df.index)
    df=df.append(n_user, ignore_index=True)
    names.append(n_user['id'])
    df.index=names
    
    
    
    similar = pd.DataFrame(1/(1 + squareform(pdist(df.iloc[:, :-2], metric))), 
                         index=df.index, columns=df.index)


    similarities = similar[n_user['id']].sort_values(ascending=False)



    closer_users=[]
    for e in similarities.index:
        if df.ix[e].plan==1:
            closer_users.append(e)
    
    
    
    return df.ix[closer_users[:10]]
    
    
    
    

# Input

In [13]:
print('Rate r&b from 0 to 5:')
rb = int(input())

Rate r&b from 0 to 5:
5


In [14]:
print('Rate rock from 0 to 5:')
rock = int(input())

Rate rock from 0 to 5:
5


In [15]:
print('Rate jazz from 0 to 5:')
jazz = int(input())

Rate jazz from 0 to 5:
0


In [16]:
print('Rate techno from 0 to 5:')
techno = int(input())

Rate techno from 0 to 5:
1


In [17]:
print('Rate pop from 0 to 5:')
pop = int(input())

Rate pop from 0 to 5:
0


In [18]:
print('Rate indie from 0 to 5:')
indie = int(input())

Rate indie from 0 to 5:
4


In [19]:
print('Rate cinema from 0 to 5:')
cinema = int(input())

Rate cinema from 0 to 5:
3


In [20]:
print('Rate theater from 0 to 5:')
theater = int(input())

Rate theater from 0 to 5:
5


In [21]:
print('Rate beers from 0 to 5:')
beers = int(input())

Rate beers from 0 to 5:
1


In [22]:
print('Rate wine from 0 to 5:')
wine = int(input())

Rate wine from 0 to 5:
5


In [23]:
print('Rate party from 0 to 5:')
party = int(input())

Rate party from 0 to 5:
5


In [24]:
print('Rate trips from 0 to 5:')
trips = int(input())

Rate trips from 0 to 5:
1


In [25]:
print('Rate running from 0 to 5:')
running = int(input())

Rate running from 0 to 5:
2


In [26]:
print('Rate gym from 0 to 5:')
gym = int(input())

Rate gym from 0 to 5:
3


In [27]:
print('Rate golf from 0 to 5:')
golf = int(input())

Rate golf from 0 to 5:
0


In [28]:
print('Rate basket from 0 to 5:')
basket = int(input())

Rate basket from 0 to 5:
3


In [29]:
print('Rate football from 0 to 5:')
football = int(input())

Rate football from 0 to 5:
5


In [30]:
print('Rate yoga from 0 to 5:')
yoga = int(input())

Rate yoga from 0 to 5:
1


In [64]:
# metrics for squareform

'''
'euclidean', 'minkowski', 'cityblock', 'seuclidean', 'sqeuclidean', 'cosine', 'correlation'

'hamming', 'jaccard', 'chebyshev', 'canberra', 'braycurtis', 'mahalanobis', 'yule'

'matching', 'dice', 'kulsinski', 'rogerstanimoto', 'russellrao', 'sokalmichener'

'sokalsneath', 'wminkowski'
'''



metric='canberra'  

In [65]:
plans=new_user(s_data, rb, rock, jazz, techno, pop, indie, cinema, theater, beers, wine,
               party, trips, running, gym, golf, basket, football, yoga, metric)

In [39]:
display(plans)

Unnamed: 0,r&b,rock,jazz,techno,pop,indie,cinema,theater,beers,wine,party,trips,running,gym,golf,basket,football,yoga,plan,id
id__1613,5,4,3,2,1,2,1,5,0,5,5,0,1,3,0,2,1,2,1,1613
id__1934,3,2,1,0,0,5,2,4,2,5,2,0,5,4,0,1,5,2,1,1934
id__177,5,5,1,1,0,5,2,4,5,4,2,1,0,1,2,4,5,3,1,177
id__1295,2,3,0,3,0,1,1,5,0,5,4,2,3,3,2,4,2,1,1,1295
id__1279,3,3,1,0,0,4,5,5,2,5,2,2,1,1,1,5,5,5,1,1279
id__370,4,4,1,1,1,1,0,4,0,1,5,1,4,5,2,4,4,1,1,370
id__1985,4,5,2,0,0,2,5,1,1,5,4,0,1,4,0,5,5,5,1,1985
id__1248,2,4,0,2,2,1,5,5,2,4,3,2,2,0,3,2,5,0,1,1248
id__1231,5,4,1,4,2,2,3,4,1,3,0,1,3,3,1,1,4,2,1,1231
id__1269,4,2,4,0,2,5,1,3,0,5,5,2,5,2,1,1,5,1,1,1269


In [66]:
display(plans)

Unnamed: 0,r&b,rock,jazz,techno,pop,indie,cinema,theater,beers,wine,party,trips,running,gym,golf,basket,football,yoga,plan,id
id__426,4,5,1,1,0,4,3,5,1,0,3,4,2,5,2,3,1,1,1,426
id__246,5,2,3,1,0,3,5,1,4,4,5,1,3,5,4,5,5,1,1,246
id__1495,5,4,3,5,0,4,1,3,1,5,4,1,3,4,5,2,2,3,1,1495
id__1925,3,0,0,3,0,2,5,4,1,5,3,2,2,3,1,0,4,1,1,1925
id__1440,3,5,5,3,0,5,2,3,0,4,4,1,4,4,0,2,2,4,1,1440
id__449,2,5,5,1,0,2,4,4,4,3,3,2,2,3,1,1,5,2,1,449
id__249,1,5,2,3,0,1,2,4,1,0,5,3,2,4,0,5,5,2,1,249
id__1402,3,4,0,3,5,4,5,4,4,4,1,4,3,4,0,3,4,5,1,1402
id__1637,3,5,0,5,5,4,1,0,0,4,3,1,2,5,0,2,4,1,1,1637
id__1295,2,3,0,3,0,1,1,5,0,5,4,2,3,3,2,4,2,1,1,1295
