In [1]:
import pandas as pd
import numpy as np
# numpy version should be at least 1.13, because np.heaviside was added in that release
from scipy.stats import pearsonr
from scipy.spatial.distance import squareform

In [2]:
# Show which NumPy version this kernel is running.
print(np.__version__)

1.13.1


In [3]:
def load_data(path='data/daejeon.csv'):
    '''
    This function reads a tab-separated file into a dataFrame

    Input:
    path, str. Location of the tab-delimited file. Defaults to
    'data/daejeon.csv' so that calling load_data() without arguments keeps
    working exactly as before.

    Output:
    df, dataFrame. One row per line of the file; index_col=False tells pandas
    not to use any column as the index.
    '''
    return pd.read_csv(path, sep='\t', index_col=False)

In [4]:
# Load the data set once; get_pref_mats is called on this frame further down.
df = load_data()

In [5]:
def get_pref_mats(df):
    '''
    This function generates the check-in matrix and sentiment-preference matrix

    Rows follow the sorted unique 'Member ID' values and columns follow the
    sorted unique 'Restaurant ID' values. A (member, restaurant) pair that
    never occurs in df is 0 in both matrices.

    Input:
    df, dataFrame. Return from load_data function. Must provide the
    'Member ID', 'Restaurant ID' and 'Rating' columns.

    Output:
    1. pref_checkin, check-in preference matrix. Number of rows of df for each
       (member, restaurant) pair, capped at 3.
    2. pref_sentiment, sentiment preference matrix. The 'Rating' of the last
       row of df for each (member, restaurant) pair.
    '''
    members = df['Member ID']
    restaurants = df['Restaurant ID']
    mem_id = sorted(members.unique())
    loc_id = sorted(restaurants.unique())

    # Map every row of df to its (row, column) position in the matrices in one
    # vectorised lookup instead of a per-row .loc access.
    row_pos = pd.Index(mem_id).get_indexer(members)
    col_pos = pd.Index(loc_id).get_indexer(restaurants)
    shape = (len(mem_id), len(loc_id))

    # np.add.at accumulates correctly when the same (row, column) pair repeats;
    # plain fancy-index "+=" would count each repeated pair only once.
    pref_checkin = np.zeros(shape, dtype=np.int64)
    np.add.at(pref_checkin, (row_pos, col_pos), 1)
    np.minimum(pref_checkin, 3, out=pref_checkin)

    # With repeated indices NumPy keeps the last assigned value, so the rating
    # of the latest row for a pair wins, as in a sequential overwrite. The
    # dtype is widened up front so non-integer ratings are stored unchanged.
    ratings = np.asarray(df['Rating'])
    pref_sentiment = np.zeros(shape, dtype=np.result_type(ratings.dtype, np.int64))
    pref_sentiment[row_pos, col_pos] = ratings

    return pref_checkin, pref_sentiment

In [6]:
# Build both preference matrices (NumPy arrays) from the loaded frame.
pref_checkin, pref_sentiment = get_pref_mats(df)

In [7]:
# Element-wise gap between check-in and sentiment preference.
pref_gap = pref_checkin - pref_sentiment

# value1: direction of the gap (-1, 0 or 1).
value1 = np.sign(pref_gap)
# value2: 1 where |gap| > 2, 0.5 where |gap| == 2, 0 otherwise.
value2 = np.heaviside(np.abs(pref_gap) - 2, 0.5)

# Move the check-in preference towards the sentiment preference by value2
# wherever the two disagree by 2 or more.
pref_final = pref_checkin - value1 * value2
print(pref_final.shape)

(1153, 852)


In [8]:
# Z is the inner dimension shared by U (N x Z) and V (Z x I).
Z = 5
# N rows and I columns of the combined preference matrix.
N, I = pref_final.shape

In [9]:
def get_UV(N, I, Z, seed=None):
    '''
    This function draws the two random factor matrices

    Input:
    N, int. Number of rows of U.
    I, int. Number of columns of V.
    Z, int. Shared inner dimension (columns of U, rows of V).
    seed, int or None. When given, the draws come from a private
    np.random.RandomState(seed) and are reproducible. When None (default) the
    global NumPy random state is used, as before.

    Output:
    1. U, array of shape (N, Z) with uniform values in [0, 1)
    2. V, array of shape (Z, I) with uniform values in [0, 1)
    '''
    # np.random and RandomState expose the same rand(); only the source of
    # randomness differs.
    rng = np.random if seed is None else np.random.RandomState(seed)
    U = rng.rand(N, Z)
    V = rng.rand(Z, I)

    return U, V

In [10]:
# Draw the initial factor matrices and report both shapes, one per line.
U, V = get_UV(N, I, Z)
print(U.shape, V.shape, sep='\n')

(1153, 5)
(5, 852)


In [11]:
# Display the (rows, columns) shape of the combined preference matrix.
pref_final.shape

(1153, 852)

In [13]:
# Pearson correlation between every pair of rows of pref_final.
# np.corrcoef treats each row as one variable and returns the full N x N
# matrix in one call, replacing N*N separate pearsonr calls in Python loops.
# atleast_2d keeps a 2-D result even when there is a single row, and tolist()
# keeps coef a list of lists as before.
coef = np.atleast_2d(np.corrcoef(pref_final)).tolist()
    

In [16]:
# Convert the correlation table to a NumPy array and show its shape.
coef = np.array(coef)
print(coef.shape)

(1153, 1153)


In [17]:
# Re-read the row count of pref_final into N; the column count is discarded.
N, _ = pref_final.shape
print(N)

1153
