In [1]:
import os
import sys
import glob

import numpy as np
import scipy as sp
import pandas as pd

from learning_dist_metrics.ldm import LDM
from learning_dist_metrics.datasets import load_data
from UserBatch import SimScore, WeightedEuclidean, UserBatch


import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
## ####################### ##
## CREATE SIMULATED DATA   ##
## ####################### ##
from scipy.spatial.distance import euclidean
from numpy.random import uniform
from numpy.random import binomial

np.random.seed(20150408)
n_samples = 100

# The draw order below is what makes the data reproducible under the fixed
# seed: decision styles first, then one uniform vector per feature x0 .. x5.
user_columns = {
    "ID": range(n_samples),
    "decision_style": np.random.choice([0, 1], size = n_samples, replace=True, p = (0.7, 0.3)),
}
for feature_idx in range(6):
    user_columns["x%d" % feature_idx] = np.random.uniform(0, 1, n_samples)

# One row per simulated user: an ID, a decision style (0 with probability 0.7,
# 1 with probability 0.3) and six features drawn uniformly from [0, 1).
users = pd.DataFrame(user_columns)

## ########################## ##
## DATA SIMULATION LOGIC      ##
## ########################## ##
def dist_func(a, b, weights = None):
    """Return the Euclidean distance between `a` and `b` after scaling each
    coordinate by `weights`.

    Both vectors are multiplied element-wise by `weights` before the distance
    is taken, so the result is sqrt(sum((w_i * (a_i - b_i)) ** 2)).

    Parameters
    ----------
    a, b : array-like
        Numeric vectors of equal length. A single-row 2-D input is flattened
        to a vector.
    weights : array-like or None
        One multiplicative factor per coordinate. None means every
        coordinate is weighted 1.

    Returns
    -------
    float
        The weighted Euclidean distance.
    """
    # Flatten first so a one-row 2-D slice (such as DataFrame.iloc[[i], :].values)
    # is handled as a plain vector; recent SciPy releases reject non-1-D input
    # to `euclidean`.
    a = np.asarray(a, dtype=float).ravel()
    b = np.asarray(b, dtype=float).ravel()

    # `is None` rather than `== None`: the latter compares element-wise when
    # `weights` is a NumPy array and makes the `if` raise.
    if weights is None:
        weights = np.ones(a.size)
    else:
        weights = np.asarray(weights, dtype=float).ravel()

    return euclidean(a * weights, b * weights)
 
def is_friends(dist, threshold = 0.3, p_near = 0.9, p_far = 0.1):
    """Simulate one user's yes/no decision to befriend another user.

    The decision is a single Bernoulli draw whose success probability
    depends on how far apart the two users are:

    Decision logic:
    ===============
    if dist <= threshold, P(friend) = p_near (default 0.9)
    else,                 P(friend) = p_far  (default 0.1)

    Parameters
    ----------
    dist : float
        Distance between the two users as seen by the deciding user.
    threshold : float
        Largest distance (inclusive) still treated as "near".
    p_near, p_far : float
        Acceptance probabilities for the near and far case.

    Returns
    -------
    int
        1 if the user wants to become a friend, 0 otherwise.
    """
    # Exactly one random draw per call, so the seeded random stream is
    # consumed the same way whichever branch is taken.
    accept_prob = p_near if dist <= threshold else p_far
    return binomial(1, accept_prob)
 
def decision_rule_01(a, b):
    """Distance between `a` and `b` under the first weighting scheme.

    Delegates to `dist_func` with weights 0.5, 0.5 and 0.1 on the first
    three coordinates and 0 on the remaining three.
    """
    return dist_func(a, b, weights=[0.5, 0.5, 0.1, 0., 0., 0.])
 
def decision_rule_02(a, b):
    """Distance between `a` and `b` under the second weighting scheme.

    Delegates to `dist_func` with weight 0 on the first three coordinates
    and 0.5, 0.3 and 0.2 on the last three.
    """
    return dist_func(a, b, weights=[0., 0., 0., 0.5, 0.3, 0.2])
 
def decision_rule(a, b, type_idx):
    """Single entry point for the per-style distance functions.

    `type_idx` 0 selects `decision_rule_01`, 1 selects `decision_rule_02`;
    any other value falls back to the unweighted `dist_func`.
    """
    # Plain equality checks (not a dict lookup) so that an unhashable
    # `type_idx`, such as a one-element array, is still accepted.
    if type_idx == 0:
        return decision_rule_01(a, b)
    if type_idx == 1:
        return decision_rule_02(a, b)
    return dist_func(a, b)
 
def make_connection(decision_a, decision_b):
    """Combine two individual decisions into one connection flag.

    Returns 1 only when both `decision_a` and `decision_b` equal 1, meaning
    both parties agree to connect; returns 0 in every other case.
    """
    both_agree = decision_a == 1 and decision_b == 1
    return 1 if both_agree else 0
 
def friendship_agreement(a_id, b_id,
                         a_profile, b_profile,
                         a_dec_rule, b_dec_rule):
    """Simulate whether users A and B end up connected.

    Each user measures the distance to the other with their own decision
    rule, makes an independent random accept/reject decision from it, and
    the pair is connected only if both accept.

    Returns
    -------
    list
        [smaller_id, larger_id, is_connected], with is_connected in {0, 1}.
    """
    a_to_b = decision_rule(a_profile, b_profile, a_dec_rule)
    b_to_a = decision_rule(b_profile, a_profile, b_dec_rule)

    # Arguments are evaluated left to right, so A's random draw happens
    # before B's.
    connected = make_connection(is_friends(a_to_b), is_friends(b_to_a))

    # Report the pair with the smaller id first.
    first_id, second_id = (b_id, a_id) if a_id > b_id else (a_id, b_id)
    return [first_id, second_id, connected]

## ############################### ##
## Generate Simulated Relationship ##
## ############################### ##
all_users = set(users.loc[:, "ID"])
n_pairs = len(all_users) * len(all_users)

# Pre-fill with NaN rather than np.empty: the self-pair (A-A) rows are never
# assigned a distance, and uninitialised memory would survive dropna() below.
rel_df = np.full([n_pairs, 3], np.nan)
dist_df = np.full([n_pairs, 4], np.nan)

# Look up every user's decision style (scalar) and feature vector (1-D array,
# all columns from position 2 onward) once, keyed by ID, instead of
# re-scanning the ID column for every pair.
user_ids = users["ID"].tolist()
dec_rule_by_id = dict(zip(user_ids, users["decision_style"].values))
profile_by_id = dict(zip(user_ids, users.iloc[:, 2:].values))

# Fixed, ascending iteration order keeps the seeded random draws reproducible.
ordered_ids = sorted(all_users)

# NOTE(review): every unordered pair is visited twice (as a->b and as b->a)
# with independent random draws, and friendship_agreement() stores both under
# the same (smaller id, larger id) key, so rel_df holds two possibly different
# labels per pair -- confirm this is intended.
row_counter = 0
for a_id in ordered_ids:
    a_dec_rule = dec_rule_by_id[a_id]
    a_profile = profile_by_id[a_id]
    for b_id in ordered_ids:
        if a_id != b_id:
            b_dec_rule = dec_rule_by_id[b_id]
            b_profile = profile_by_id[b_id]

            dist_df[row_counter, :] = [a_id, b_id,
                                       decision_rule(a_profile, b_profile, a_dec_rule),
                                       decision_rule(b_profile, a_profile, b_dec_rule)]
            rel_df[row_counter, :] = \
                friendship_agreement(a_id, b_id, a_profile, b_profile,
                                     a_dec_rule, b_dec_rule)
        # Self-pairs keep their all-NaN rows in both tables; the row slot is
        # still consumed so row positions match the original layout.
        row_counter += 1

## Convert np.array() to pd.DataFrame()
dist_df = pd.DataFrame(dist_df, columns = ["uid_a", "uid_b", "distance_a2b", "distance_b2a"])
rel_df = pd.DataFrame(rel_df, columns = ["uid_a", "uid_b", "isFriend"])
## Drop A-A pairs
dist_df = dist_df.dropna()
rel_df = rel_df.dropna()

In [5]:
# Preview the first simulated pairs rather than rendering the whole table.
rel_df.head(10)

Unnamed: 0,uid_a,uid_b,isFriend
1,0,1,0
2,0,2,1
3,0,3,0
4,0,4,1
5,0,5,1
6,0,6,0
7,0,7,1
8,0,8,1
9,0,9,0
10,0,10,0
