In [1]:
import os
import sys
import glob
import numpy as np
import scipy as sp
import pandas as pd
import networkx as nx
from matplotlib import pyplot as plt

from scipy.stats import rayleigh
from scipy.stats import ks_2samp
from numpy import linspace
from numpy.random import choice
from networkx import Graph

from learning_dist_metrics.ldm import LDM
from learning_dist_metrics.dist_metrics import weighted_euclidean

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Directory that holds the CSV inputs read below.
DATA_PATH = "./data/sim_data_yi/"

# Load the three input tables; the first row of each CSV is used as the header.
users_df = pd.read_csv(DATA_PATH + "users_profile.csv", header=0, sep=",")
friends_df = pd.read_csv(DATA_PATH + "friendships.csv", header=0, sep=",")
dist_df = pd.read_csv(DATA_PATH + "dist_mat.csv", header=0, sep=",")

# Keep only the rows flagged with isFriend == 1. The explicit .copy() makes the
# filtered result an independent frame, so adding the "pair" column below does
# not write into a slice of the loaded table (SettingWithCopyWarning).
friends_df = friends_df[friends_df.isFriend == 1].copy()

# Encode each remaining row as an (int, int) tuple of its two user ids.
friends_df["pair"] = [(int(uid_a), int(uid_b))
                      for uid_a, uid_b in zip(friends_df.uid_a, friends_df.uid_b)]

# Selecting the three wanted columns also discards "isFriend".
friends_df = friends_df[["pair", "uid_a", "uid_b"]]

# Columns of users_df that are kept (next to "ID") in the profile table.
cols = ["x0", "x1", "x2", "x3", "x4", "x5"]

## subset users data to retain profile only
profile_df = users_df[["ID"] + cols]
# De-duplicated user ids; going through set() means the order is not guaranteed.
all_user_ids = list(set(users_df.ID))

In [3]:
from GWDLearner import *

# Inputs handed to the learner in the following cells.
# The self-assignment is a no-op; it only makes explicit that profile_df is
# expected to exist already at this point.
profile_df = profile_df
# "pair" column of the friendship table, one entry per friendship row.
friends_ls = friends_df["pair"]

In [4]:
import cProfile, pstats, StringIO

# Profile a short (max_iter=5) training run with cProfile.
pr = cProfile.Profile()
pr.enable()
print("Profiler start analyzing ... ")

try:
    res = learning_wrapper(profile_df=profile_df, friends_pair=friends_ls, max_iter=5,
                           k=2, c=0.1, dropout_rate=0.2, fit_rayleigh=True, verbose=True)
finally:
    # Stop collecting even when training raises or is interrupted; otherwise
    # the profiler stays enabled and keeps recording whatever runs next.
    pr.disable()

print("Profiler stop here ... ")

Profiler start analyzing ... 
Initiating ...
1 iteration is in processing ...
--- 34.55 seconds ---
--- 26.84 seconds ---
1) #fit: 62, #unfit: 38, #buffer: 0
1) #fit: 62, #unfit: 38, #buffer: 0
1) #fit: 67, #unfit: 0, #buffer: 33
2 iteration is in processing ...
--- 53.77 seconds ---
--- 38.46 seconds ---
1) #fit: 55, #unfit: 12, #buffer: 33
1) #fit: 77, #unfit: 12, #buffer: 11
1) #fit: 81, #unfit: 0, #buffer: 19
3 iteration is in processing ...
--- 37.69 seconds ---
--- 111.39 seconds ---
1) #fit: 52, #unfit: 29, #buffer: 19
1) #fit: 60, #unfit: 29, #buffer: 11
1) #fit: 66, #unfit: 0, #buffer: 34
** dropout is activating ...

4 iteration is in processing ...
--- 64.80 seconds ---
--- 42.51 seconds ---
1) #fit: 56, #unfit: 10, #buffer: 34
1) #fit: 79, #unfit: 10, #buffer: 11
1) #fit: 80, #unfit: 0, #buffer: 20
5 iteration is in processing ...
--- 39.67 seconds ---
--- 68.17 seconds ---
1) #fit: 55, #unfit: 25, #buffer: 20
1) #fit: 64, #unfit: 25, #buffer: 11
1) #fit: 68, #unfit: 0, #bu

In [5]:
# Render the collected profile into an in-memory text buffer, ordered by
# cumulative time, and then print the buffer's content.
sortby = 'cumulative'
s = StringIO.StringIO()
ps = pstats.Stats(pr, stream=s)
# sort_stats() reorders the Stats object itself (and returns it), so the
# call does not need to be chained onto the constructor.
ps.sort_stats(sortby)
ps.print_stats()
print(s.getvalue())

         1226927261 function calls (1189028819 primitive calls) in 1031.140 seconds

   Ordered by: cumulative time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        3    0.000    0.000 1031.140  343.713 /home/beingzy/anaconda/lib/python2.7/site-packages/IPython/core/interactiveshell.py:3005(run_code)
        1    0.000    0.000 1031.140 1031.140 <ipython-input-4-83f818a3e67e>:7(<module>)
        1    0.014    0.014 1031.140 1031.140 GWDLearner.py:452(learning_wrapper)
       18    0.245    0.014  913.252   50.736 GWDLearner.py:262(ldm_train_with_list)
       18    0.004    0.000  913.007   50.723 learning_dist_metrics/ldm.py:48(fit)
       18    0.071    0.004  913.002   50.722 learning_dist_metrics/ldm.py:88(_fit)
   131576    0.157    0.000  891.123    0.007 /home/beingzy/anaconda/lib/python2.7/site-packages/scipy/optimize/optimize.py:280(function_wrapper)
     3364    0.026    0.000  890.629    0.265 learning_dist_metrics/ldm.py:143(objective_func)
  

In [7]:
import json

# Results are written below "<current working directory>/results/".
root_path = os.getcwd()
data_path = root_path + "/results/"

# Create the output directory on first use so that open() below does not fail
# merely because the directory is missing.
if not os.path.isdir(data_path):
    os.makedirs(data_path)

# extract component of interest (the third element of the learner's result)
_, _, info_pkg = res

# Serialize before opening the file: if json.dumps() fails, an existing output
# file is left untouched instead of being truncated to zero bytes.
payload = json.dumps(info_pkg)

# create output connection; the with-block closes it even if the write fails
outfile = data_path + "beta_test_20150826_v01.json"
print("Writing out information...")
with open(outfile, 'w') as out_conn:
    out_conn.write(payload)

Writing out information...


In [41]:
# Repeat the training ten times with the buffer group disabled and store the
# result of every run in its own JSON file.
for item in range(10):

    print("%d / 10 ****************** \n" % item)

    res = learning_wrapper(profile_df=profile_df, friends_pair=friends_ls, max_iter=50,
                           k=2, c=0.1, dropout_rate=0.2, fit_rayleigh=True, verbose=True,
                           buffer_group_enabled=False)

    # NOTE(review): the complete result is serialized here, whereas the earlier
    # single-run cell writes only its third element (info_pkg) -- confirm that
    # dumping all of `res` is intended and that it is JSON-serializable.
    payload = json.dumps(res)

    # Write to the per-run file name. The previous code built out_fname but
    # opened `outfile`, so every run overwrote the same earlier output file.
    out_fname = data_path + "beta_test_20150826_nobuffer_%d.json" % item
    print("Writing out information...")
    with open(out_fname, 'w') as out_conn:
        out_conn.write(payload)

0 / 10 ****************** 

Initiating ...
1 iteration is in processing ...
--- 25.23 seconds ---
--- 21.14 seconds ---
1) #fit: 55, #unfit: 45, #buffer: 0
1) #fit: 55, #unfit: 45, #buffer: 0
1) #fit: 100, #unfit: 0, #buffer: 0
2 iteration is in processing ...
--- 18.02 seconds ---
--- 20.02 seconds ---
1) #fit: 60, #unfit: 40, #buffer: 0
1) #fit: 60, #unfit: 40, #buffer: 0
1) #fit: 100, #unfit: 0, #buffer: 0
** dropout is activating ...

3 iteration is in processing ...
--- 34.97 seconds ---
--- 22.76 seconds ---
1) #fit: 57, #unfit: 43, #buffer: 0
1) #fit: 57, #unfit: 43, #buffer: 0
1) #fit: 100, #unfit: 0, #buffer: 0
4 iteration is in processing ...
--- 38.45 seconds ---
--- 20.04 seconds ---
1) #fit: 57, #unfit: 43, #buffer: 0
1) #fit: 57, #unfit: 43, #buffer: 0
1) #fit: 100, #unfit: 0, #buffer: 0
5 iteration is in processing ...
--- 20.77 seconds ---
--- 22.45 seconds ---
1) #fit: 59, #unfit: 41, #buffer: 0
1) #fit: 59, #unfit: 41, #buffer: 0
1) #fit: 100, #unfit: 0, #buffer: 0
**

KeyboardInterrupt: 

In [44]:
# Repeat the training ten times with the buffer group disabled and a dropout
# rate of 0.1, storing the information package of every run in its own file.
for item in range(10):

    print("%d / 10 ****************** \n" % item)

    res = learning_wrapper(profile_df=profile_df, friends_pair=friends_ls, max_iter=50,
                           k=2, c=0.1, dropout_rate=0.1, fit_rayleigh=True, verbose=False,
                           buffer_group_enabled=False)

    # Take the information package from THIS run. The previous code never
    # unpacked `res` inside the loop, so it kept writing the stale info_pkg
    # left behind by an earlier cell.
    _, _, info_pkg = res
    payload = json.dumps(info_pkg)

    # Write to the per-run file name. The previous code built out_fname but
    # opened `outfile`, so every run overwrote the same earlier output file.
    out_fname = data_path + "beta_test_20150826_nobuffer_droprate_dot1_%d.json" % item
    print("Writing out information...")
    with open(out_fname, 'w') as out_conn:
        out_conn.write(payload)

0 / 10 ****************** 

Initiating ...
1 iteration is in processing ...
--- 22.25 seconds ---
--- 77.95 seconds ---
2 iteration is in processing ...
--- 22.51 seconds ---
--- 23.85 seconds ---
3 iteration is in processing ...
--- 36.50 seconds ---
--- 22.37 seconds ---
** dropout is activating ...

4 iteration is in processing ...
--- 22.17 seconds ---
--- 28.65 seconds ---
** dropout is activating ...

5 iteration is in processing ...
--- 23.60 seconds ---
--- 29.45 seconds ---
** dropout is activating ...

6 iteration is in processing ...
--- 24.44 seconds ---
--- 25.77 seconds ---
** dropout is activating ...

7 iteration is in processing ...
--- 30.55 seconds ---
--- 24.06 seconds ---
** dropout is activating ...

8 iteration is in processing ...
--- 20.16 seconds ---
--- 27.34 seconds ---
** dropout is activating ...

9 iteration is in processing ...
--- 20.83 seconds ---
--- 46.11 seconds ---
** dropout is activating ...

10 iteration is in processing ...
--- 24.46 seconds --

ValueError: array must not contain infs or NaNs