# print out Table 2

In [1]:
import pandas as pd
import numpy as np
import networkx as nx
import math
import os

ROOTPATH = os.getcwd()[:-3]

## setup

In [2]:
def compute_surprise(nt, t, t0):
    p = t/nt
    p0 = t0/nt
    return (t-t0)/math.sqrt(nt*p0*(1-p0))


def print_table2(triads, triads_shuffled):
    names = ["epinions", "slashdot", "wikipedia"]
    triad_order = ["T3", "T1", "T2", "T0"]
    triads = triads[names].reindex(triad_order)
    triads_shuffled = triads_shuffled[names].reindex(triad_order)
    total = np.sum(triads.values, axis=0)
    total_shuffled = np.sum(triads_shuffled.values, axis=0)
    
    df_all = pd.DataFrame()
    for i,name in enumerate(names):
        df = pd.DataFrame()
        df["name"] = [name]*4
        df["triad"] = triad_order
        df["Ti"] = [int(x) for x in triads[name].values]
        df["P(Ti)"] = triads[name].values/total[i]
        df["P0(Ti)"] = triads_shuffled[name].values/total_shuffled[i]
        df["s(Ti)"] = [compute_surprise(total[i],t,t0) for t,t0 in \
                       zip(triads[name].values, triads_shuffled[name].values)]
        df_all = df_all.append(df)
    return df_all

## using my first counting method

In [3]:
triads1 = pd.read_csv(ROOTPATH + "/data/summary/static_triads1.csv").set_index("triads")
triads1_shuffled = pd.read_csv(ROOTPATH + "/data/summary/static_triads1_shuffled.csv").set_index("triads")

In [4]:
print_table2(triads1, triads1_shuffled)  # proportion-wise pretty close but underestimated the number of triads

Unnamed: 0,name,triad,Ti,P(Ti),P0(Ti),s(Ti)
0,epinions,T3,4074387,0.804129,0.583766,335.669141
1,epinions,T1,390712,0.077112,0.050675,119.415055
2,epinions,T2,540705,0.106715,0.357774,-1564.492918
3,epinions,T0,61028,0.012045,0.007784,50.047912
0,slashdot,T3,425327,0.722511,0.456948,143.029444
1,slashdot,T1,78325,0.133052,0.10124,-15.723103
2,slashdot,T2,72882,0.123806,0.425166,-724.771687
3,slashdot,T0,12145,0.020631,0.016646,-12.35153
0,wikipedia,T3,420769,0.668858,0.494414,234.950026
1,wikipedia,T1,53238,0.084628,0.090246,-28.253162


## using my second counting method [on-going]

In [7]:
triads2 = pd.read_csv(ROOTPATH + "/data/summary/static_triads2.csv").set_index("triads")
triads2_shuffled = pd.read_csv(ROOTPATH + "/data/summary/static_triads2_shuffled.csv").set_index("triads")

In [8]:
print_table2(triads2, triads2_shuffled) 

Unnamed: 0,name,triad,Ti,P(Ti),P0(Ti),s(Ti)
0,epinions,T3,11616708,0.872278,0.621683,1885.702401
1,epinions,T1,688557,0.051703,0.054841,-50.306715
2,epinions,T2,924739,0.069437,0.320281,-1961.950236
3,epinions,T0,87668,0.006583,0.003194,219.128033
0,slashdot,T3,1266646,0.839892,0.465364,922.09343
1,slashdot,T1,115884,0.076841,0.117505,-155.075939
2,slashdot,T2,109303,0.072477,0.405703,-833.391249
3,slashdot,T0,16272,0.01079,0.011428,-7.370175
0,wikipedia,T3,540842,0.685437,0.483185,359.516696
1,wikipedia,T1,67278,0.085265,0.10898,-67.600888
