In [21]:
import pandas as pd
import plotly.express as px
import chart_studio as cs
import json

# Lift pandas' display truncation so frames render every row and every column.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

## Triadic Census


In [22]:
# Dimension table: the first CSV column becomes the index, then every column is
# passed through pd.eval.
# NOTE(review): pd.eval evaluates the file's contents as expressions -- only run
# this against a trusted dimensions.csv.
dims_raw = pd.read_csv('dimensions.csv', index_col=0)
dims = dims_raw.apply(pd.eval)

# Census counts, one row per "<subreddit> <n>" label (first CSV column is the index).
df = pd.read_csv('results/reddit/final_reddit.csv', index_col=0)

# Preview the first 11 rows.
df.head(11)

Unnamed: 0,003,012,102,021D,021U,021C,111D,111U,030T,030C,201,120D,120U,120C,210,300
letsplay 0,133580589.0,1351165,1236397,4311,4919,6298,15847,15151,183,20,16495,178,194,273,826,394
letsplay 1,122289773.0,1349974,1070801,4971,4738,7018,12944,14408,186,39,12561,155,170,229,543,224
letsplay 2,160341754.0,1554224,1232953,5006,6235,8167,14800,14049,169,15,12447,158,148,206,439,174
letsplay 3,133327933.0,1552604,1278983,5341,6361,8190,16840,17289,256,48,17398,211,226,341,845,374
letsplay 4,158005466.0,1732390,1407152,5568,7170,9071,18855,19246,246,48,19100,247,288,368,1123,442
letsplay 5,189702783.0,2029225,1653747,6823,7374,10663,23277,24432,309,49,27038,325,365,458,1476,756
letsplay 6,200072756.0,2130060,1850575,7943,9178,12572,28824,27099,397,52,30054,411,433,572,1766,963
letsplay 7,145560180.0,1543731,1214302,4342,6005,7443,15313,13389,217,35,12946,166,197,260,621,214
letsplay 8,125090949.0,1401950,1119797,4910,5261,6987,15171,14817,187,28,14979,159,206,235,699,325
letsplay 9,129738800.0,1406011,1130799,5131,5294,6781,13549,15227,185,34,13698,156,224,241,577,268


## 3 Dimensions

In [23]:
# Step 1: divide every column by that column's total over all rows.
column_totals = df.sum(axis=0)
scale_triads = df.div(column_totals, axis=1)

# Step 2: divide every row by its own total so each row sums to 1.
row_totals = scale_triads.sum(axis=1)
scale_reddits = scale_triads.div(row_totals, axis=0)

# Step 3: project the normalised rows onto the columns of `dims`.
scaled = scale_reddits.dot(dims)
display(scaled.head(11))

Unnamed: 0,transitivity,equality,rationality
letsplay 0,1.87608,1.28604,0.704693
letsplay 1,1.51838,1.44114,0.785312
letsplay 2,1.4145,1.37713,0.808248
letsplay 3,1.68302,1.37651,0.746526
letsplay 4,1.77658,1.34653,0.733801
letsplay 5,1.97954,1.27601,0.691576
letsplay 6,2.04565,1.23256,0.682179
letsplay 7,1.50969,1.40893,0.791857
letsplay 8,1.73406,1.35253,0.746725
letsplay 9,1.60855,1.42109,0.782804


## Mean and Standard Deviation

In [29]:
def groupby_func(index):
    """Return an index label with its last space-separated token removed.

    Used as a groupby key so that labels such as 'letsplay 0' and 'letsplay 1'
    fall into the same 'letsplay' group. A label that contains no space is
    returned unchanged.
    """
    prefix, *_ = index.rsplit(' ', 1)
    return prefix

# Make sure the three dimension columns are numeric before aggregating.
for column in ('transitivity', 'equality', 'rationality'):
    scaled[column] = pd.to_numeric(scaled[column])

# Group rows by the label returned from groupby_func, keeping first-appearance order.
by_subreddit = scaled.groupby(by=groupby_func, sort=False)
mean = by_subreddit.mean()
std = by_subreddit.std()

display(mean.head(5))
display(std.head(5))

Unnamed: 0,transitivity,equality,rationality
letsplay,1.722338,1.352821,0.74594
DJs,0.888145,1.661795,0.923705
IDAP,0.478611,1.721489,1.067061
climateskeptics,2.298001,1.200153,0.6377
RandomActsOfGaming,0.488858,1.951024,0.967563


Unnamed: 0,transitivity,equality,rationality
letsplay,0.199863,0.06459,0.042397
DJs,0.116464,0.097635,0.03719
IDAP,0.318127,0.181861,0.083224
climateskeptics,0.211911,0.102074,0.049238
RandomActsOfGaming,0.181109,0.114363,0.063559


## Plots

In [49]:
# Put the per-group means and standard deviations side by side. Merging on the
# index values yields a 'key_0' column holding them, which is renamed to 'subreddit'.
graph = (
    mean
    .merge(std, on=[mean.index], suffixes=('_mean', '_std'))
    .rename(columns={'key_0': 'subreddit'})
)
display(graph.head(5))

std_columns = ['transitivity_std', 'equality_std', 'rationality_std']

# Hover text lines; customdata indices follow the order of `custom_data` below.
hover_lines = [
    "<b>Subreddit: %{customdata[0]}</b>",
    "Transitivity mean: %{x:.2f}",
    "Equality mean: %{y:.2f}",
    "Rationality mean: %{z:.2f}",
    "Transitivity std: %{customdata[1]:.2f}",
    "Equality std: %{customdata[2]:.2f}",
    "Rationality std: %{customdata[3]:.2f}",
]

# 3D scatter of the means, with the standard deviations drawn as error bars.
fig = px.scatter_3d(
    graph,
    x='transitivity_mean',
    y='equality_mean',
    z='rationality_mean',
    custom_data=['subreddit', *std_columns],
    hover_name='subreddit',
    error_x='transitivity_std',
    error_y='equality_std',
    error_z='rationality_std',
)
fig.update_traces(hovertemplate="<br>".join(hover_lines))
fig.show()

Unnamed: 0,subreddit,transitivity_mean,equality_mean,rationality_mean,transitivity_std,equality_std,rationality_std
0,letsplay,1.722338,1.352821,0.74594,0.199863,0.06459,0.042397
1,DJs,0.888145,1.661795,0.923705,0.116464,0.097635,0.03719
2,IDAP,0.478611,1.721489,1.067061,0.318127,0.181861,0.083224
3,climateskeptics,2.298001,1.200153,0.6377,0.211911,0.102074,0.049238
4,RandomActsOfGaming,0.488858,1.951024,0.967563,0.181109,0.114363,0.063559


## Pushing Plots to Plotly
Link: https://plotly.com/~gracejyzhang/1/

Adding a slider will likely require Dash or other software.

In [None]:
## Disabled: uploads `fig` to Chart Studio. The module is imported as `cs`, so the calls use that alias.
## with open('miscellaneous/credentials.json', mode='r') as f:
##     creds = json.load(f)

## cs.tools.set_credentials_file(username=creds['username'], api_key=creds['api_key'])
## print(cs.plotly.plot(fig, filename = 'subreddits', auto_open=True))

## Notes

* Todo: statistical significance (p-value)
* Dataset: William L. Hamilton*, Justine Zhang*, Cristian Danescu-Niculescu-Mizil, Dan Jurafsky, Jure Leskovec. Loyalty in Online Communities. (Currently under review at WWW 2017). *Equal contribution. http://snap.stanford.edu/data/web-RedditNetworks.html

In [None]:
# temp: each row's share of its subreddit's total count.
#
# Relies on `df` and on `groupby_func` from the "Mean and Standard Deviation"
# cell, which strips the trailing " <n>" from a row label; it is not redefined here.

# Total count per row, summed across all columns.
months = df.sum(axis=1)

by_subreddit = months.groupby(by=groupby_func, sort=False)

# Total count per subreddit, one entry per group.
tmp = by_subreddit.sum()

# Broadcast each subreddit total back onto its own rows. `transform` keeps the
# original row index, so this works for any number of rows per subreddit and
# needs neither numpy nor a hard-coded group size.
totals = by_subreddit.transform('sum')

weights = months / totals
weights.head(11)