In [141]:
%matplotlib notebook

import common
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import plotly.express as px

# Turn off pandas' display truncation: a limit of None means frames are
# rendered with all of their rows and all of their columns.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

## Triadic Census
Note that the triadic censuses for the groups (such as *socialism + Libertarian*) were obtained by generating a network with all nodes/edges in the included subreddits, not simply by combining the triadic censuses of the individual subreddits.

In [108]:
# Dimension table: read with its first column as the index, then pass every
# column through pd.eval (presumably the cells are stored as string
# expressions -- confirm against dimensions.csv).
dims = pd.read_csv('dimensions.csv', index_col=0)
dims = dims.apply(pd.eval)

# Triadic census counts; the first CSV column (labels such as "letsplay 0")
# becomes the row index.
df = pd.read_csv('aws_processed/reddit.csv', index_col=0)

# Preview the first 11 rows.
df.iloc[:11]

Unnamed: 0,003,012,102,021D,021U,021C,111D,111U,030T,030C,201,120D,120U,120C,210,300
letsplay 0,133580589,1351165,1236397,4311,4919,6298,15847,15151,183,20,16495,178,194,273,826,394
letsplay 1,122289773,1349974,1070801,4971,4738,7018,12944,14408,186,39,12561,155,170,229,543,224
letsplay 2,160341754,1554224,1232953,5006,6235,8167,14800,14049,169,15,12447,158,148,206,439,174
letsplay 3,133327933,1552604,1278983,5341,6361,8190,16840,17289,256,48,17398,211,226,341,845,374
letsplay 4,158005466,1732390,1407152,5568,7170,9071,18855,19246,246,48,19100,247,288,368,1123,442
letsplay 5,189702783,2029225,1653747,6823,7374,10663,23277,24432,309,49,27038,325,365,458,1476,756
letsplay 6,200072756,2130060,1850575,7943,9178,12572,28824,27099,397,52,30054,411,433,572,1766,963
letsplay 7,145560180,1543731,1214302,4342,6005,7443,15313,13389,217,35,12946,166,197,260,621,214
letsplay 8,125090949,1401950,1119797,4910,5261,6987,15171,14817,187,28,14979,159,206,235,699,325
letsplay 9,129738800,1406011,1130799,5131,5294,6781,13549,15227,185,34,13698,156,224,241,577,268


## 3 Dimensions

In [109]:
# Step 1: divide every triad-type column by that column's total over all rows.
scale_triads = df / df.sum(axis=0)

# Step 2: divide every row by its own total, so each row sums to 1.
scale_reddits = scale_triads.div(scale_triads.sum(axis=1), axis=0)

# Step 3: matrix-multiply the normalised census with `dims`; the result has
# one column per column of `dims`.
scaled = scale_reddits @ dims
display(scaled.iloc[:11])

Unnamed: 0,transitivity,equality,rationality
letsplay 0,1.96182,1.26078,0.686472
letsplay 1,1.581,1.42242,0.770981
letsplay 2,1.46708,1.36335,0.795574
letsplay 3,1.75693,1.35424,0.730439
letsplay 4,1.8517,1.32371,0.717205
letsplay 5,2.07112,1.2484,0.672315
letsplay 6,2.14105,1.20437,0.662376
letsplay 7,1.56638,1.39217,0.778281
letsplay 8,1.81138,1.32904,0.729524
letsplay 9,1.67671,1.39983,0.766899


## Mean and Standard Deviation

In [107]:
def groupby_func(index):
    """Map a row label to its group name by dropping the last space-separated token.

    For example ``'letsplay 0'`` becomes ``'letsplay'``. A label that contains
    no space is returned unchanged.
    """
    head, sep, tail = index.rpartition(' ')
    # With no space present, rpartition puts the whole label in `tail`.
    return head if sep else tail

# Cast the three dimension columns to a numeric dtype in place before
# aggregating (presumably the dot product with the pd.eval'd `dims` leaves
# them as object dtype -- confirm).
for column in ('transitivity', 'equality', 'rationality'):
    scaled[column] = pd.to_numeric(scaled[column])

# Group rows by their label with the trailing token removed, keeping groups in
# first-seen order, and compute the per-group mean and (pandas default,
# sample) standard deviation.
per_subreddit = scaled.groupby(by=groupby_func, sort=False)
mean = per_subreddit.mean()
std = per_subreddit.std()

display(mean.iloc[:5])
display(std.iloc[:5])

Unnamed: 0,transitivity,equality,rationality
letsplay,1.796939,1.330441,0.729448
DJs,0.91857,1.653727,0.913916
IDAP,0.484227,1.727715,1.061774
climateskeptics,2.401097,1.168435,0.617117
RandomActsOfGaming,0.507154,1.948477,0.959277


Unnamed: 0,transitivity,equality,rationality
letsplay,0.213478,0.068041,0.044597
DJs,0.127779,0.09799,0.038806
IDAP,0.33404,0.180494,0.084897
climateskeptics,0.223435,0.102861,0.050467
RandomActsOfGaming,0.189582,0.115308,0.065104


## Plots

In [161]:
# Put the per-subreddit mean and std side by side. The two frames are aligned
# on their index labels (the subreddit names) instead of on a positional key
# array, and the index is then exposed as an ordinary 'subreddit' column, so
# nothing depends on pandas' auto-generated 'key_0' column name.
graph = (
    mean.merge(std, left_index=True, right_index=True, suffixes=('_mean', '_std'))
        .rename_axis('subreddit')
        .reset_index()
)
display(graph[:5])

# 3-D scatter with one point per subreddit at its mean in each dimension; the
# error bars on each axis are the matching standard deviations.
fig = px.scatter_3d(graph, x='transitivity_mean', y='equality_mean', z='rationality_mean',
                   hover_data=['transitivity_std', 'equality_std', 'rationality_std', 'subreddit'],
                   error_x='transitivity_std', error_y='equality_std', error_z='rationality_std')
fig.show()

Unnamed: 0,subreddit,transitivity_mean,equality_mean,rationality_mean,transitivity_std,equality_std,rationality_std
0,letsplay,1.796939,1.330441,0.729448,0.213478,0.068041,0.044597
1,DJs,0.91857,1.653727,0.913916,0.127779,0.09799,0.038806
2,IDAP,0.484227,1.727715,1.061774,0.33404,0.180494,0.084897
3,climateskeptics,2.401097,1.168435,0.617117,0.223435,0.102861,0.050467
4,RandomActsOfGaming,0.507154,1.948477,0.959277,0.189582,0.115308,0.065104


## Notes

* Todo: statistical significance (p-value)
* Dataset: William L. Hamilton*, Justine Zhang*, Cristian Danescu-Niculescu-Mizil, Dan Jurafsky, Jure Leskovec. Loyalty in Online Communities. (Currently under review at WWW 2017). *Equal contribution. http://snap.stanford.edu/data/web-RedditNetworks.html

In [None]:
# temp
# Per-row total: the sum across all triad-type columns for each row of `df`.
months = df.sum(axis='columns')

def groupby_func(index):
    """Map a row label to its group name by dropping the last space-separated token.

    For example ``'letsplay 0'`` becomes ``'letsplay'``. A label that contains
    no space is returned unchanged.
    """
    head, sep, tail = index.rpartition(' ')
    # With no space present, rpartition puts the whole label in `tail`.
    return head if sep else tail

# Total count per subreddit: summed over all of its rows and all triad-type
# columns.
tmp = df.groupby(by=groupby_func, sort=False).sum().sum(axis=1)

# Look up each row's subreddit total by label. This works for any number of
# rows per subreddit, in any order; the previous np.repeat(tmp.values, 11)
# only worked when every subreddit had exactly 11 contiguous rows.
row_groups = months.index.map(groupby_func)
totals = pd.Series(tmp.loc[row_groups].to_numpy(), index=months.index)

# Fraction of its subreddit's total contributed by each individual row.
weights = months / totals
weights[:11]