In [52]:
import pandas as pd
import plotly.express as px
import chart_studio as cs
import json
from sklearn.preprocessing import MinMaxScaler

# Lift pandas' display limits so frames are rendered without row/column truncation.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

## Reddit Data: Triadic Census


In [1]:
# Dimension table: first CSV column is the index; every column is then passed
# through pd.eval (presumably the cells hold expressions stored as text -- confirm
# against dimensions.csv).
dims_raw = pd.read_csv('dimensions.csv', index_col=0)
dims = dims_raw.apply(pd.eval)

# Reddit results table, indexed by its first CSV column.
df = pd.read_csv('results/reddit_politics_sd/reddit.csv', index_col=0)

# Preview the first 11 rows.
df.head(11)

NameError: name 'pd' is not defined

## Reddit Data: 3 Dimensions

In [54]:
# Step 1: divide every column by its column total, so each column sums to 1.
column_totals = df.sum(axis=0)
scale_triads = df.div(column_totals, axis='columns')

# Step 2: divide every row by its row total, so each row sums to 1.
row_totals = scale_triads.sum(axis=1)
scale_reddits = scale_triads.div(row_totals, axis='index')

# Step 3: project the normalised rows onto the dimension weights in `dims`.
scaled = scale_reddits.dot(dims)
display(scaled.head(11))

Unnamed: 0,transitivity,equality,rationality
Conservative 0,0.80865,1.496,1.10155
Conservative 1,0.746316,1.51454,1.10595
Conservative 2,0.879964,1.48158,1.05047
Conservative 3,0.619976,1.4864,1.17615
Conservative 4,0.701938,1.55116,1.10308
Conservative 5,0.810766,1.43443,1.09458
Conservative 6,0.91291,1.43642,1.04368
Conservative 7,0.887692,1.46835,1.04848
Conservative 8,0.783697,1.49317,1.06068
Conservative 9,0.829981,1.44578,1.07221


## Reddit Data: Feature Scaling

In [59]:
# Rescale the three dimension columns with a min-max scaler; `scaled` is
# overwritten in place, so the unscaled values are no longer available afterwards.
dimension_cols = ['transitivity', 'equality', 'rationality']
scaler = MinMaxScaler()
scaled[dimension_cols] = scaler.fit_transform(scaled[dimension_cols])
display(scaled.head(11))

Unnamed: 0,transitivity,equality,rationality
Conservative 0,0.116333,0.571552,0.725549
Conservative 1,0.077899,0.608375,0.73211
Conservative 2,0.160303,0.542898,0.649433
Conservative 3,0.0,0.552464,0.836715
Conservative 4,0.050536,0.681136,0.727824
Conservative 5,0.117637,0.44922,0.715161
Conservative 6,0.180617,0.453162,0.639311
Conservative 7,0.165069,0.516599,0.646468
Conservative 8,0.100947,0.565927,0.664647
Conservative 9,0.129485,0.47176,0.681823


## Reddit Data: Mean and Standard Deviation

In [60]:
def groupby_func(index):
    """Return the group key for a row label by dropping its last space-separated token.

    For example ``'Conservative 0'`` maps to ``'Conservative'``. A label that
    contains no space is returned unchanged.
    """
    head, sep, tail = index.rpartition(' ')
    # When no space is found, rpartition leaves the whole label in ``tail``.
    return head if sep else tail

# Make sure the three dimension columns are numeric before aggregating.
for column in ('transitivity', 'equality', 'rationality'):
    scaled[column] = pd.to_numeric(scaled[column])

# Group rows by the key groupby_func derives from each index label, keeping
# groups in first-appearance order (sort=False), then aggregate per group.
grouped = scaled.groupby(by=groupby_func, sort=False)
mean = grouped.mean()
std = grouped.std()

display(mean.head(5))
display(std.head(5))

Unnamed: 0,transitivity,equality,rationality
Conservative,0.110324,0.544339,0.703904
conservatives,0.817714,0.349931,0.16472
Liberal,0.162635,0.43284,0.627877
Libertarian,0.150464,0.238666,0.882818
Republican,0.366962,0.619447,0.44134


Unnamed: 0,transitivity,equality,rationality
Conservative,0.052723,0.069573,0.056981
conservatives,0.117086,0.204579,0.090887
Liberal,0.113081,0.165539,0.071866
Libertarian,0.053989,0.061155,0.067782
Republican,0.148121,0.223651,0.07586


## Reddit Data: Merge

In [61]:
# Join the per-group means and standard deviations side by side. Merging on an
# array key (the index of `mean`) yields a key column named 'key_0', which is
# renamed to the shared label column; 'colour' tags every row as Reddit data.
reddit = (
    mean.merge(std, on=[mean.index], suffixes=('_mean', '_std'))
    .rename(columns={'key_0': 'subreddit/party'})
    .assign(colour='Reddit')
)
display(reddit)

Unnamed: 0,subreddit/party,transitivity_mean,equality_mean,rationality_mean,transitivity_std,equality_std,rationality_std,colour
0,Conservative,0.110324,0.544339,0.703904,0.052723,0.069573,0.056981,Reddit
1,conservatives,0.817714,0.349931,0.16472,0.117086,0.204579,0.090887,Reddit
2,Liberal,0.162635,0.43284,0.627877,0.113081,0.165539,0.071866,Reddit
3,Libertarian,0.150464,0.238666,0.882818,0.053989,0.061155,0.067782,Reddit
4,Republican,0.366962,0.619447,0.44134,0.148121,0.223651,0.07586,Reddit
5,socialism,0.343485,0.445085,0.499301,0.10834,0.114517,0.08891,Reddit


## V-Party Data: 2 Dimensions

In [66]:
# Load the V-Party dimensions and replace the stored inverse with its
# reciprocal, so the column matches the Reddit frame's 'transitivity_mean'.
vp_raw = pd.read_csv('results/vparty/politics_dims.csv')
vp = (
    vp_raw
    .assign(transitivity_mean=1 / vp_raw['transitivity_inverse'])
    .drop(columns=['transitivity_inverse'])
)
display(vp)

Unnamed: 0,subreddit/party,equality_mean,colour,transitivity_mean
0,Liberal Party of Australia,4.15,Australia,27.027027
1,Australian Labor Party,1.825,Australia,27.027027
2,Democratic Party,2.173,United States of America,8.474576
3,Republican Party,4.654,United States of America,2.403846
4,Conservatives,3.751,United Kingdom,10.869565
5,Liberal Party,3.044,United Kingdom,14.084507
6,Labour,1.631,United Kingdom,10.752688
7,Conservative Party of Canada (2003),4.181,Canada,9.090909
8,Liberal Party of Canada,1.782,Canada,17.857143
9,Christian Democratic Union,3.637,Germany,20.408163


## V-Party Data: Feature Scaling

In [67]:
# Re-fit the existing scaler on the V-Party columns and overwrite them in place.
# NOTE(review): the scaler is fitted separately from the Reddit data, so the two
# datasets are each rescaled against their own min/max -- confirm this is intended.
vp_scaled_cols = ['transitivity_mean', 'equality_mean']
vp[vp_scaled_cols] = scaler.fit_transform(vp[vp_scaled_cols])
display(vp)

Unnamed: 0,subreddit/party,equality_mean,colour,transitivity_mean
0,Liberal Party of Australia,0.833278,Australia,0.970413
1,Australian Labor Party,0.064175,Australia,0.970413
2,Democratic Party,0.179292,United States of America,0.239251
3,Republican Party,1.0,United States of America,0.0
4,Conservatives,0.70129,United Kingdom,0.333638
5,Liberal Party,0.467416,United Kingdom,0.460341
6,Labour,0.0,United Kingdom,0.329032
7,Conservative Party of Canada (2003),0.843533,Canada,0.263541
8,Liberal Party of Canada,0.04995,Canada,0.609023
9,Christian Democratic Union,0.663579,Germany,0.70956


## Combining & Plotting Both Datasets

Note: attributes not present in the V-Party dataset (rationality_mean, equality_std, transitivity_std, rationality_std) are filled with 0, so V-Party points are plotted at rationality = 0 and have no error bars.

In [70]:
# Stack the Reddit and V-Party frames into one table. DataFrame.append was
# removed in pandas 2.0, so pd.concat is used instead; ignore_index=True gives
# the same fresh 0..n-1 index that append(...).reset_index(drop=True) produced.
# Columns missing from the V-Party frame become NaN and are then filled with 0.
graph = pd.concat([reddit, vp], ignore_index=True).fillna(0)
display(graph)

# 3D scatter: one point per subreddit/party, coloured by data source/country,
# with the per-dimension standard deviations drawn as error bars.
fig = px.scatter_3d(
    graph,
    x='transitivity_mean',
    y='equality_mean',
    z='rationality_mean',
    custom_data=['subreddit/party', 'transitivity_std', 'equality_std', 'rationality_std'],
    hover_name='subreddit/party',
    error_x='transitivity_std',
    error_y='equality_std',
    error_z='rationality_std',
    color='colour',
)

# Custom hover text; customdata indices follow the order of `custom_data` above.
fig.update_traces(
    hovertemplate="<br>".join([
        "<b>Subreddit/Party: %{customdata[0]}</b>",
        "Transitivity mean: %{x:.2f}",
        "Equality mean: %{y:.2f}",
        "Rationality mean: %{z:.2f}",
        "Transitivity std: %{customdata[1]:.2f}",
        "Equality std: %{customdata[2]:.2f}",
        "Rationality std: %{customdata[3]:.2f}"
    ])
)
fig.show()

Unnamed: 0,subreddit/party,transitivity_mean,equality_mean,rationality_mean,transitivity_std,equality_std,rationality_std,colour
0,Conservative,0.110324,0.544339,0.703904,0.052723,0.069573,0.056981,Reddit
1,conservatives,0.817714,0.349931,0.16472,0.117086,0.204579,0.090887,Reddit
2,Liberal,0.162635,0.43284,0.627877,0.113081,0.165539,0.071866,Reddit
3,Libertarian,0.150464,0.238666,0.882818,0.053989,0.061155,0.067782,Reddit
4,Republican,0.366962,0.619447,0.44134,0.148121,0.223651,0.07586,Reddit
5,socialism,0.343485,0.445085,0.499301,0.10834,0.114517,0.08891,Reddit
6,Liberal Party of Australia,0.970413,0.833278,0.0,0.0,0.0,0.0,Australia
7,Australian Labor Party,0.970413,0.064175,0.0,0.0,0.0,0.0,Australia
8,Democratic Party,0.239251,0.179292,0.0,0.0,0.0,0.0,United States of America
9,Republican Party,0.0,1.0,0.0,0.0,0.0,0.0,United States of America


## Pushing Plots to Plotly
Link: https://plotly.com/~gracejyzhang/5/

In [71]:
# Read the Chart Studio username and API key from a local JSON file so they
# are not hard-coded in the notebook.
with open('miscellaneous/credentials.json', mode='r') as creds_file:
    creds = json.load(creds_file)

# Register the credentials with chart_studio, upload the figure, and print
# the value returned by the upload call.
cs.tools.set_credentials_file(username=creds['username'], api_key=creds['api_key'])
plot_url = cs.plotly.plot(fig, filename='subreddits', auto_open=True)
print(plot_url)

https://plotly.com/~gracejyzhang/5/


## Notes

* Todo: statistical significance (p-value)
* Reddit dataset: William L. Hamilton*, Justine Zhang*, Cristian Danescu-Niculescu-Mizil, Dan Jurafsky, Jure Leskovec. Loyalty in Online Communities. (Currently under review at WWW 2017). *Equal contribution. http://snap.stanford.edu/data/web-RedditNetworks.html
* V-Party dataset: Lührmann, Anna, Nils Düpont, Masaaki Higashijima, Yaman Berker Kavasoglu, Kyle L. Marquardt, Michael Bernhard, Holger Döring, Allen Hicken, Melis Laebens, Staffan I. Lindberg, Juraj Medzihorsky, Anja Neundorf, Ora John Reuter, Saskia Ruth-Lovell, Keith R. Weghorst, Nina Wiesehomeier, Joseph Wright, Nazifa Alizada, Paul Bederke, Lisa Gastaldi, Sandra Grahn, Garry Hindle, Nina Ilchenko, Johannes von Römer, Steven Wilson, Daniel Pemstein, Brigitte Seim. 2020. Varieties of Party Identity and Organization (V-Party) Dataset V1. Varieties of Democracy (V-Dem) Project. https://doi.org/10.23696/vpartydsv1. https://www.v-dem.net/en/data/data/v-party-dataset/