In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the adjacency matrix; its column labels are elite account screen names.
# NOTE(review): rows are presumably ordinary Twitter users holding 0/1 follow
# indicators -- confirm against the CSV.
df = pd.read_csv("adj-matrix-US.csv")

In [3]:
# Read each elite account's screen name and party label, then lay the party
# labels out as a single-row frame whose columns are the screen names.
us_elites = pd.read_csv("elitesUS.csv", usecols=["US.screen_name", "US.party"])
names = us_elites['US.screen_name'].tolist()
party = us_elites['US.party'].tolist()
party_row = np.asarray(party)[np.newaxis, :]  # shape (1, number of elites)
us_party = pd.DataFrame(party_row, columns=names)

In [4]:
# Inspect the column labels of the adjacency matrix (elite screen names).
df.columns

Index(['BarackObama', 'nytimes', 'Schwarzenegger', 'algore', 'maddow',
       'FoxNews', 'MittRomney', 'MMFlint', 'JerryBrownGov', 'SarahPalinUSA',
       ...
       'Sen_JoeManchin', 'SenDanCoats', 'RepJimMcDermott', 'replouiegohmert',
       'RandyNeugebauer', 'McConnellPress', 'RepJimMatheson',
       'SenJohnBarrasso', 'repcleaver', 'zachwamp'],
      dtype='object', length=318)

In [5]:
# Number of rows to sample: every row of df, capped at 10000.
# The row filter `subset` is deliberately not computed here: it is built
# unconditionally in the next cell, and the old assignment inside the `if`
# was only defined when the cap applied and was overwritten right away.
i = min(df.shape[0], 10000)


In [6]:
# Positions of the rows whose row sum exceeds 10.
eligible_rows = np.where(df.sum(axis=1) > 10)[0]
# Draw the sample WITHOUT replacement so that no row is selected twice
# (np.random.choice defaults to replace=True, which would duplicate rows).
# Without replacement we cannot draw more rows than are eligible, so the
# sample size is capped at len(eligible_rows).
subset = np.random.choice(eligible_rows, min(i, len(eligible_rows)), replace=False)

In [7]:
# Select the sampled rows by integer position (subset holds positions
# produced by np.where on the row sums).
df_subset = df.iloc[subset]

In [8]:
# Keep only the columns whose column sum over the sampled rows exceeds 200.
column_totals = df_subset.sum(axis=0)
subset_polit = np.where(column_totals > 200)
popular_columns = df.columns[subset_polit]
df_subset_subset = df_subset[popular_columns]

In [9]:
# Assemble the Stan data block in "long" format: one observation per
# (row, column) pair of the filtered matrix, i.e. N = J * K observations.
y = df_subset_subset
J = df_subset_subset.shape[0]  # number of rows (Twitter users in the model)
K = df_subset_subset.shape[1]  # number of columns (elite accounts in the model)
stan_data = dict(
    J = J,
    K = K,
    N = J*K,
    # Row (user) index cycles fastest: 1..J, 1..J, ... repeated K times.
    jj = list(range(1, J+1))*K,
    # Column (elite) index changes slowest: 1 repeated J times, then 2, ...
    kk = np.repeat(list(range(1, K+1)), J),
    # Flatten column by column (order='F') so that y[n] is the matrix entry at
    # row jj[n], column kk[n]. The default row-major flatten would pair each
    # value with the wrong (jj, kk) indices whenever J != K.
    # `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0.
    y = y.values.flatten(order='F').tolist())

In [10]:
# Restrict the party labels to the elite accounts kept in y (same column
# order), then transpose so each elite is a row and its label is in column 0.
# NOTE: this rebinds us_party, so the cell is meant to run once after
# us_party has been built.
us_party = us_party[y.columns].T

In [11]:
# Starting values for phi: -1 where the party label is 'D', +1 where it is
# 'R', and 0 for every other label.
party_labels = us_party[0]
is_d = (party_labels == 'D').values
is_r = (party_labels == 'R').values
phi = np.zeros(us_party.shape[0])
phi[is_d] = -1
phi[is_r] = 1
phi

array([-1.,  0.,  1., -1.,  0.,  0.,  1.,  0., -1.,  1.,  0.,  0.,  1.,
        0.,  1.,  1.,  1., -1., -1.,  1.,  1.,  0., -1.,  1.,  1.,  1.,
        0.,  1., -1.,  0.,  0.,  1.,  1., -1., -1.,  0.,  1., -1.,  0.,
       -1.,  1., -1., -1.,  1.,  1., -1., -1.,  1.,  1., -1.,  1.,  1.,
       -1.,  1.,  1., -1.,  1.,  1.,  1., -1., -1., -1.,  1., -1., -1.,
       -1.,  1., -1., -1.,  1.,  1.,  1., -1.,  0., -1.,  1., -1.,  1.,
        1.,  1.,  1., -1.,  1., -1., -1.,  1., -1.,  1.,  1.,  1., -1.,
       -1.,  1., -1.,  1.,  1.,  1., -1.,  1.,  1.,  1.,  1., -1.,  1.,
        1.,  1.,  1.,  1.,  1., -1.,  1.,  0.,  1., -1., -1.,  1., -1.,
        1.,  1.,  1.,  1.,  1.,  0.,  1.,  0., -1.,  1.,  0.,  0.,  1.,
       -1., -1., -1.,  0.,  1.,  1.,  0., -1., -1.,  1.,  1.,  1.,  0.,
       -1.,  1.,  1.,  1.,  0.,  1.,  0.,  0.,  1.,  1.,  1.,  1., -1.,
        0.,  1., -1., -1.,  1.,  1., -1.,  1.,  1.,  1.,  1.,  1.,  1.,
        1.,  1.])

In [12]:
def normalize(x):
    """Standardize ``x``: subtract its mean, then divide by its standard deviation.

    ``x`` must provide ``.mean()`` and support subtraction/division, and the
    centered result must provide ``.std()`` (e.g. a NumPy array or a pandas
    Series). The standard deviation is whatever the object's own ``.std()``
    returns, so NumPy arrays and pandas Series use their respective defaults.
    """
    shifted = x - x.mean()
    scale = shifted.std()
    return shifted / scale

In [13]:
# Starting values handed to the sampler.
# alpha / beta start from the standardized log of the column / row sums of y;
# the small 0.0001 offset keeps the log finite when a sum is zero.
column_sums = y.sum(axis=0)
row_sums = y.sum(axis=1)
stan_init = {
    'alpha': normalize(np.log(column_sums + 0.0001)),
    'sigma_alpha': 1,
    'beta': normalize(np.log(row_sums + 0.0001)),
    'mu_beta': 0,
    'sigma_beta': 1,
    'theta': np.random.normal(size=(J)),  # one random start per row of y
    'phi': phi,                           # party-based starts built earlier
    'mu_phi': 0,
    'sigma_phi': 1,
    'gamma': np.random.normal(),
}

In [14]:
# Length of the per-column sums of y, i.e. the number of columns (equals K).
y.sum(axis=0).shape

(171,)

In [15]:
import pystan

In [16]:
# Stan program, kept as a Python string and compiled later.
# Each observation y[n] is Bernoulli with logit
#   alpha[kk[n]] + beta[jj[n]] - gamma * (theta[jj[n]] - phi[kk[n]])^2,
# where jj indexes Twitter users (1..J) and kk indexes elite accounts (1..K).
# alpha, beta and phi get normal priors with estimated scales (each bounded
# below by 0.1); theta gets a standard normal prior.
# NOTE(review): the comment on y inside the data block says "user i follows
# elite j", while the indices used in the model are jj (user) and kk (elite).
stan_model ="""
data {
  int<lower=1> J; // number of twitter users
  int<lower=1> K; // number of elite twitter accounts
  int<lower=1> N; // N = J x K
  int<lower=1,upper=J> jj[N]; // twitter user for observation n
  int<lower=1,upper=K> kk[N]; // elite account for observation n
  int<lower=0,upper=1> y[N]; // dummy if user i follows elite j
}
parameters {
  vector[K] alpha;
  vector[K] phi;
  vector[J] theta;
  vector[J] beta;
  real mu_beta;
  real<lower=0.1> sigma_beta;
  real mu_phi;
  real<lower=0.1> sigma_phi;
  real<lower=0.1> sigma_alpha;
  real gamma;
}
model {
  alpha ~ normal(0, sigma_alpha);
  beta ~ normal(mu_beta, sigma_beta);
  phi ~ normal(mu_phi, sigma_phi);
  theta ~ normal(0, 1); 
  for (n in 1:N)
    y[n] ~ bernoulli_logit( alpha[kk[n]] + beta[jj[n]] - 
      gamma * square( theta[jj[n]] - phi[kk[n]] ) );
}"""

In [17]:
# Compile the Stan program into a reusable model object; sampling is run
# separately through sm.sampling(...).
# The commented-out lines below are a disabled one-step pystan.stan(...) call
# taking the same model code, data and initial values.
#sm = pystan.stan(model_code=stan_model,
#                     data=stan_data,
#                     init=stan_init,
#                     iter=1,
#                     warmup=0,
#                     chains=1)
sm = pystan.StanModel(model_code=stan_model)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_f24e4100521aad1af08de4ce922cdb65 NOW.


In [45]:
# Run the compiled model with a single chain and only 5 iterations.
# NOTE(review): iter=5 looks like a smoke-test setting rather than a real
# run -- confirm before interpreting the draws.
samp = sm.sampling(
    data=stan_data,
    init=[stan_init],  # list holding one init dict for the single chain
    iter=5,
    chains=1,
)

In [46]:
la = samp.extract()  # dictionary of arrays, looked up by parameter name (e.g. la['phi'])

In [48]:
# Sampled values of phi taken from the extracted results.
# NOTE: this rebinds `phi`, which earlier held the initial values placed in
# stan_init; re-running the stan_init cell after this one would pick up the
# sampled array instead.
phi = la['phi']
phi.shape

(3, 171)

In [54]:
# Column positions that sort the first row of phi in ascending order.
order = phi[0].argsort()

In [22]:
import matplotlib
%matplotlib inline

In [47]:
# Shape of the sampled phi array -- presumably (draws, elite accounts); the
# second dimension matches K.
phi.shape

(1, 171)

In [56]:
# Column labels of y arranged by the first row of phi, lowest value first.
y.columns[order]

Index(['BarackObama', 'NancyPelosi', 'JoeBiden', 'TheDemocrats', 'algore',
       'DWStweets', 'SenatorReid', 'alfranken', 'Obama2012', 'GabbyGiffords',
       ...
       'RickSantorum', 'JimDeMint', 'MicheleBachmann', 'EricCantor',
       'marcorubio', 'johnboehner', 'KarlRove', 'RepPaulRyan', 'MittRomney',
       'SpeakerBoehner'],
      dtype='object', length=171)

In [59]:
# Every row of phi with its columns rearranged into that same order.
phi[:, order]

array([[ -1.05459075e+00,  -1.05445198e+00,  -1.03411746e+00,
         -1.03393309e+00,  -1.03375454e+00,  -1.02798042e+00,
         -1.02632205e+00,  -1.02146171e+00,  -1.01648603e+00,
         -1.01513718e+00,  -1.01436993e+00,  -1.01356712e+00,
         -1.00662109e+00,  -1.00444346e+00,  -1.00218216e+00,
         -1.00122919e+00,  -1.00068559e+00,  -9.99773721e-01,
         -9.98758806e-01,  -9.97213236e-01,  -9.96792662e-01,
         -9.92372496e-01,  -9.91567376e-01,  -9.90027774e-01,
         -9.84684524e-01,  -9.84189933e-01,  -9.84084176e-01,
         -9.84041157e-01,  -9.79718380e-01,  -9.79499887e-01,
         -9.79404688e-01,  -9.78463744e-01,  -9.78454700e-01,
         -9.77624209e-01,  -9.76776397e-01,  -9.75986914e-01,
         -9.75835709e-01,  -9.75688304e-01,  -9.75079070e-01,
         -9.74400243e-01,  -9.74268725e-01,  -9.73934310e-01,
         -9.73726369e-01,  -9.72958004e-01,  -9.69800025e-01,
         -9.69799668e-01,  -9.68877994e-01,  -9.68101357e-01,
        