In [44]:
import pandas as pd
import numpy as np
import pystan
%matplotlib inline

In [2]:
df = pd.read_csv("adj-matrix-US.csv", sep=' ')

In [3]:
us_elites = pd.read_csv("elitesUS.csv", usecols=["US.screen_name", "US.party"])
names = list(us_elites['US.screen_name'])
party = list(us_elites['US.party'])
us_party = pd.DataFrame(np.array(party).reshape(1, len(party)), columns=names)

In [4]:
df.columns

Index(['BarackObama', 'nytimes', 'Schwarzenegger', 'algore', 'maddow',
       'FoxNews', 'MittRomney', 'MMFlint', 'JerryBrownGov', 'SarahPalinUSA',
       ...
       'Sen_JoeManchin', 'SenDanCoats', 'RepJimMcDermott', 'replouiegohmert',
       'RandyNeugebauer', 'McConnellPress', 'RepJimMatheson',
       'SenJohnBarrasso', 'repcleaver', 'zachwamp'],
      dtype='object', length=318)

In [5]:
i = df.shape[0]
if i > 10000 :
    i = 1000
subset = np.where(df.sum(axis=1) > 10)
subset = np.random.choice(np.array(subset[0]), i)
df_subset = df.iloc[subset]

In [6]:
subset_polit = np.where(df_subset.sum(axis=0) > 200)
df_subset_subset = df_subset[df.columns[subset_polit]]

In [7]:
y = df_subset_subset
J = df_subset_subset.shape[0]
K = df_subset_subset.shape[1]
stan_data = dict(
    J = J,
    K = K,
    N = J*K,
    jj = list(range(1, J+1))*K,
    kk = np.repeat(list(range(1, K+1)), J),
    y= y.as_matrix().flatten().tolist())

In [8]:
us_party = us_party[y.columns]
us_party = us_party.transpose()

In [9]:
phi = np.zeros(us_party.shape[0])
phi[np.where(us_party[0] == 'D')[0]] = -1
phi[np.where(us_party[0] == 'R')[0]] = 1
phi

array([-1.,  0.,  0.,  0.,  1.,  0.,  0.,  1.,  0.,  1.,  1., -1., -1.,
        1.,  0.,  1.,  1.,  1.,  0., -1.,  1.,  1., -1.,  0.,  1., -1.,
        1.,  1.,  1.,  1.,  0.,  0.,  1.,  1.])

In [10]:
def normalize(x):
    center = x - x.mean()
    return center/center.std()

In [11]:
stan_init = dict(
    alpha = normalize(np.log(y.sum(axis=0) + 0.0001)),
    sigma_alpha = 1,
    beta = normalize(np.log(y.sum(axis=1) + 0.0001)),
    mu_beta = 0,
    sigma_beta = 1,
    theta = np.random.normal(size=(J)),
    phi = phi,
    mu_phi = 0,
    sigma_phi = 1,
    gamma = np.random.normal()
    
)

In [12]:
y.sum(axis=0).shape

(34,)

In [14]:
stan_model ="""
data {
  int<lower=1> J; // number of twitter users
  int<lower=1> K; // number of elite twitter accounts
  int<lower=1> N; // N = J x K
  int<lower=1,upper=J> jj[N]; // twitter user for observation n
  int<lower=1,upper=K> kk[N]; // elite account for observation n
  int<lower=0,upper=1> y[N]; // dummy if user i follows elite j
}
parameters {
  vector[K] alpha;
  vector[K] phi;
  vector[J] theta;
  vector[J] beta;
  real mu_beta;
  real<lower=0.1> sigma_beta;
  real mu_phi;
  real<lower=0.1> sigma_phi;
  real gamma;
}
model {
  alpha ~ normal(0, 1);
  beta ~ normal(mu_beta, sigma_beta);
  phi ~ normal(mu_phi, sigma_phi);
  theta ~ normal(0, 1); 
  for (n in 1:N)
    y[n] ~ bernoulli_logit( alpha[kk[n]] + beta[jj[n]] - 
      gamma * square( theta[jj[n]] - phi[kk[n]] ) );
}"""

In [15]:
#sm = pystan.stan(model_code=stan_model,
#                     data=stan_data,
#                     init=stan_init,
#                     iter=1,
#                     warmup=0,
#                     chains=1)
sm = pystan.StanModel(model_code=stan_model)

INFO:pystan:COMPILING THE C++ CODE FOR MODEL anon_model_448ecd31a46f77c3c105b8438015699a NOW.


In [17]:
samp = sm.sampling(data=stan_data,
                 init=[stan_init],
                 iter=100,
                 #thin=2,
                 warmup=10,
                 chains=1)

In [18]:
la = samp.extract()  # return a dictionary of arrays

In [39]:
for par in la:
    print(par)
    print(la[par].shape)

alpha
(90, 34)
phi
(90, 34)
theta
(90, 1000)
beta
(90, 1000)
mu_beta
(90,)
sigma_beta
(90,)
mu_phi
(90,)
sigma_phi
(90,)
gamma
(90,)
lp__
(90,)


In [42]:
mu_beta = la['mu_beta']
beta = la['beta']
phi = la['phi']

In [25]:
phi.shape

(34,)

In [26]:
beta.shape

(90, 1000)

In [48]:
phi[:, 2]
phi.mean(axis=0)

array([-0.43926685,  1.49952359,  0.6428717 , -0.18188526,  1.12834305,
        1.34954399, -0.21516883, -0.72380011, -1.39541479,  0.16869521,
       -1.20320217, -0.059415  ,  0.50037076, -0.20559674,  0.24603376,
        0.48359928, -0.1565916 ,  0.16881284,  0.8916131 , -0.55223734,
        1.52940771, -0.10761197, -0.23674257,  0.35005124,  1.07776326,
       -1.63978373, -1.3969644 ,  1.0881272 , -1.20048533, -0.27774292,
        0.50965051, -1.9857578 , -0.46647299, -0.64256915])

In [51]:
for a, b in zip(y.columns, phi.mean(axis=0)):
    print(a +" : "+str(b))

BarackObama : -0.439266852971
nytimes : 1.49952358848
maddow : 0.642871699741
FoxNews : -0.181885260509
MittRomney : 1.12834304681
MMFlint : 1.34954399293
glennbeck : -0.215168831437
KarlRove : -0.723800111414
KeithOlbermann : -1.39541478876
SpeakerBoehner : 0.168695212983
RepPaulRyan : -1.20320216642
JoeBiden : -0.0594150047593
NancyPelosi : 0.500370757327
GovMikeHuckabee : -0.205596739695
Heritage : 0.246033759529
RickSantorum : 0.483599284907
THEHermanCain : -0.156591599344
johnboehner : 0.168812836846
limbaugh : 0.891613095643
TheDemocrats : -0.552237337478
MicheleBachmann : 1.52940771024
GovernorPerry : -0.107611966462
DWStweets : -0.236742566547
SenSanders : 0.350051237019
GOPLeader : 1.07776325927
alfranken : -1.63978373036
AllenWest : -1.39696440186
Senate_GOPs : 1.08812720313
newtgingrich : -1.20048532743
marcorubio : -0.277742916793
MotherJones : 0.509650507966
DRUDGE : -1.98575780248
JimDeMint : -0.466472987086
EricCantor : -0.642569148567


In [54]:
order= np.argsort(phi[0, :])

In [22]:
import matplotlib
%matplotlib inline

In [47]:
phi.shape

(1, 171)

In [56]:
y.columns[order]

Index(['BarackObama', 'NancyPelosi', 'JoeBiden', 'TheDemocrats', 'algore',
       'DWStweets', 'SenatorReid', 'alfranken', 'Obama2012', 'GabbyGiffords',
       ...
       'RickSantorum', 'JimDeMint', 'MicheleBachmann', 'EricCantor',
       'marcorubio', 'johnboehner', 'KarlRove', 'RepPaulRyan', 'MittRomney',
       'SpeakerBoehner'],
      dtype='object', length=171)

In [45]:
phi[:, order]

NameError: name 'order' is not defined