### Import libraries

In [132]:
import pandas as pd 
import numpy as np
import pymc3 as pm
from sklearn.metrics import confusion_matrix, accuracy_score

### Read data

In [103]:
# Load the survey data (exported from R as CSV) and drop the first column.
# NOTE(review): the first column is presumably the row-name index that R
# writes on export -- confirm against the CSV.
df = pd.read_csv("WVS.csv").iloc[:, 1:]

# Response: the last column of the frame.
# Predictors: every column that is not named 'y'.
# NOTE(review): this assumes the last column is the one named 'y'; if it is
# not, the response would also be present among the predictors -- confirm.
# Each selection is made from `df` first and only then converted to a NumPy
# array, so the cell runs on a fresh kernel.
y = df.iloc[:, -1].values
X = df.loc[:, df.columns != 'y'].values

### Model specification

In [131]:
# Record the PyMC3 version the notebook was executed with.
pymc3_version = pm.__version__
print('Running on PyMC3 v{}'.format(pymc3_version))

Running on PyMC3 v3.5


In [105]:
# Dimensions of the predictor matrix: N = number of rows, D = number of
# columns. D sets the length of the coefficient vector in the model below.
N, D = X.shape[0], X.shape[1]

In [106]:
with pm.Model() as mod:
    # Ordered-logistic regression: the linear predictor X @ beta is turned into
    # ordinal category probabilities through a vector of ordered cutpoints.
    
    # Priors
    # Common scale of all regression coefficients. It is itself a free
    # parameter, so it is estimated together with beta.
    sigma = pm.HalfNormal('sigma', sd = 1)   
    
    # One coefficient per predictor column (D of them), centred on zero with
    # the shared scale sigma.
    beta = pm.Normal('beta', mu=0, sd=sigma, shape=D) 
    
    # Linear predictor. No separate intercept term is added here.
    lp = pm.math.dot(X, beta)
    
    # Two cutpoints, constrained to be increasing by the `ordered` transform.
    # NOTE(review): two cutpoints imply three outcome categories -- confirm
    # against the coding of y. The prior means [-0.01, 0] are already
    # increasing, presumably so the default starting value satisfies the
    # ordering constraint.
    cutpoints = pm.Normal("cutpoints", mu=[-0.01,0], sd=20, shape=2,
                           transform=pm.distributions.transforms.ordered)
    
    # Likelihood 
    # The observed labels are shifted down by one before being passed in.
    # NOTE(review): this assumes y is coded 1..K while the likelihood expects
    # 0-based category codes -- confirm.
    y_obs = pm.OrderedLogistic("y_obs", eta=lp, cutpoints=cutpoints, observed=y-1)

### Sampling

In [153]:
with mod:
    # Draw posterior samples with the auto-assigned sampler: 5000 tuning
    # iterations followed by 5000 retained draws per chain, with the NUTS
    # target acceptance rate set to 0.85.
    # A fixed random_seed makes the run reproducible under
    # "Restart Kernel -> Run All"; change the value to get an independent run.
    trace = pm.sample(
        5000,
        tune=5000,
        nuts_kwargs=dict(target_accept=.85),
        random_seed=42,
    )

Auto-assigning NUTS sampler...
Initializing NUTS using jitter+adapt_diag...
  rval = inputs[0].__getitem__(inputs[1:])
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [cutpoints, beta, sigma]
Sampling 4 chains: 100%|██████████| 40000/40000 [22:00<00:00, 18.34draws/s]
The acceptance probability does not match the target. It is 0.9219092342189799, but should be close to 0.85. Try to increase the number of tuning steps.


### Parameter estimates

In [154]:
pm.summary(trace).round(2)

Unnamed: 0,mean,sd,mc_error,hpd_2.5,hpd_97.5,n_eff,Rhat
beta__0,-0.1,0.33,0.0,-0.77,0.57,6322.92,1.0
beta__1,0.07,0.33,0.0,-0.61,0.74,6308.13,1.0
beta__2,0.13,0.07,0.0,0.01,0.26,18835.0,1.0
beta__3,-0.31,0.07,0.0,-0.45,-0.17,14660.65,1.0
beta__4,-0.58,0.08,0.0,-0.73,-0.42,14406.65,1.0
beta__5,0.61,0.07,0.0,0.48,0.75,14013.94,1.0
beta__6,0.17,0.05,0.0,0.07,0.27,20235.29,1.0
beta__7,0.19,0.03,0.0,0.14,0.24,18473.51,1.0
sigma,0.44,0.15,0.0,0.22,0.74,10921.01,1.0
cutpoints__0,0.13,0.34,0.0,-0.56,0.8,6359.12,1.0


### Predictions

In [155]:
# Posterior predictive simulation: 5000 posterior draws are taken from the
# trace and the observed variable is simulated for each of them.
# A fixed random_seed keeps the simulated outcomes (and therefore the
# predictions derived from them) reproducible across re-runs.
ppc = pm.sample_ppc(trace, samples=5000, model=mod, size=1, random_seed=42)

100%|██████████| 5000/5000 [20:30<00:00,  4.18it/s]


In [192]:
# Posterior predictive draws of the outcome, indexed as [draw, observation]
# and holding 0-based category codes.
y_pred_samps = ppc['y_obs']

# For every observation, the share of draws that fall in each category
# (0, 1, 2), stacked into an array of shape (n_observations, n_classes).
# Everything is sized from the draws themselves rather than from a variable
# left over from an earlier kernel session.
n_classes = 3
class_freqs = np.stack(
    [np.mean(y_pred_samps == k, axis=0) for k in range(n_classes)],
    axis=1,
)

# Point prediction = most frequent simulated category. argmax returns the
# first maximum, so ties go to the lowest category. Adding 1 converts the
# 0-based codes to 1-based integer labels.
y_pred = class_freqs.argmax(axis=1) + 1

### Accuracy, confusion matrix

In [193]:
# Confusion matrix on labels shifted down by one: rows are the true
# categories, columns the predicted ones.
confusion_matrix(y_true=y - 1, y_pred=y_pred - 1)

array([[2203,  505,    0],
       [1490,  372,    0],
       [ 387,  424,    0]])

In [194]:
# Share of observations whose predicted category equals the observed one,
# rounded to two decimals.
round(accuracy_score(y, y_pred), 2)

0.48