In [11]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon May 6 2024

@author: Yaning
Data from Ben
Data file name: intertemporal_choice_dataset_all_trials.csv
Questionnaire from Ben
Questionnaire file name: discountdataset_allsubs_gamblingscores.csv
"""

# Execute the two analysis files inside this kernel (original note: get the real
# percentage of the participants).
# NOTE(review): `quest_data` and `quest_data_header`, used in later cells, are not
# defined anywhere in this notebook, so they presumably come from these runs — confirm.
# The paths are relative to the kernel's working directory.
%run Documents/pyro_models/files_to_run/data_analysis_without_version.ipynb
%run Documents/pyro_models/files_to_run/questionnaire_data_analysis.py

import math
import os
# from turtle import position
import torch
import torch.distributions.constraints as constraints
import pyro
from pyro.optim import Adam
from pyro.infer import SVI, Trace_ELBO, Predictive
import pyro.distributions as dist
from tqdm import tqdm
import matplotlib.pyplot as plt
from statistics import mean 
import pandas as pd
import numpy as np
import scipy.stats as stats 
import pickle
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler

['', 'trial', 'val_basic', 'delay', 'val_prc', 'response', 'RT', 'respSide', 'sideNOW', 'Version', 'loopID', 'Context']
['', 'trial', 'val_basic', 'delay', 'val_prc', 'response', 'RT', 'respSide', 'sideNOW', 'subID', 'Version', 'Trial', 'loopID', 'Context', 'GRCS', 'SEV']
['', 'X', 'subject_id', 'sex', 'cig_day', 'age', 'height', 'weight', 'schoolyears', 'income', 'moneyEGM_spielhallegaststaette', 'moneyEGM_spielcasino', 'AUDIT', 'BDI', 'DSM', 'EHI_score', 'FTND', 'SCL90_soma', 'SCL90_zwang', 'SCL90_unsichersozial', 'SCL90_depress', 'SCL90_aengstlichkeit', 'SCL90_agress', 'SCL90_phob', 'SCL90_para', 'SCL90_psychotizismus', 'SCL90_zusatz', 'SCL90_GSI', 'STAIT', 'BIS_motor', 'BIS_nonplanning', 'BIS_attention', 'BIS_Gesamtwert', 'BB_BIS_GS', 'BB_BAS_GS', 'BB_BAS_D', 'BB_BAS_FS', 'BB_BAS_RP', 'TIPI_ex', 'TIPI_ag', 'TIPI_con', 'TIPI_es', 'TIPI_oe', 'TCI_NS1', 'TCI_NS2', 'TCI_NS3', 'TCI_NS4', 'TCI_total', 'PTTS', 'NPRS_pos', 'NPRS_neg', 'ZTPI_pastNegative', 'ZTPI_presentHedonistic', 'ZTPI_fu

In [3]:
# Questionnaire column 1 -> GRCS (gambling-related cognitive score),
# questionnaire column 2 -> SEV (gambling severity); both cast to float.
gamble_cognitive, gamble_sev = (
    quest_data[:, column].astype(float) for column in (1, 2)
)

In [4]:
gamble_cognitive

array([21.7     , 12.483333, 13.183333, 15.883333, 19.633333, 30.4     ,
       15.466667, 19.5     , 21.95    , 11.2     , 17.      , 18.083333,
       19.033333, 20.9     , 12.716667, 23.5     , 17.066667, 18.533333,
       17.716667, 11.883333, 14.      , 18.      , 17.7     , 13.716667,
       23.316667, 11.7     , 20.5     , 20.166667, 21.366667,  9.566667])

In [34]:
# Correlation coefficient between the GRCS and SEV score vectors
# (off-diagonal entry of the 2x2 matrix returned by np.corrcoef).
np.corrcoef(gamble_cognitive, gamble_sev)[0,1]

0.3034422851888282

In [2]:
# load inferred parameters
# The path is relative to the kernel's working directory.
# NOTE(review): pickle.load can execute arbitrary code while unpickling; only
# open result files that come from a trusted source.
with open('Documents/pyro_models/files_to_run/results/logistic_ohne_sigma_es_and_c.pkl', 'rb') as f:
    pos_dict = pickle.load(f)

In [3]:
# 'm_locs' entry of the loaded results; later cells index it as a 2-D array
# (rows x parameter columns). Presumably one row per participant/context — confirm.
pos_params = pos_dict['m_locs']

In [4]:
# Split the parameter matrix into one vector per parameter.
# Column 0 is used as-is; columns 1-4 are passed through np.exp
# (presumably they are stored on a log scale — confirm against the model that
# produced the pickle).
pos_mean_u = pos_params[:,0]
pos_sigma_u = np.exp(pos_params[:,1])
# Disabled: sigma_es column (this results file is the "ohne sigma_es and c" variant).
# pos_sigma_es = np.exp(pos_params[:,2])
pos_a = np.exp(pos_params[:,2])
pos_b =np.exp(pos_params[:,3])
# Disabled: c column, see note above.
# pos_c =np.exp(pos_params[:,5])
pos_beta = np.exp(pos_params[:,4])

In [5]:
# Every parameter vector is cut at index 30: entries [0, 30) go to the
# "cafe" variable, entries [30, end) go to the "gamble" variable.
# (sigma_es is not split here; it is not extracted for this model variant.)
pos_mean_u_cafe, pos_mean_u_gamble = pos_mean_u[:30], pos_mean_u[30:]
pos_sigma_u_cafe, pos_sigma_u_gamble = pos_sigma_u[:30], pos_sigma_u[30:]
pos_a_cafe, pos_a_gamble = pos_a[:30], pos_a[30:]
pos_b_cafe, pos_b_gamble = pos_b[:30], pos_b[30:]
pos_beta_cafe, pos_beta_gamble = pos_beta[:30], pos_beta[30:]

In [44]:
# Stack the per-context parameter vectors into one array, one row per
# vector, in the order: mean_u, sigma_u, a, b, beta — each as (cafe, gamble).
# sigma_es is intentionally absent (not extracted for this model variant).
inferred_list = np.array([
    pos_mean_u_cafe, pos_mean_u_gamble,
    pos_sigma_u_cafe, pos_sigma_u_gamble,
    pos_a_cafe, pos_a_gamble,
    pos_b_cafe, pos_b_gamble,
    pos_beta_cafe, pos_beta_gamble,
])

In [7]:
# Row labels for the stacked parameter array: for every parameter, first the
# cafe name and then the gamble name, e.g. 'pos_mean_u_cafe', 'pos_mean_u_gamble'.
inferred_list_name = np.array([
    f'pos_{parameter}_{context}'
    for parameter in ('mean_u', 'sigma_u', 'a', 'b', 'beta')
    for context in ('cafe', 'gamble')
])

In [12]:
# Bundle the two gambling score vectors (GRCS first, SEV second) in one list.
questionnaire_list = [gamble_cognitive, gamble_sev]

In [40]:
# "ohne id" = "without id" (German): drop the first questionnaire column ...
# NOTE(review): assumes column 0 is the identifier/index column — confirm.
quest_data_ohne_id = quest_data[:, 1:]

# ... and the first header entry, so header positions keep matching data columns.
quest_data_header_ohne_id = quest_data_header[1:]

In [37]:
# Scan every (questionnaire column, inferred parameter vector) pair and keep
# the pairs whose correlation coefficient is stronger than the threshold.
# Each kept row is [questionnaire column index, inferred row index, coefficient].
correlation_threshold = 0.5  # minimum |r| for a pair to be reported

correlate_list = []

for i in range(quest_data_ohne_id.shape[1]):
    # Convert the questionnaire column once instead of once per parameter.
    quest_column = quest_data_ohne_id[:, i].astype('float')
    for j, inferred_values in enumerate(inferred_list):
        correlation_score = np.corrcoef(quest_column, inferred_values)[0, 1]
        # A NaN coefficient (e.g. constant column) compares False and is skipped.
        if abs(correlation_score) > correlation_threshold:
            correlate_list.append([i, j, correlation_score])

# Always produce an (n_hits, 3) float array; without the reshape an empty
# result would be 1-D and the column indexing used downstream would fail.
correlate_list = np.array(correlate_list, dtype=float).reshape(-1, 3)


In [39]:
correlate_list

array([[ 0.        ,  2.        ,  0.54460082],
       [14.        ,  0.        , -0.53599517],
       [14.        ,  2.        , -0.59787693],
       [29.        ,  3.        , -0.55758363],
       [33.        ,  0.        ,  0.53646566],
       [33.        ,  9.        , -0.5459556 ],
       [34.        ,  9.        , -0.50421799],
       [36.        ,  0.        ,  0.50559526],
       [57.        ,  2.        ,  0.5264918 ],
       [61.        ,  2.        ,  0.54460082],
       [66.        ,  4.        ,  0.61915751]])

In [45]:
# The first two columns of correlate_list hold indices stored as floats;
# turn them back into integer index arrays once and reuse them.
quest_column_idx = correlate_list[:, 0].astype(int)
inferred_row_idx = correlate_list[:, 1].astype(int)

# Names of the selected questionnaire columns / inferred parameter vectors.
corr_quest_header = quest_data_header_ohne_id[quest_column_idx]
corr_infer_header = inferred_list_name[inferred_row_idx]

# Matching data: questionnaire values are picked by column, inferred values by row.
corr_quest_data = quest_data_ohne_id[:, quest_column_idx]
corr_infer_data = inferred_list[inferred_row_idx]

In [56]:
quest_data_ohne_id.shape

(30, 67)

In [58]:
corr_infer_data.shape

(11, 30)

In [50]:
# Emit a tab-only line followed by the coefficient, for every retained pair.
for score in correlate_list[:, 2]:
    print('\t', score, sep='\n')

	
0.5446008177275701
	
-0.5359951660646018
	
-0.5978769274098762
	
-0.5575836349330996
	
0.5364656591949
	
-0.5459556043293237
	
-0.5042179929186872
	
0.5055952601431339
	
0.5264917957435136
	
0.5446008204724595
	
0.6191575122416136


Principal Component Analysis

In [79]:
def pca(header, data, n_components=2):
    """Standardize the given variables, run a PCA and print the results.

    Parameters
    ----------
    header : sequence of str
        Variable names; ``header[k]`` labels ``data[k]``.
    data : sequence of equal-length 1-D array-likes
        One entry per variable, each holding that variable's observations.
        Only the first ``len(header)`` entries are used. Values must be
        convertible to float (numeric strings are accepted).
    n_components : int, default 2
        Number of principal components, passed to ``sklearn`` ``PCA``.

    Returns
    -------
    None
        The explained-variance ratios, the component scores and the loadings
        are printed, not returned.
    """
    n_variables = len(header)

    # Build the (observations x variables) table positionally rather than via
    # a dict keyed by name, so repeated names in `header` do not silently
    # overwrite each other. The explicit float cast makes the numeric
    # conversion independent of how the scaler treats string/object input.
    values = np.asarray(data)[:n_variables].astype(float)
    frame = pd.DataFrame(values.T, columns=list(header))

    # Standardize the data
    scaled_data = StandardScaler().fit_transform(frame)

    # Fit the PCA; the model is named `model` so it does not shadow this function.
    model = PCA(n_components=n_components)
    model.fit(scaled_data)

    # Project the standardized observations onto the principal components
    principal_components = model.transform(scaled_data)

    # Explained variance
    explained_variance = model.explained_variance_ratio_
    print('Explained variance by each component:', explained_variance)

    # Label components PC1, PC2, ... according to how many were actually fitted
    component_names = [f'PC{k + 1}' for k in range(model.n_components_)]

    # Scores of every observation on the principal components
    principal_df = pd.DataFrame(data=principal_components, columns=component_names)
    print(principal_df)

    # Loadings (eigenvectors): one row per original variable
    loadings_df = pd.DataFrame(model.components_.T, columns=component_names, index=frame.columns)

    print("Loadings of original variables on each principal component:")
    print(loadings_df.to_string())

In [51]:
# Names for a two-variable PCA trial: entry 1 of the correlated questionnaire
# headers and entry 1 of the correlated inferred-parameter headers.
test_header = [corr_quest_header[1], corr_infer_header[1]]

In [63]:
# pca() reads header.shape, so the name list has to be a NumPy array.
test_header = np.array(test_header)

In [59]:
# Data for the same pair: column 1 of the selected questionnaire data and
# row 1 of the selected inferred data (same position as in test_header).
test_data = [corr_quest_data[:,1], corr_infer_data[1]]

In [64]:
# Stack the two vectors into one array with one row per variable.
# NOTE(review): if the questionnaire values are strings, NumPy will store the
# whole array as strings — confirm the dtype before relying on numeric ops.
test_data = np.array(test_data)

In [68]:
test_header.shape

(2,)

In [81]:
test_header

array(['EHI_score', 'pos_mean_u_cafe'], dtype='<U15')

In [80]:
pca(test_header, test_data)

Explained variance by each component: [0.76799758 0.23200242]
         PC1       PC2
0  -0.171607  0.736260
1  -0.738320  0.169547
2   0.324947  0.306473
3   0.476544  1.384411
4  -0.167586  0.395173
5   4.048107  0.262513
6  -0.188680 -0.160837
7  -0.691863 -0.761801
8   0.749388  1.657255
9  -0.480690 -0.159506
10 -0.387958 -0.360114
11 -0.797458  0.110409
12 -1.279014 -0.371147
13  0.393351 -0.002521
14 -1.254014 -0.346147
15  0.671154  1.233914
16  3.873658 -0.740194
17 -0.320829  0.587038
18  0.122650 -0.513385
19 -0.508262  0.106264
20 -0.833304  0.074563
21 -1.006543 -0.098676
22 -0.542304  0.365563
23 -0.657773 -0.401775
24 -0.124240 -0.976421
25 -0.193783  0.127402
26 -1.105952 -0.198085
27 -0.780273  0.127594
28 -0.193483 -1.118999
29  1.764138 -1.434773
Loadings of original variables on each principal component:
                      PC1       PC2
EHI_score       -0.707107  0.707107
pos_mean_u_cafe  0.707107  0.707107
