# Juror fMRI Parameter Estimate Organization

## import packages

In [1]:
import os
import shutil
import glob
import pandas as pd
import numpy as np
import json

## get data

In [2]:
#paths: every thresholded parameter-estimate text file sitting two directory levels below data/
thresh_pattern = 'data/*/*/*thresh*.txt'
files = glob.glob(thresh_pattern)

In [3]:
#get subject ID - will use to match files
#every matched path has the form data/<subjID>/<dir>/<file>, so the ID is the second path component
event_list = []
mask_list = []
#normalise the path and split on the OS separator so this also works where glob
#returns backslash-separated paths; the set drops the duplicate IDs that arise
#when one subject has several matching files
subject_list = sorted({os.path.normpath(filename).split(os.sep)[1] for filename in files})
print(subject_list)

['19933', '19934', '19947', '19953', '19963', '19968', '19974', '19977', '19980', '19983', '19993', '20000', '20007', '20009', '20010', '20022', '20026', '20031', '20032', '20033', '20036', '20059', '20070', '20073', '20074', '20075', '20080', '20101', '20109']


# Scenario Prep

## grab mean parameter estimates

In [12]:
#event & mask list
event_list = ['scenario']
mask_list = ['juror_scenario_thresh_mask']

from itertools import cycle, islice

#read each dataframe
#one single-column (zstat) table per mask/event/subject file, tagged with where it came from.
#The list is created once, outside the loops, so every mask/event combination
#ends up in the concatenated table (not only the last one iterated).
df_sub = []
for m in mask_list:
    for ev in event_list:
        for sub in subject_list:
            df = pd.read_csv(f'data/{sub}/{ev}/merged_{sub}_{ev}_{m}.txt',
                             header=None, sep='\t', names=['zstat'])
            df['subjectID'] = sub
            df['m'] = m
            df['event'] = ev
            df_sub.append(df)
df2 = pd.concat(df_sub, ignore_index=True, axis=0)

#add repeating rows for the case number
#rows are numbered 1..33 over and over down the stacked table
#NOTE(review): this assumes every file holds exactly 33 rows in scenario order - confirm
df2['scenario'] = list(islice(cycle(range(1, 34)), df2.shape[0]))

#rearrange column order
df2 = df2[['subjectID', 'm', 'event', 'scenario', 'zstat']]

## add extra scenario variables

In [30]:
#scenario classification info
scenario_class = pd.read_csv('../../behavior/data/scenario_classification.csv')
#subset data and give the kept columns descriptive names.
#The rename keys must match the selected column names exactly: rename() silently
#ignores a key that is not present, so a differently-cased key would leave the
#column unrenamed. The output column is therefore 'crime_severity', not 'category'.
scenario_class = scenario_class[['scenario', 'category', 'JRL']]
scenario_class = scenario_class.rename(columns={'category': 'crime_severity', 'JRL': 'victim_type'})

#pca results
#the first (unnamed) column of the loadings file is used as the scenario key
scenario_pca_results = pd.read_csv('../../behavior/pca_loadings-fmri.csv')
scenario_pca_results = scenario_pca_results.rename(columns={'Unnamed: 0': 'scenario'})

#combine the two scenario-level tables, then attach them to the per-subject estimates
merged_scenario_info = pd.merge_ordered(scenario_class, scenario_pca_results, on="scenario")

all_data = pd.merge_ordered(df2, merged_scenario_info, on="scenario")

## save data

In [31]:
#write the merged scenario table to disk without the row index
scenario_out = 'subject_scenario_mean_zstat.csv'
all_data.to_csv(scenario_out, index=False)

# Evidence Prep

## grab mean parameter estimates

In [4]:
#event & mask list
event_list = ['evidence']
mask_list = ['juror_evidence_thresh_mask']
#mask_list = ['avg_reading_evidence_mask']

from itertools import cycle, islice

#read each dataframe
#one single-column (zstat) table per mask/event/subject file, tagged with where it came from.
#The list is created once, outside the loops, so every mask/event combination
#ends up in the concatenated table (not only the last one iterated).
df_sub = []
for m in mask_list:
    for ev in event_list:
        for sub in subject_list:
            df = pd.read_csv(f'data/{sub}/{ev}/merged_{sub}_{ev}_{m}.txt',
                             header=None, sep='\t', names=['zstat'])
            df['subjectID'] = sub
            df['m'] = m
            df['event'] = ev
            df_sub.append(df)
df2 = pd.concat(df_sub, ignore_index=True, axis=0)

#add repeating rows for the case number
#rows are numbered 1..33 over and over down the stacked table
#NOTE(review): this assumes every file holds exactly 33 rows in scenario order - confirm
df2['scenario'] = list(islice(cycle(range(1, 34)), df2.shape[0]))

#rearrange column order
df2 = df2[['subjectID', 'm', 'event', 'scenario', 'zstat']]

#subject IDs come from directory names (strings); store them as integers
df2['subjectID'] = df2['subjectID'].astype(str).astype(int)

## get the sum of the case strength

In [5]:
#subject-level fmri timing info
#load, order by subject then scenario, and expose the subject column as 'subjectID'
subj_data = (
    pd.read_csv('../../behavior/data/all_juror_fmri_behavior_long.csv', encoding = 'latin-1')
    .sort_values(by=['uid','scenario'])
    .rename(columns={'uid': 'subjectID'})
)

#each subject has a row for case strength & punishment ratings: drop the punishment
#rows and, in the same step, keep only the handful of columns needed later
not_punishment = subj_data.rating_type != 'rate_punishment'
subj_data = subj_data.loc[not_punishment, ['subjectID','scenario','history','witness','physical']]

#######################
#case strength weights
evidence_cs = pd.read_csv('../../behavior/data/evidence_effects_fmri_sample.csv')
#drop punishment values (keep case strength)
#copy so the columns added below go onto an independent frame rather than a slice of the raw table
evidence_cs = evidence_cs[evidence_cs.outcome == 'rating'].copy()
#create a list of our conditions
conditions = [
    (evidence_cs['evidence'] == 'physicalNon-DNA'),
    (evidence_cs['evidence'] == 'physicalDNA'),
    (evidence_cs['evidence'] == 'historyUnrelated'),
    (evidence_cs['evidence'] == 'historyRelated'),
    (evidence_cs['evidence'] == 'witnessYes Witness')
    ]
#create a list of the values we want to assign for each condition
ev_type = ['physical', 'physical',
          'history', 'history',
          'witness']

ev_level = ['nonDNA', 'DNA',
          'unrelatedPrior', 'relatedPrior',
          'isWitness']
#create a new column and use np.select to assign values to it using our lists as arguments
#the default is spelled as the string '0' so it shares a dtype with the string choices
#(an integer default cannot be combined with string choices on recent NumPy);
#rows matching none of the conditions get '0' and are not picked up by any split below
evidence_cs['type'] = np.select(conditions, ev_type, default='0')
evidence_cs['level'] = np.select(conditions, ev_level, default='0')
#keep only a couple columns
evidence_cs = evidence_cs[['mean','type','level']]
#add data for no evidence (weight 0 for the "absent" level of each evidence type)
#built as a small frame and concatenated, since DataFrame.append no longer exists in pandas 2
no_evidence = pd.DataFrame([[0, 'physical', 'noPhys'],
                            [0, 'history', 'noPrior'],
                            [0, 'witness', 'noWitness']],
                           columns=evidence_cs.columns)
evidence_cs = pd.concat([evidence_cs, no_evidence], ignore_index=True)


#split the dataframe into one mean/level lookup table per evidence type
evidence_physical = evidence_cs.loc[evidence_cs.type == 'physical', ['mean','level']]
evidence_history = evidence_cs.loc[evidence_cs.type == 'history', ['mean','level']]
evidence_witness = evidence_cs.loc[evidence_cs.type == 'witness', ['mean','level']]

#replace the condition with the mean case strength weight

#for each evidence type (witness, then history, then physical) build a level -> mean
#lookup and use it to translate the subject's condition label into a '<type>_weight' column
for ev_kind, ev_table in (('witness', evidence_witness),
                          ('history', evidence_history),
                          ('physical', evidence_physical)):
    level_to_mean = ev_table.set_index('level')['mean']
    subj_data[ev_kind + '_weight'] = subj_data[ev_kind].replace(level_to_mean)

#sum the weights
weight_cols = ['witness_weight', 'history_weight', 'physical_weight']
subj_data['weight_sum'] = subj_data[weight_cols[0]] + subj_data[weight_cols[1]] + subj_data[weight_cols[2]]


## get the evidence text

In [56]:
stim = '../../behavior/data/scenarios.json'
#parse the stimulus file inside a context manager so the handle is closed right after loading
with open(stim) as stim_file:
    stim_json = json.load(stim_file)
#flatten the nested JSON into one row per scenario with dotted column names
stim_data = pd.json_normalize(stim_json)

#rename default columns/var names
stim_data = stim_data.rename(columns={'abbr': 'scenario',
                                      'vars.base.Base':'base',
                                      'vars.Criminal History.relatedPrior': 'history_relatedPrior',
                                      'vars.Criminal History.unrelatedPrior':'history_unrelatedPrior',
                                      'vars.Criminal History.noPrior':'history_noPrior',
                                      'vars.Witness.isWitness':'witness_isWitness',
                                      'vars.Witness.noWitness':'witness_noWitness',
                                      'vars.Physical Evidence.DNA':'physical_DNA',
                                      'vars.Physical Evidence.nonDNA':'physical_nonDNA',
                                      'vars.Physical Evidence.noPhys':'physical_noPhys'})
#format scenario number
stim_data['scenario'] = stim_data['scenario'].astype(int)

#melt dataframe: one row per scenario x text variant, with the variant name in 'evidence'
stim_data = pd.melt(stim_data, id_vars=['scenario'],
                    value_vars=['base', 'history_relatedPrior','history_unrelatedPrior','history_noPrior',
                                'witness_isWitness','witness_noWitness','physical_DNA',
                                'physical_nonDNA','physical_noPhys'],value_name='text',var_name='evidence')
stim_data = stim_data.sort_values(by=['scenario','evidence'])

#we're not really interested in the base text for now so let's get rid of it
stim_data = stim_data[stim_data.evidence != 'base']

#make an extra column to refine evidence types
#'<type>_<level>' is split once; the second piece is the level, the first the type
evidence_parts = stim_data.evidence.str.split("_")
stim_data['level'] = evidence_parts.str[1]
stim_data['type'] = evidence_parts.str[0]


#count number of words (very simple): number of pieces after splitting on single spaces
stim_data['word_count'] = stim_data.text.apply(lambda x: len(str(x).split(' ')))

#merged_stim_info = pd.merge_ordered(stim_data, evidence_cs, on=['type','level'])

# Reading Grade Level

## Run in R

In [57]:
# enables the %%R magic, not necessary if you've already done this
%load_ext rpy2.ipython

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [58]:
%%R
library(dplyr)
library(tidytext)
library(quanteda)

In [59]:
%%R -i stim_data -o read_lvl
# word, sentence, and syllable counts, plus reading scores
# stim_data is passed in from Python (-i) and read_lvl is handed back to Python (-o).
# fk_grade below is the Flesch-Kincaid grade-level formula computed from the
# per-text word, sentence and syllable counts; rows are then ordered by scenario.
read_lvl <- stim_data %>%
  mutate(syllables = nsyllable(text),
         sentences = nsentence(text),
         words = ntoken(text, remove_punct = TRUE),
         fk_grade = 0.39*(words/sentences) + 11.8*(syllables/words) - 15.59) %>%
  arrange(scenario)

#read_lvl <- dplyr::select(read_lvl,-c(2,3,4,5,6,7,8)) #3=text,#4=level,#5=type,#6=wc,#7=syllab,#8=sentences,#9=words,#10=fk_grade
# Drop columns by position (3, 6, 7, 8, 9). Going by the numbering in the note
# above, that removes text, the word count and the three intermediate counts.
# NOTE(review): positional selection depends on stim_data's column order staying
# as it is now - re-check these indices if columns are added or reordered upstream.
read_lvl <- dplyr::select(read_lvl,-c(3,6,7,8,9))


## Back to Python

In [62]:
#merge the reading level with other stim info
#first join: stimulus rows with the reading-level table, matched on all shared identifier columns
stim_keys = ['scenario','evidence','type','level']
merged_stim_info_tmp = pd.merge_ordered(left=stim_data, right=read_lvl, on=stim_keys)

#second join: attach the case strength table by evidence type and level
weight_keys = ['type','level']
merged_stim_info = pd.merge_ordered(left=merged_stim_info_tmp, right=evidence_cs, on=weight_keys)
merged_stim_info

Unnamed: 0,scenario,evidence,text,level,type,word_count,fk_grade,mean
0,1,history_noPrior,McNeil has no criminal record.,noPrior,history,5,5.240000,0.0
1,2,history_noPrior,Archer has no criminal record.,noPrior,history,5,7.600000,0.0
2,3,history_noPrior,Montes has no criminal record.,noPrior,history,5,7.600000,0.0
3,4,history_noPrior,Galloway has no criminal record.,noPrior,history,5,9.960000,0.0
4,5,history_noPrior,Fray has no criminal record.,noPrior,history,5,5.240000,0.0
...,...,...,...,...,...,...,...,...
259,29,witness_noWitness,"The driver of the van, dazed by the impact, wa...",noWitness,witness,16,9.825000,0.0
260,30,witness_noWitness,Investigators were unable to find anyone who c...,noWitness,witness,13,12.172308,0.0
261,31,witness_noWitness,The boy is too traumatized to testify and ther...,noWitness,witness,12,9.740000,0.0
262,32,witness_noWitness,No witnesses were present in the courthouse al...,noWitness,witness,12,13.673333,0.0


In [65]:
#split the dataframe
def _stim_subset(kind):
    """Return the scenario, fk_grade, word_count and level columns of the rows
    whose type equals *kind*, with the level column renamed to *kind*."""
    of_kind = merged_stim_info.type == kind
    subset = merged_stim_info.loc[of_kind, ['scenario','fk_grade','word_count','level']]
    return subset.rename(columns={'level': kind})


#one table per evidence type; the renamed level column doubles as a merge key later
stim_physical = _stim_subset('physical')

stim_history = _stim_subset('history')

stim_witness = _stim_subset('witness')


#attach the reading stats of each evidence type to the subject rows, one type at a time
#(witness, then history, then physical); each inner merge matches on the scenario and the
#subject's level for that type, and the generic stat columns are prefixed right afterwards
#so the next merge can bring in its own 'word_count' / 'fk_grade'
merged_stim_info2 = subj_data
for kind, stim_table in (('witness', stim_witness),
                         ('history', stim_history),
                         ('physical', stim_physical)):
    merged_stim_info2 = merged_stim_info2.merge(stim_table, how = 'inner', on = ['scenario', kind])
    merged_stim_info2 = merged_stim_info2.rename(columns={'word_count': kind + '_wc',
                                                          'fk_grade': kind + '_fk_grade'})

#totals across the three evidence texts shown for a scenario
merged_stim_info2['wc_sum'] = (merged_stim_info2['witness_wc']
                               + merged_stim_info2['history_wc']
                               + merged_stim_info2['physical_wc'])
merged_stim_info2['fk_grade_sum'] = (merged_stim_info2['witness_fk_grade']
                                     + merged_stim_info2['history_fk_grade']
                                     + merged_stim_info2['physical_fk_grade'])
fk_cols = ['witness_fk_grade', 'history_fk_grade', 'physical_fk_grade']
merged_stim_info2['fk_grade_avg'] = merged_stim_info2[fk_cols].mean(axis=1)


#join with the per-subject parameter estimates and save
all_data = pd.merge_ordered(df2, merged_stim_info2, on=['subjectID','scenario'])
evidence_out = 'subject_evidence_mean_zstat.csv'
all_data.to_csv(evidence_out, index=False)