## Lecture 13: NumPy, random numbers

In [1]:
import numpy as np

In [2]:
# Read in categorized free recall data.
# The `with` block closes the file handle as soon as all lines have been read,
# instead of leaving it open for the rest of the session.
with open('midterm_catfr_data.txt','r') as f:
    catfrdata = f.readlines()

# One entry per line of the file, kept in file order
categories = []
recalls_catfr = []
subjects_catfr = []
for iline in catfrdata:
    tmp = iline.strip('\n')
    # Every line is expected to hold exactly eight comma-separated fields
    (subj,task,sess,list_num,serialpos,word,cat,recalled) = tmp.split(',')
    categories.append(cat)
    recalls_catfr.append(int(recalled))  # stored as an integer so it can be averaged later
    subjects_catfr.append(subj)

# Read in standard free recall data.
# The `with` block closes the file handle as soon as all lines have been read.
with open('fr_data.txt','r') as f:
    frdata = f.readlines()

# One entry per line of the file, kept in file order
recalls_fr = []
subjects_fr = []
for iline in frdata:
    tmp = iline.strip('\n')
    # Every line is expected to hold exactly seven comma-separated fields
    # (there is no category column in this file)
    (subj,task,sess,list_num,serialpos,word,recalled) = tmp.split(',')
    recalls_fr.append(int(recalled))  # stored as an integer so it can be averaged later
    subjects_fr.append(subj)

In [3]:
# Number of entries read from the categorized free recall file (one per line)
len(subjects_catfr)

72322

In [4]:
%whos

Variable         Type             Data/Info
-------------------------------------------
cat              str              Birds
categories       list             n=72322
catfrdata        list             n=72322
f                TextIOWrapper    <_io.TextIOWrapper name='<...>ode='r' encoding='UTF-8'>
frdata           list             n=125137
iline            str              R1449T,FR1,1,12,12,CROSS,0\n
list_num         str              12
np               module           <module 'numpy' from '/Ap<...>kages/numpy/__init__.py'>
recalled         str              0
recalls_catfr    list             n=72322
recalls_fr       list             n=125137
serialpos        str              12
sess             str              1
subj             str              R1449T
subjects_catfr   list             n=72322
subjects_fr      list             n=125137
task             str              FR1
tmp              str              R1449T,FR1,1,12,12,CROSS,0
word             str              CROSS


In [5]:
# Turn the categorized free recall lists into numpy arrays so they can be
# compared element-wise and indexed with boolean masks.
subjects_catfr = np.array(subjects_catfr)
categories = np.array(categories)
recalls_catfr = np.array(recalls_catfr, dtype=int)

In [6]:
# Plan:
# for each unique subject
#   for each unique category
#       get the individual trials corresponding to that subject and category

subjects_unique = np.unique(subjects_catfr)
categories_unique = np.unique(categories)

In [7]:
# One row per unique subject, one column per unique category. The shape is taken
# from the data rather than hard-coded, and the array starts out as NaN (not
# uninitialised memory) so any entry that never gets filled is easy to spot.
subj_mean_recall_by_cat = np.full((len(subjects_unique), len(categories_unique)), np.nan)

In [13]:
# Fill in the mean recall for every (subject, category) pair.
# enumerate() provides the row index (x) and column index (y) that go with each
# subject and category.
for x, isubj in enumerate(subjects_unique):
    # The subject mask does not change inside the inner loop, so build it once
    subj_rows = subjects_catfr == isubj
    for y, icat in enumerate(categories_unique):
        trials = recalls_catfr[np.logical_and(subj_rows, categories == icat)]
        # A subject with no trials in a category is recorded as NaN instead of
        # taking the mean of an empty selection
        subj_mean_recall_by_cat[x, y] = np.mean(trials) if trials.size else np.nan
        
        

(26,)

In [14]:
# Pick one example subject and one example category to check by hand
isubj, icat = 'R1004D', 'Furniture'

In [39]:
# Mean recall over the trials that belong to the selected subject and category
recalls_catfr[(subjects_catfr == isubj) & (categories == icat)].mean()

0.0

In [35]:
# Total number of categorized free recall trials
len(recalls_catfr)

72322

In [None]:
# Turn our Python lists into numpy arrays so that numpy functions can be used
# on them later on.
subjects_fr = np.array(subjects_fr)
subjects_catfr = np.array(subjects_catfr)

recalls_fr = np.array(recalls_fr, dtype=int)
recalls_catfr = np.array(recalls_catfr, dtype=int)

#categories = np.array(categories)


In [None]:
%whos

In [None]:
# Use the np.intersect1d() function to return the intersection of two numpy arrays,
# i.e. the subjects that appear in both the FR and the CatFR data
common_subjects = np.intersect1d(subjects_fr,subjects_catfr)

# You can also use the np.unique() function to get the unique elements from a numpy array
#subjlist_fr = np.unique(subjects_fr)
#subjlist_catfr = np.unique(subjects_catfr)


In [None]:
# A notebook cell only echoes its last expression, so a bare np.shape(...) line
# followed by another expression is never shown. Putting both values in one
# tuple displays the array's shape together with its first element.
np.shape(common_subjects), common_subjects[0]

In [None]:
# Compute each common subject's proportion of recalled items in both tasks:
# select that subject's trials with a boolean mask and average them.

# Numpy's special numeric value 'nan' (not a number) lets us initialize numeric
# arrays without putting actual numbers in them.
common_subjects_prec_fr = np.full(np.shape(common_subjects), np.nan)
common_subjects_prec_catfr = np.full(np.shape(common_subjects), np.nan)

# A single pass over the subjects fills both the FR and the CatFR result arrays
for subj_idx, isubj in enumerate(common_subjects):
    # Rows belonging to this subject in each data set
    fr_rows = subjects_fr == isubj
    catfr_rows = subjects_catfr == isubj

    # Average recall goes into this subject's slot of the across-subject arrays
    common_subjects_prec_fr[subj_idx] = np.mean(recalls_fr[fr_rows])
    common_subjects_prec_catfr[subj_idx] = np.mean(recalls_catfr[catfr_rows])



In [None]:
# Per-subject mean recall in the standard free recall task
common_subjects_prec_fr

In [None]:
# Let's introduce a plotting library, matplotlib, so we can visualize our results
import matplotlib.pyplot as plt

In [None]:
# Scatter each subject's FR recall proportion against their CatFR recall proportion.
plt.scatter(common_subjects_prec_fr,common_subjects_prec_catfr)
# The keyword has to be the lowercase `fontsize`: current matplotlib no longer
# accepts mixed-case property names such as `FontSize`.
plt.xlabel('Free Recall Prec',fontsize=14)
plt.ylabel('Categorized Free Recall Prec',fontsize=14)
plt.show()

In [None]:
# Same scatter plot, with both axes fixed to the 0-1 range.
plt.scatter(common_subjects_prec_fr,common_subjects_prec_catfr)
plt.xlim((0,1))
plt.ylim((0,1))
# The keyword has to be the lowercase `fontsize`: current matplotlib no longer
# accepts mixed-case property names such as `FontSize`.
plt.xlabel('Free Recall Prec',fontsize=14)
plt.ylabel('Categorized Free Recall Prec',fontsize=14)
plt.show()


In [None]:
# Same scatter plot on 0-1 axes, plus a dashed black diagonal from (0,0) to (1,1).
# Points above the diagonal have a higher CatFR value than FR value.
plt.scatter(common_subjects_prec_fr,common_subjects_prec_catfr)
plt.xlim((0,1))
plt.ylim((0,1))
plt.plot([0,1],[0,1],'--k')
# The keyword has to be the lowercase `fontsize`: current matplotlib no longer
# accepts mixed-case property names such as `FontSize`.
plt.xlabel('Free Recall Prec',fontsize=14)
plt.ylabel('Categorized Free Recall Prec',fontsize=14)
plt.show()



In [None]:
# How similar is recall performance across subjects for the two tasks?
# Use Pearson correlation to measure similarity.
# np.corrcoef returns a correlation matrix: for these two 1-D inputs the
# off-diagonal entries are the FR-vs-CatFR correlation, and the diagonal is each
# variable correlated with itself.
np.corrcoef(common_subjects_prec_fr,common_subjects_prec_catfr)


In [None]:
# Print three random floats; no seed is set in this cell
for _draw in range(3):
    print(np.random.random())

In [None]:
# Fix the generator's seed, then print three random floats
np.random.seed(1)
for _draw in range(3):
    print(np.random.random())

In [None]:
# Set the same seed value (1) again, then print three random floats
np.random.seed(1)
for _draw in range(3):
    print(np.random.random())

In [None]:
import time
# Current time in seconds since the epoch, as a float
time.time()

In [None]:
# Seed the generator from the clock, truncated to whole seconds, and then draw
# one random float
clock_seed = int(time.time())
np.random.seed(clock_seed)
np.random.random()