### This code imports the data from the Steinmetz et al. (2019) dataset. First, we extract the data from its stored form in .tar archives; later cells load it into Python dictionaries.

In [None]:
import tarfile
import glob
import numpy as np
import os

def mylistdir(directory):
    """List the entries of ``directory``, leaving out dot-prefixed names.

    Works like os.listdir(), except that every name beginning with a
    period is omitted from the returned list.
    """
    visible = []
    for entry in os.listdir(directory):
        if entry.startswith('.'):
            continue
        visible.append(entry)
    return visible

# filepath here should be wherever you have allData (8GB folder of downloaded data) saved.
# This is the only path to edit in this cell: archives are read from it and
# each one is extracted into a sibling '<archive name>_extracted' folder.
folder = 'your/filepath/here'
# count the same (non-hidden) entries that the loop below walks over
n_sessions = len(mylistdir(folder))

for f in mylistdir(folder):
    new_path = os.path.join(folder, f)
    # folder that receives this session's extracted files
    ## change this if you want to extract somewhere else!!
    new_folder = os.path.join(folder, f'{f}_extracted')

    # anything that is not a regular .tar file (e.g. a directory) is skipped
    if not (new_path.endswith('.tar') and os.path.isfile(new_path)):
        continue

    # "r:" opens the archive for reading without compression; the with-block
    # closes the archive even if extraction fails part-way through.
    # NOTE(review): extractall() trusts the member paths stored in the archive;
    # only run this on archives from a source you trust.
    with tarfile.open(new_path, "r:") as tar:
        tar.extractall(new_folder)

print('done!')

### Next, we take the extracted data and load it from .npy files into python dictionaries.

In [None]:
## thank you to https://github.com/Debu922/NMA_Mapping_Brain_Networks_2020/blob/master/notebooks/loadData.ipynb 
## (/debhh/ on neurostars) who shared a similar version of this code - I adapted it for the data & format 
## we are using for our project

import glob
import numpy as np
import os
import csv

# Every session is expected to be a directory of .npy/.tsv files. Sorting makes
# the session numbering reproducible between runs (glob order is arbitrary),
# and non-directories (e.g. leftover .tar archives) are skipped because they
# cannot contain the files loaded below.
session_paths = sorted(p for p in glob.glob('your/filepath/here/*') if os.path.isdir(p))
n_sessions=40


def _load_npy(session_dir, filename):
    """Load one array file named ``filename`` from the folder ``session_dir``."""
    return np.load(os.path.join(session_dir, filename), allow_pickle=True)


## The properties of a given object share the same number of n data elements
## (specifically, the same number of rows). So spikes.times and spikes.clusters
## give two properties of the spikes object, and each have one entry for each spike

## Every dictionary below is keyed by the 1-based session counter `idx`.

## object one : neural data

# n = spikes
spikes_clusters = {} # which cluster # does the spike belong to
spikes_times = {}

# n = channels
channel_brainLocations = {x:[] for x in range(n_sessions)}

# n = clusters
clusters_phy_annotation = {} # 0 noise, 1 signal coming from multiple neurons, 3 unsorted (only use 2's)
clusters_peakChannel = {} # the channel number of the location of the peak of the cluster's waveform

## object two : visual discrimination task
# n = trials
trial_intervals = {}
goCue_times = {}
feedback_types = {} # CORRECT // INCORRECT
feedback_times = {}
response_choices = {} # DIRECTION
response_times = {}
stimContrast_left = {}
stimContrast_right = {}
stim_times = {}

## object three : behavioral data
## n = trials
eye_area = {}
eye_times = {}
face_energy = {}
face_times = {}

idx = 1

for session in session_paths:

    # spikes
    spikes_clusters[idx] = _load_npy(session, 'spikes.clusters.npy')
    spikes_times[idx] = _load_npy(session, 'spikes.times.npy')

    # clusters
    clusters_phy_annotation[idx] = _load_npy(session, 'clusters._phy_annotation.npy')
    clusters_peakChannel[idx] = _load_npy(session, 'clusters.peakChannel.npy')

    # channels
    with open(os.path.join(session, 'channels.brainLocation.tsv')) as tsvfile:
        reader = csv.DictReader(tsvfile, dialect='excel-tab')
        # setdefault: the pre-built keys are 0..n_sessions-1 while idx starts
        # at 1, so a session numbered n_sessions or above would otherwise
        # raise a KeyError
        locations = channel_brainLocations.setdefault(idx, [])
        locations.extend(row['allen_ontology'] for row in reader)

    # visual discrimination task
    goCue_times[idx] = _load_npy(session, 'trials.goCue_times.npy')
    feedback_types[idx] = _load_npy(session, 'trials.feedbackType.npy')
    feedback_times[idx] = _load_npy(session, 'trials.feedback_times.npy')
    response_choices[idx] = _load_npy(session, 'trials.response_choice.npy')
    response_times[idx] = _load_npy(session, 'trials.response_times.npy')
    stimContrast_left[idx] = _load_npy(session, 'trials.visualStim_contrastLeft.npy')
    stimContrast_right[idx] = _load_npy(session, 'trials.visualStim_contrastRight.npy')
    stim_times[idx] = _load_npy(session, 'trials.visualStim_times.npy')
    # store per session; assigning to the bare name would replace the whole
    # dictionary with the last session's array
    trial_intervals[idx] = _load_npy(session, 'trials.intervals.npy')

    # behavioral data (also stored per session, for the same reason)
    eye_area[idx] = _load_npy(session, 'eye.area.npy')
    eye_times[idx] = _load_npy(session, 'eye.timestamps.npy')
    face_energy[idx] = _load_npy(session, 'face.motionEnergy.npy')
    face_times[idx] = _load_npy(session, 'face.timestamps.npy')

    # increment session counter index
    idx = idx+1


print('all data is in dictionaries!')

### Lastly, we save each dictionary created for the variables of interest to its own file (pickled, with a .npy extension), for easy access later.

In [None]:
## saving dictionaries to pickle files

# wherever you want to save your pickles to
# (this changes the working directory for the rest of the session)
os.chdir('your/filepath/here')
import pickle

# maps each output file stem to the dictionary that is written under that name
dicts_dict = {'spikes_clusters':spikes_clusters, 'spikes_times': spikes_times,'channel_brainLocations':channel_brainLocations,
          'clusters_phy_annotation': clusters_phy_annotation, 'clusters_peakChannel': clusters_peakChannel,
          'trial_intervals': trial_intervals, 'goCue_times':goCue_times, 'feedback_types': feedback_types,
          'feedback_times': feedback_times, 'response_choices': response_choices, 'response_times': response_times,
          'stimContrast_left': stimContrast_left, 'stimContrast_right': stimContrast_right, 'stim_times': stim_times,
          'eye_area': eye_area, 'eye_times': eye_times, 'face_energy': face_energy, 'face_times': face_times}

# NOTE: the files carry a .npy extension but are written with pickle.dump,
# so read them back with pickle.load.
for name, dict_ in dicts_dict.items():
    with open(f'{name}.npy', 'wb') as myFile:
        # must sit inside the with-block so the file is still open
        pickle.dump(dict_, myFile)

print('all files exported!')