In [5]:
from os import listdir
from os.path import join as pjoin
from os.path import basename, isfile

import numpy as np
import pandas as pd
from scipy.io import loadmat

# Data conversion

Converting .mat files into dataframes and then csv files

In [12]:
def convert_to_csv(file):
    mat = loadmat(file)
    df = pd.DataFrame()
    df['leftP'] = mat['leftP'][0]
    df['rightP'] = mat['rightP'][0]
    df['trial_response_side'] = mat['trialresponseside'][0]
    df['trial_reward'] = mat['trialreward'][0]
    return df

def convert_timing_to_csv(df, file):
    mat = loadmat(file)
    df['cue_time'] = mat['cuetimes'][0]
    # fill this column with nan
    df['response_time'] = np.nan
    # assign rewarded non-nan trials' response time 
    # using the rewardtimes array from the mat file
    df.loc[df['trial_reward'] == 1, 'response_time'] = mat['rewardtimes'][0]
    # similarly for unrewarded trials
    df.loc[df['trial_reward'] == 0, 'response_time'] = mat['wrongdirtimes'][0]
    return df

In [13]:
# load .mat file
for file in listdir(pjoin('behaviour_data', 'mat', 'task_info')):
    path = pjoin('behaviour_data', 'mat', 'task_info', file)
    df = convert_to_csv(path) 
    timing_path = pjoin('behaviour_data', 'mat', 'timing_info', file)
    df = convert_timing_to_csv(df, timing_path)
    df.to_csv(pjoin('behaviour_data', file.split('.')[0] + '.csv'), index=False)

[  25.1209    38.6192   110.4084   169.43915  177.3578   181.00355
  193.8251   212.58885  237.0404   241.11085  247.84515  261.19495
  267.225    332.1205   348.42245  356.40745  365.70735  371.0256
  380.01765  385.89075  399.321    420.7502   434.593    443.02305
  448.4084   470.4502   479.71995  497.06305  527.4273   538.3443
  547.0014   619.12785  622.3267   666.89335  670.22995  702.35045
  713.2576   731.1942   778.69085  794.5826   802.21425  827.06065
  829.8423   844.99305  861.07     868.31735  877.8424   899.87805
  915.3217   921.58365  927.4265   929.89215  944.1785   956.35105
  963.6674   985.8483   990.5774   995.9328  1005.19895 1020.2367
 1023.32035 1032.36095 1162.26025 1225.3413  1258.0846  1301.2648
 1332.25255 1335.0358  1378.6487  1381.6208  1401.8676  1411.1983
 1454.81915 1486.01895 1498.67215 1532.99825 1546.0535  1562.77965
 1576.21715 1585.12155 1626.77115 1630.99395 1647.5696  1723.8553
 1733.2319  1765.7987  1809.65275 1843.4354  1854.55745 1880.0752
 1

KeyError: 'wrongdirtimes'

## convert spike times

In [8]:
for dir in listdir('spike_times'):
    file = pjoin('spike_times', dir, 'spts.mat')
    if not isfile(file):
        print(dir)
        continue
    mat = loadmat(file)
    print(np.array(mat['pfcspts'][0][0]).flatten())
    pfc = [np.array(i).flatten() for i in mat['pfcspts'][0]]
    str = [np.array(i).flatten() for i in mat['strspts'][0]]
    np.save(pjoin('spike_times', dir, 'pfc.npy'), pfc)
    np.save(pjoin('spike_times', dir, 'str.npy'), str)

[7.55000000e-03 4.94000000e-02 1.58700000e-01 ... 1.50738115e+03
 1.50797485e+03 1.50799260e+03]


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.

# Convert Wei's summary table



In [None]:
from os.path import join as pjoin, isdir, basename, isfile
from os import listdir, mkdir
import numpy as np
from glob import glob
from tqdm import tqdm 
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

data = pd.read_excel('PPD_RTW_summary_table_Wei_06may2022_V2.xlsx')

# rename the sessions to match the current format
# from index_yy.mm.dd to indexyyyymmdd
data['session'] = data['session'].apply(lambda x: x.replace('.', ''))
data['session'] = data['session'].apply(lambda x: x.replace('_', '20'))

# lower case the cell location + (cell ID -1)
data['cell'] = data['Cell location'].apply(lambda x: x.lower()) + '_' + (data['Cell ID'] - 1).astype(str)
# remove the cell location and cell ID columns
data = data.drop(columns=['Cell location', 'Cell ID'])

# create another csv file storing stimulus, movement and reward correlation information
stimulus_movement_reward = data[['session','Stimulus_related_firing_P_value','Movement_related_firing_P_value','Reward_related_firing_P_value']]
# drop these columns from the original data
data = data.drop(columns=['Stimulus_related_firing_P_value','Movement_related_firing_P_value','Reward_related_firing_P_value'])
# rename the columns
data = data.rename(columns={'Background_firing_correlation_Coefficient_with_PPD': 'background_firing_pearson_r', 'Background_firing_correlation_P_ value_with PPD': 'background_firing_p_values', 'Response_magnitude_correlation_Coefficient_with_PPD': 'response_firing_pearson_r', 'Response_magnitude_correlation_P_ value_with PPD':'response_firing_p_values'})


# save the data in csv format
data.to_csv('delta_P_correlation.csv', index=False)

# save the stimulus, movement and reward correlation information in csv format
stimulus_movement_reward.to_csv('stimulus_movement_reward_correlation.csv', index=False)