Import packages.

In [1]:
from extracting import load_abr, load_dp
import os

Define paths.

In [2]:
# Root folder of the CVanalysis data. Set the CVANALYSIS_DIR environment variable
# to run this notebook on another machine; the fallback is the original local path.
CVANALYSIS_DIR = os.environ.get(
    "CVANALYSIS_DIR",
    "/Users/corstiaen/OneDrive - Karolinska Institutet/CVanalysis",
)

# Master experiment list, passed as second argument to every loader call below.
MASTER_LIST = os.path.join(CVANALYSIS_DIR, "MasterExperimentList.csv")

# One data directory per substrain (CaJ, JRj, Sca), all under the same root.
DATA_DIR_CAJ = os.path.join(CVANALYSIS_DIR, "Jackson")
DATA_DIR_JRJ = os.path.join(CVANALYSIS_DIR, "Janvier")
DATA_DIR_SCA = os.path.join(CVANALYSIS_DIR, "Scanbur")

# ABR

## Load
Load data from .csv files that were created by exporting "cursors" from Tucker-Davis Technologies .awf files.

In [3]:
# Run the ABR loader once per substrain directory, in CaJ, JRj, Sca order.
df_caj_abr, df_jrj_abr, df_sca_abr = [
    load_abr(data_dir, MASTER_LIST)
    for data_dir in (DATA_DIR_CAJ, DATA_DIR_JRJ, DATA_DIR_SCA)
]

ABRthresholds.csv has no readable noise_type; skipping file.
ABRthresholds.csv has no readable noise_type; skipping file.
ABRthresholds.csv has no readable noise_type; skipping file.


## Clean
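Remove data recorded under deviating experimental conditions: keep only the Sca rows where `special` is 'none', then drop the `special` column from all three substrains.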

In [4]:
# Keep only the Sca rows whose `special` column equals 'none'.
abr_special_is_none = df_sca_abr["special"] == "none"
df_sca_abr = df_sca_abr[abr_special_is_none]

In [5]:
# Drop the `special` column from all three ABR frames.
df_caj_abr, df_jrj_abr, df_sca_abr = [
    frame.drop(columns=["special"])
    for frame in (df_caj_abr, df_jrj_abr, df_sca_abr)
]

## Look at raw data
Number of ABR data points per substrain, before preprocessing.

In [6]:
# Report the row count of each substrain's ABR frame.
abr_frames_by_substrain = (
    ("CaJ", df_caj_abr),
    ("JRj", df_jrj_abr),
    ("Sca", df_sca_abr),
)
for substrain, frame in abr_frames_by_substrain:
    print(f"There are {len(frame)} values for {substrain}.")

There are 11805 values for CaJ.
There are 8489 values for JRj.
There are 7985 values for Sca.


Example of what our data looks like.

In [7]:
# Fixed seed so the preview shows the same four rows on every Restart & Run All.
df_caj_abr.sample(4, random_state=0)

Unnamed: 0,file_number,substrain,id,noise_spl,analyzer_id,experimenter_id,noise_type,abr_time,level_db,freq_hz,wave1_amp,threshold
1590,110,CaJ,C202_4,100,JF,JF,ZT3,2w,65.0,16000,520.503,50.0
7064,578,CaJ,C52_4,105,CV,RP,baseline,baseline,55.0,16000,704.339,5.0
5012,382,CaJ,C286_2,105,JF,JF,baseline,baseline,25.0,16000,283.757,10.0
10668,905,CaJ,C100_2,103,JF,JF,ZT15,2w,80.0,12000,971.107,50.0


## Save
Write data to csv.

In [8]:
# Output location: <parent of the current working directory>/data/raw
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
raw_data_dir = os.path.join(parent_dir, "data", "raw")

# makedirs (unlike mkdir) also creates the intermediate "data" folder when it is
# missing, and exist_ok=True makes re-running this cell a no-op.
os.makedirs(raw_data_dir, exist_ok=True)

In [9]:
# Write each substrain's ABR frame to its own CSV in the raw-data folder,
# leaving out the DataFrame index.
abr_exports = {
    "ABR_CaJ_raw.csv": df_caj_abr,
    "ABR_JRj_raw.csv": df_jrj_abr,
    "ABR_Sca_raw.csv": df_sca_abr,
}
for csv_name, frame in abr_exports.items():
    frame.to_csv(os.path.join(raw_data_dir, csv_name), index=False)

# DP

## Load
Load data from Tucker-Davis Technologies .awf files directly.

In [10]:
# Run the DP loader once per substrain directory, in CaJ, JRj, Sca order.
df_caj_dp, df_jrj_dp, df_sca_dp = [
    load_dp(data_dir, MASTER_LIST)
    for data_dir in (DATA_DIR_CAJ, DATA_DIR_JRJ, DATA_DIR_SCA)
]

## Clean
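Remove data recorded under deviating experimental conditions: keep only the Sca rows where `special` is 'none', then drop the `special` column from all three substrains.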

In [11]:
# Keep only the Sca rows whose `special` column equals 'none'.
dp_special_is_none = df_sca_dp["special"] == "none"
df_sca_dp = df_sca_dp[dp_special_is_none]

In [12]:
# Drop the `special` column from all three DP frames.
df_caj_dp, df_jrj_dp, df_sca_dp = [
    frame.drop(columns=["special"])
    for frame in (df_caj_dp, df_jrj_dp, df_sca_dp)
]

## Look at raw data
Number of DP data points per substrain, before preprocessing.

In [13]:
# Report the row count of each substrain's DP frame.
dp_frames_by_substrain = (
    ("CaJ", df_caj_dp),
    ("JRj", df_jrj_dp),
    ("Sca", df_sca_dp),
)
for substrain, frame in dp_frames_by_substrain:
    print(f"There are {len(frame)} values for {substrain}.")

There are 12338 values for CaJ.
There are 11305 values for JRj.
There are 10257 values for Sca.


Example of what our data looks like.

In [14]:
# Fixed seed so the preview shows the same four rows on every Restart & Run All.
df_caj_dp.sample(4, random_state=0)

Unnamed: 0,file_number,substrain,noise_spl,noise_type,abr_time,id,experimenter_id,freq_hz,level_db,f1,f2,level_f1,level_f2,level_distprod
1711,28,CaJ,100,baseline,baseline,C36_3,RP,32000.0,55.0,26656.0,32000.0,35.408375,23.564014,5.752388
10591,487,CaJ,103,ZT3,24h,C112_0,JF,24000.0,35.0,19992.0,24000.0,33.169533,19.119251,-21.14621
7653,305,CaJ,105,ZT15,2w,C60_3,RP,12000.0,55.0,9996.0,12000.0,60.383541,48.687029,-16.695984
5673,784,CaJ,105,ZT15,2w,C287_3,JF,12000.0,75.0,9996.0,12000.0,76.052581,67.246708,-13.301506


## Save
Write data to csv. (The raw data directory has already been created.)

In [15]:
# Write each substrain's DP frame to its own CSV in the raw-data folder,
# leaving out the DataFrame index.
dp_exports = {
    "DP_CaJ_raw.csv": df_caj_dp,
    "DP_JRj_raw.csv": df_jrj_dp,
    "DP_Sca_raw.csv": df_sca_dp,
}
for csv_name, frame in dp_exports.items():
    frame.to_csv(os.path.join(raw_data_dir, csv_name), index=False)