In [1]:
import pandas as pd
import re
from old_dataset_handler import get_date, get_exp, get_time

In [2]:
# Load the annotation table; the first CSV column is used as the row index.
ANNOT_PATH = r'../../clock_neurons_annotation.csv'
annot_df = pd.read_csv(ANNOT_PATH, index_col=0)
# Bare last expression so the notebook renders the frame.
annot_df

Unnamed: 0,experiment,time,Repeats,Idents
20181215_CLK856_LD_zt14_AR07_ACAGGA,CLK856_LD,zt14,LD_2,29:LPN
20181215_CLK856_LD_zt14_AR07_CTTCTG,CLK856_LD,zt14,LD_2,29:LPN
20181231_CLK856_LD_zt14_AR02_GTACCA,CLK856_LD,zt14,LD_1,29:LPN
20181231_CLK856_LD_zt14_AR02_TCCTTC,CLK856_LD,zt14,LD_1,29:LPN
20181231_CLK856_LD_zt14_AR08_AGACAG,CLK856_LD,zt14,LD_1,29:LPN
...,...,...,...,...
20190710_CLK856_DD_zt06_AR04_GTCTTC,CLK856_DD,zt06,DD_1,1:DN1p_CNMa
20190710_CLK856_DD_zt06_AR04_TCCTTC,CLK856_DD,zt06,DD_1,1:DN1p_CNMa
20190719_CLK856_DD_zt06_AR18_GTCTTC,CLK856_DD,zt06,DD_2,1:DN1p_CNMa
20190814_CLK856_DD_zt06_AR19_TCTGCA,CLK856_DD,zt06,DD_2,1:DN1p_CNMa


In [3]:
def get_exp(neuron: str) -> str:
    """
    Extract the experiment tag embedded in a neuron identifier.

    The tag is the first ``DD`` or ``LD`` token that has an underscore on
    both sides of it.

    Note: defining this function here shadows the ``get_exp`` name imported
    from ``old_dataset_handler`` in the notebook's import cell.

    :param neuron: neuron identifier string to scan
    :return: string 'DD' or 'LD'
    :raises ValueError: if no underscore-delimited DD/LD token is present
    """
    found = re.search(r"(_)(DD|LD)(_)", neuron)
    if found is None:
        raise ValueError(f'Experiment not found in {neuron}')
    # Groups are (leading underscore, tag, trailing underscore); keep the tag.
    _, condition, _ = found.groups()
    return condition


def change_idx(neuron: str) -> str:
    """
    Updates index column to match dataset notation.

    Every lowercase ``zt`` + two-digit time token in the identifier is
    rewritten with an uppercase prefix: ``CT`` when the identifier's
    experiment (as reported by ``get_exp``) is ``DD``, ``ZT`` otherwise.
    Each token keeps its own two digits.

    :param neuron: neuron identifier, e.g. '20181231_CLK856_LD_zt14_AR08_AGACAG'
    :return: modified string
    :raises ValueError: if the identifier has no ``zt##`` token, or if
        ``get_exp`` cannot find the experiment in it
    """
    time_pattern = r'(zt)(\d\d)'
    # Check for the time token first so a missing token is reported before
    # any experiment lookup error.
    if re.search(time_pattern, neuron) is None:
        raise ValueError(f"Time slot not found in {neuron}")
    exp = get_exp(neuron)
    new_time_str = 'CT' if exp == 'DD' else 'ZT'
    # Build the replacement per match: a fixed replacement string taken from
    # the first match would overwrite the digits of any later time token.
    return re.sub(time_pattern, lambda m: new_time_str + m.group(2), neuron)

# Smoke check: one LD and one DD sample identifier, each printed next to
# the result of change_idx.
test_zt = "20181231_CLK856_LD_zt14_AR08_AGACAG"
test_ct = "20190001_CLK856_DD_zt14_AR01_CTTCTG"
print(f"{test_zt} ==> {change_idx(test_zt)}")
print(f"{test_ct} ==> {change_idx(test_ct)}")

20181231_CLK856_LD_zt14_AR08_AGACAG ==> 20181231_CLK856_LD_ZT14_AR08_AGACAG
20190001_CLK856_DD_zt14_AR01_CTTCTG ==> 20190001_CLK856_DD_CT14_AR01_CTTCTG


In [4]:
# Apply change_idx to every row label; rename returns a new frame, so
# annot_df keeps its original labels.
new_annot_df = annot_df.rename(mapper=change_idx, axis='index')
new_annot_df

Unnamed: 0,experiment,time,Repeats,Idents
20181215_CLK856_LD_ZT14_AR07_ACAGGA,CLK856_LD,zt14,LD_2,29:LPN
20181215_CLK856_LD_ZT14_AR07_CTTCTG,CLK856_LD,zt14,LD_2,29:LPN
20181231_CLK856_LD_ZT14_AR02_GTACCA,CLK856_LD,zt14,LD_1,29:LPN
20181231_CLK856_LD_ZT14_AR02_TCCTTC,CLK856_LD,zt14,LD_1,29:LPN
20181231_CLK856_LD_ZT14_AR08_AGACAG,CLK856_LD,zt14,LD_1,29:LPN
...,...,...,...,...
20190710_CLK856_DD_CT06_AR04_GTCTTC,CLK856_DD,zt06,DD_1,1:DN1p_CNMa
20190710_CLK856_DD_CT06_AR04_TCCTTC,CLK856_DD,zt06,DD_1,1:DN1p_CNMa
20190719_CLK856_DD_CT06_AR18_GTCTTC,CLK856_DD,zt06,DD_2,1:DN1p_CNMa
20190814_CLK856_DD_CT06_AR19_TCTGCA,CLK856_DD,zt06,DD_2,1:DN1p_CNMa


Next step: update time notation

In [10]:
# Rebuild the columns derived from the row labels. Dropping all three first
# (ignoring any that are absent) and re-adding them in a fixed order makes
# this cell idempotent: the resulting column order no longer depends on how
# many times the cell has been executed.
derived_cols = ['condition', 'date', 'time']
new_annot_df = new_annot_df.drop(columns=derived_cols, errors='ignore')
indices = new_annot_df.index.to_series()
new_annot_df = new_annot_df.assign(
    condition=indices.map(get_exp),
    date=indices.map(get_date),
    time=indices.map(get_time),
)
# Keep 'Idents' as the last column.
reorder = [col for col in new_annot_df.columns if col != 'Idents'] + ['Idents']
new_annot_df = new_annot_df[reorder]

new_annot_df

Unnamed: 0,experiment,Repeats,condition,date,time,Idents
20181215_CLK856_LD_ZT14_AR07_ACAGGA,CLK856_LD,LD_2,LD,2018-12-15,ZT14,29:LPN
20181215_CLK856_LD_ZT14_AR07_CTTCTG,CLK856_LD,LD_2,LD,2018-12-15,ZT14,29:LPN
20181231_CLK856_LD_ZT14_AR02_GTACCA,CLK856_LD,LD_1,LD,2018-12-31,ZT14,29:LPN
20181231_CLK856_LD_ZT14_AR02_TCCTTC,CLK856_LD,LD_1,LD,2018-12-31,ZT14,29:LPN
20181231_CLK856_LD_ZT14_AR08_AGACAG,CLK856_LD,LD_1,LD,2018-12-31,ZT14,29:LPN
...,...,...,...,...,...,...
20190710_CLK856_DD_CT06_AR04_GTCTTC,CLK856_DD,DD_1,DD,2019-07-10,CT06,1:DN1p_CNMa
20190710_CLK856_DD_CT06_AR04_TCCTTC,CLK856_DD,DD_1,DD,2019-07-10,CT06,1:DN1p_CNMa
20190719_CLK856_DD_CT06_AR18_GTCTTC,CLK856_DD,DD_2,DD,2019-07-19,CT06,1:DN1p_CNMa
20190814_CLK856_DD_CT06_AR19_TCTGCA,CLK856_DD,DD_2,DD,2019-08-14,CT06,1:DN1p_CNMa


In [11]:
# Show the column labels of the updated table.
print(new_annot_df.columns)

Index(['experiment', 'Repeats', 'condition', 'date', 'time', 'Idents'], dtype='object')


Now, save to a new CSV (with a slightly different name, to avoid confusion)

In [12]:
# Write the updated annotation table to '../neuron_annotations.csv'.
new_annot_df.to_csv('../neuron_annotations.csv')