In [143]:
import pandas as pd
import os
from configobj import ConfigObj

In [144]:
# Load the table stored in TPMs.csv; the first CSV column becomes the row index.
full_data_df = pd.read_csv(
    "../Sickle-trait-RNAseq/Data/TPMs.csv",
    index_col=0,
)

In [147]:
def drop_n_config(df, out_dir=None):
    """Write time-series tables and a ConfigObj config file for one sample.

    Column names are parsed positionally: characters 4-5 of every column name
    are read as an integer and multiplied by 3 to produce the timepoint label
    (presumably hours at a 3-hour sampling interval -- confirm against the
    sampling protocol).  The first column name also supplies the output file
    prefix: the first four characters of its first ``_``-separated token
    joined with its second token (a name shaped like ``AA1301_3D7`` gives the
    prefix ``AA13_3D7``).

    Files written into the output directory:

    * ``<prefix>_ts.tsv``          -- every row, columns ordered by timepoint
    * ``<prefix>_ts_filtered.tsv`` -- only rows where at most half of the
      values are ``<= 1``
    * ``<prefix>_config.txt``      -- ConfigObj file referencing the filtered
      table, ``empty_annot.tsv`` and a ``<prefix>_Results`` output directory
      (neither of the latter two is created or checked here)

    Parameters
    ----------
    df : pandas.DataFrame
        Numeric table whose columns all belong to one sample and follow the
        naming layout described above.
    out_dir : str, optional
        Directory that receives the files and is used to build the paths
        stored in the config.  Defaults to the current working directory.

    Returns
    -------
    ConfigObj
        The configuration object that was written to ``<prefix>_config.txt``.

    Raises
    ------
    ValueError
        If ``df`` has no columns (e.g. the upstream column filter matched
        nothing), or if characters 4-5 of a column name are not an integer.
    """
    cols = df.columns.tolist()
    if not cols:
        # Without this guard the failure surfaces later as a bare IndexError.
        raise ValueError(
            "drop_n_config received a DataFrame with no columns; "
            "check that the column filter matched at least one sample column"
        )

    base_dir = os.getcwd() if out_dir is None else os.path.abspath(out_dir)

    # Convert column names to timepoints and reorder the columns by timepoint.
    # A stable positional reorder is used instead of label selection so that
    # two columns mapping to the same timepoint are each kept exactly once.
    timepoints = [int(c[4:6]) * 3 for c in cols]
    order = sorted(range(len(cols)), key=timepoints.__getitem__)
    sorted_tp_df = df.iloc[:, order].copy()
    sorted_tp_df.columns = [timepoints[i] for i in order]

    # Shared prefix for every output name, derived from the first column.
    name_parts = cols[0].split("_")
    prefix = "_".join([name_parts[0][:4], name_parts[1]])

    def _out_path(suffix):
        # Full path of "<prefix>_<suffix>" inside the output directory.
        return os.path.join(base_dir, "_".join([prefix, suffix]))

    # Save the complete, time-ordered table.
    sorted_tp_df.to_csv(_out_path("ts.tsv"), sep="\t")

    # Keep rows in which no more than half of the timepoints are <= 1.
    low_value_counts = (sorted_tp_df <= 1).sum(axis=1)
    filtered_ts_df = sorted_tp_df[low_value_counts <= sorted_tp_df.shape[1] / 2]
    full_filtered_filename = _out_path("ts_filtered.tsv")
    filtered_ts_df.to_csv(full_filtered_filename, sep="\t")

    config_dict = dict()
    config_dict["data_file"] = full_filtered_filename
    config_dict["annotation_file"] = os.path.join(base_dir, "empty_annot.tsv")
    config_dict["output_dir"] = _out_path("Results")
    config_dict["num_proc"] = 4
    config_dict["verbose"] = True

    config_dict["dlxjtk_arguments"] = dict()
    config_dict["dlxjtk_arguments"]["periods"] = 45, 48, 51
    config_dict["dlxjtk_arguments"]["dlxjtk_cutoff"] = 10
    config_dict["dlxjtk_arguments"]["num_reg"] = 1000000
    config_dict["dlxjtk_arguments"]["num_per"] = 10000

    co_obj = ConfigObj(config_dict)
    co_filename = _out_path("config.txt")
    print(co_filename)
    co_obj.filename = co_filename
    co_obj.write()

    # Callers bind the result to a ``*_config`` name, so hand the object back.
    return co_obj

In [148]:
# One (label, column-name regex) pair per sample, grouped by strain tag.
sample_patterns = [
    # 3D7
    ("AA1_3D7", '(AA13.*3D7)'),
    ("AA2_3D7", '(AA17.*3D7)'),
    ("AS1_3D7", '(AS15.*3D7)'),
    ("AS2_3D7", '(AS16.*3D7)'),
    # FUP
    ("AA1_FUP", '(AA13.*FUP)'),
    ("AA2_FUP", '(AA17.*FUP)'),
    ("AS1_FUP", '(AS18.*FUP)'),
    ("AS2_FUP", '(AS19.*FUP)'),
]

# Select each sample's columns from the full table and pass the subset to
# drop_n_config, processing the samples in the order listed above.
sample_frames = {}
sample_configs = {}
for label, pattern in sample_patterns:
    sample_frames[label] = full_data_df.filter(regex=pattern)
    sample_configs[label] = drop_n_config(sample_frames[label])

# Keep the individual per-sample names bound at module level.
AA1_3D7_df, AA1_3D7_config = sample_frames["AA1_3D7"], sample_configs["AA1_3D7"]
AA2_3D7_df, AA2_3D7_config = sample_frames["AA2_3D7"], sample_configs["AA2_3D7"]
AS1_3D7_df, AS1_3D7_config = sample_frames["AS1_3D7"], sample_configs["AS1_3D7"]
AS2_3D7_df, AS2_3D7_config = sample_frames["AS2_3D7"], sample_configs["AS2_3D7"]
AA1_FUP_df, AA1_FUP_config = sample_frames["AA1_FUP"], sample_configs["AA1_FUP"]
AA2_FUP_df, AA2_FUP_config = sample_frames["AA2_FUP"], sample_configs["AA2_FUP"]
AS1_FUP_df, AS1_FUP_config = sample_frames["AS1_FUP"], sample_configs["AS1_FUP"]
AS2_FUP_df, AS2_FUP_config = sample_frames["AS2_FUP"], sample_configs["AS2_FUP"]

/Users/robertmoseley/Desktop/Malaria-Sickle/Scripts/AA13_3D7_config.txt
/Users/robertmoseley/Desktop/Malaria-Sickle/Scripts/AA17_3D7_config.txt
/Users/robertmoseley/Desktop/Malaria-Sickle/Scripts/AS15_3D7_config.txt
/Users/robertmoseley/Desktop/Malaria-Sickle/Scripts/AS16_3D7_config.txt
/Users/robertmoseley/Desktop/Malaria-Sickle/Scripts/AA13_FUP_config.txt
/Users/robertmoseley/Desktop/Malaria-Sickle/Scripts/AA17_FUP_config.txt
/Users/robertmoseley/Desktop/Malaria-Sickle/Scripts/AS18_FUP_config.txt
/Users/robertmoseley/Desktop/Malaria-Sickle/Scripts/AS19_FUP_config.txt
