In [1]:
import numpy as np
import pandas as pd
import os

def csv_files():
    """Return the names of the .csv files in the current working directory.

    Only regular files whose name *ends* with ``.csv`` are returned, so
    entries such as ``data.csv.bak`` or a directory named ``old.csv`` are
    skipped.  The result is sorted so files are always processed in the
    same order (``os.listdir`` makes no ordering guarantee).

    Returns:
        list[str]: bare file names, relative to the current directory.
    """
    return sorted(
        f for f in os.listdir()
        if f.endswith('.csv') and os.path.isfile(f)
    )

In [2]:
#%% import original dataframe, average replicates and save summary tables

# Column layout of the per-species mean/std table written for each data file.
MEASURED_COLUMNS = ['Treatments', 'Time', 'Species', 'E[Measured]', 'std[Measured]']

# Names of the species columns read from every input file (S1 ... S10).
species = ["S"+str(i+1) for i in range(10)]


def _normalize_treatment_names(names):
    """Return treatment names with a uniform layout.

    * Names containing "Monospecies" have their first underscore removed
      (the first two underscore-separated fields are fused).
    * Names that do not mention 'passage' get "_passage1.0" appended, so
      every sample carries passage information.
    """
    normalized = []
    for name in names:
        if "Monospecies" in name:
            parts = name.split("_")
            # NOTE(review): only the first three fields are kept; anything
            # after the third underscore-separated field is dropped.
            # Confirm monospecies names never carry more than three fields.
            name = parts[0] + parts[1] + "_" + parts[2]
        if 'passage' not in name:
            name += "_passage1.0"
        normalized.append(name)
    return normalized


def _strip_replicate_info(names):
    """Keep only the first and last underscore-separated field of each name.

    Presumably the middle field(s) identify the replicate — TODO confirm
    against the input files.
    """
    stripped = []
    for name in names:
        bits = name.split("_")
        stripped.append(bits[0] + "_" + bits[-1])
    return np.array(stripped)


def _replicate_groups(df, rep_treatments, unique_treatments):
    """Yield (treatment, time, rows) for every treatment/time combination.

    `rows` holds the rows of `df` whose replicate-free treatment name equals
    `treatment` and whose Time equals `time`.
    """
    for treatment in unique_treatments:
        df_treatment = df.iloc[rep_treatments == treatment, :]
        times = df_treatment.Time.values
        for t in np.unique(times):
            yield treatment, t, df_treatment.iloc[times == t, :]


def _average_replicates(df, rep_treatments, unique_treatments, species):
    """Build one row per (treatment, time) holding the mean of each species.

    Rows are collected in a list and the frame is built once; the previous
    row-by-row DataFrame.append no longer exists in pandas >= 2.0 and
    re-copied the whole frame on every iteration.  The returned frame has a
    fresh RangeIndex.
    """
    rows = []
    for treatment, t, block in _replicate_groups(df, rep_treatments, unique_treatments):
        row = {'Treatments': treatment, 'Time': t}
        row.update(zip(species, block[species].mean(axis=0).values))
        rows.append(row)
    return pd.DataFrame(rows, columns=['Treatments', 'Time'] + list(species))


def _replicate_stats(df, rep_treatments, unique_treatments, species):
    """Build a long table with the mean and std of each species.

    One row per (treatment, time, species); time points that are not
    strictly positive are skipped.  NaN standard deviations (e.g. when only
    one replicate is present) are replaced by np.nan_to_num.
    """
    blocks = []
    for treatment, t, block in _replicate_groups(df, rep_treatments, unique_treatments):
        if t > 0:
            blocks.append(pd.DataFrame({
                'Treatments': treatment,
                'Time': t,
                'Species': species,
                'E[Measured]': block[species].mean(axis=0).values,
                'std[Measured]': np.nan_to_num(block[species].std(axis=0).values),
            }, columns=MEASURED_COLUMNS))
    if not blocks:
        # pd.concat raises on an empty list; keep the header-only table.
        return pd.DataFrame(columns=MEASURED_COLUMNS)
    return pd.concat(blocks, ignore_index=True)


for datafile in csv_files():
    df = pd.read_csv(datafile)

    # remove first underscore from monospecies treatment names and make sure
    # every sample has passage info
    df['Treatment'] = _normalize_treatment_names(df.Treatment.values)

    #%% remove replicate info from treatment names
    all_treatments = df['Treatment'].values
    rep_treatments = _strip_replicate_info(all_treatments)
    unique_treatments = np.unique(rep_treatments)

    #%% average replicates
    new_df = _average_replicates(df, rep_treatments, unique_treatments, species)

    # save data (output name is taken from the Media value of the first row)
    fiber = df.Media.values[0]
    new_df.to_csv(f"../{fiber}_full_passage.csv", index=False)

    # Format data keeping replicates

    #%% save mean and std of each replicate
    reps_df = _replicate_stats(df, rep_treatments, unique_treatments, species)
    reps_df.to_csv(f"../../Kfold/Tables/{fiber}_measured.csv", index=False)