### Creating master index based on features found in filenames

In [None]:
import glob, os, re
from importlib import reload

import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib

import byc
from byc import steady_state_analysis as ssa
from byc import constants, files, utilities, plotting
# Re-import each byc submodule with importlib.reload -- presumably so that
# edits made to the package on disk are picked up without restarting the
# notebook kernel.
for pkg in [constants, files, utilities, plotting]:
    reload(pkg)

# Shorthand for the regex patterns that are searched for in measurement
# directory names below (patterns.date, patterns.plasmid_name, ...)
patterns = constants.patterns
# NOTE(review): looks like this applies the project's plotting style to
# matplotlib -- confirm against byc.plotting.set_styles
plotting.set_styles(plt, matplotlib)

In [6]:
def find_measdirname_features(measdirpath, feature_patterns):
    """
    Search the name of a micromanager multi-d acquisition output
    directory, e.g.
    "20210421_pJC031_BY4741_constantOD_250uM-Tet_time000_1", for each
    regex pattern in feature_patterns.

    Parameters
    ----------
    measdirpath : str
        Path to the measurement directory, with or without a trailing
        path separator.
    feature_patterns : dict
        Maps feature name -> regex pattern (anything accepted by
        re.search).

    Returns
    -------
    dict
        One entry per feature whose pattern matched, mapping the
        feature name to its re.Match object (features with no match
        are omitted), plus two string entries:
        'path': measdirpath with the directory name cut out (any
        trailing separator of measdirpath is retained), and
        'measdirname': the directory name itself.
    """
    # Strip trailing separators before taking the basename so that
    # "data/meas/" and "data/meas" both resolve to "meas" rather than
    # the latter resolving to the parent directory "data"
    separators = os.sep + (os.altsep or '')
    measdirname = os.path.basename(measdirpath.rstrip(separators))

    matches_dict = {}
    for key, val in feature_patterns.items():
        feature_match = re.search(val, measdirname)
        if feature_match:
            matches_dict[key] = feature_match

    if measdirname:
        # Cut out only the final occurrence of the directory name so a
        # parent directory that happens to contain the same text is
        # left intact
        parent, _, trailing = measdirpath.rpartition(measdirname)
        matches_dict['path'] = parent + trailing
    else:
        # Nothing to cut out (e.g. empty path or filesystem root)
        matches_dict['path'] = measdirpath
    matches_dict['measdirname'] = measdirname
    return matches_dict

def measdf_from_features_dict(features_dict):
    """
    Convert the regex match objects in features_dict into plain values
    and put the result into a single row dataframe for the measurement.

    For each value that is an re.Match:
      * keys 'minutes' and 'clone_number' are replaced by the second
        capture group converted to float
      * keys 'tet_concn' and 'estradiol_concn' are replaced by the
        first capture group converted to float
      * every other key is replaced by the full matched text
    Values that are not re.Match objects (e.g. the 'path' and
    'measdirname' strings) are left untouched.

    Note that features_dict is updated in place.

    Return the single row dataframe (index [0]) with one column per
    key of features_dict
    """
    # Features whose number is held in the first capture group
    group0_vars = ('tet_concn', 'estradiol_concn')
    # Features whose number is held in the second capture group
    group1_vars = ('minutes', 'clone_number')

    # Only existing keys are reassigned, so the dict does not change
    # size while it is being iterated
    for key, val in features_dict.items():
        # Some information annotated in find_measdirname_features
        # is already a string etc., so only extract groups if
        # the value in the features_dict is actually an re.Match
        if not isinstance(val, re.Match):
            continue
        if key in group1_vars:
            # Built-in float: the np.float alias was removed from numpy
            features_dict[key] = float(val.group(2))
        elif key in group0_vars:
            features_dict[key] = float(val.group(1))
        else:
            features_dict[key] = val.group()

    measdf = pd.DataFrame(features_dict, index=[0])
    return measdf

def make_ss_mdf(exptname, **kwargs):
    """
    Create and save master index made by scanning the directory
    matching `exptname` in constants.steady_state_data_dir
    and looking for features in those micromanager output
    directories defined in feature_patterns

    Keyword arguments
    -----------------
    write_mdf : bool, default True
        Save the master index as "<expt_date>_master_index.csv" in the
        experiment directory, where <expt_date> is taken from the
        first row of the index.
    return_mdf : bool, default True
        Return the master index dataframe; otherwise return None.

    Raises ValueError if no measurement directories are found in the
    experiment's "data" directory.
    """
    write_mdf = kwargs.get('write_mdf', True)
    # Previously read from the 'write_mdf' key by mistake, which made
    # it impossible to set return_mdf on its own
    return_mdf = kwargs.get('return_mdf', True)
    ssdir = constants.steady_state_data_dir
    exptdir = os.path.join(ssdir, exptname)
    datadir = os.path.join(exptdir, 'data')
    measdirpaths = glob.glob(f"{datadir}/*/")
    measdirpaths = [p for p in measdirpaths if os.path.isdir(p)]
    if not measdirpaths:
        # pd.concat would raise a ValueError here anyway; raise the
        # same type with a message that names the directory searched
        raise ValueError(f'No measurement directories found in {datadir}')

    feature_patterns = {'expt_date': patterns.date,
                        'plasmid': patterns.plasmid_name,
                        'genotype': patterns.genotype,
                        'tet_concn': patterns.tet_concn,
                        'estradiol_concn': patterns.estradiol_concn,
                        'culture_condition': patterns.culture_condition,
                        'minutes': patterns.timepoint_mins,
                        'clone_number': patterns.clone_number}

    # Make individual rows of the master index
    measdfs = []
    for measdirpath in measdirpaths:
        features_dict = find_measdirname_features(measdirpath, feature_patterns)
        measdf = measdf_from_features_dict(features_dict)
        measdfs.append(measdf)
    # Create the master index
    mdf = pd.concat(measdfs, ignore_index=True)

    if write_mdf:
        filename = f'{mdf.expt_date.iloc[0]}_master_index.csv'
        writepath = os.path.join(exptdir, filename)
        mdf.to_csv(writepath, index=False)
        print(f'Saved master index at \n{writepath}')
    if return_mdf:
        return mdf
    return None

### Create master index using features found for each measurement dir

In [7]:
mdf = make_ss_mdf("20210421_pJC031_BY4741_rpn4d_timecourse")

Saved master index at 
C:\Users\John Cooper\Box Sync\Finkelstein-Matouschek\images\20210421_pJC031_BY4741_rpn4d_timecourse\20210421_master_index.csv


### Adapt `byc.files.rename_steady_state()` to work more flexibly with master indexes made above

Ultimately, I think I'll need to change `steady_state_analysis.make_expt_df()` more significantly so that it looks for .csv measurement files using regex rather than explicit strings.