**Purpose:** See how YMCs in mutant strains differ from each other and from the wild type.

**Aims:**
- Import flavin signals from multiple strains in the same experiment (and thus same nutrient conditions).
   - Obvious dataset: Causton strains, because there are five strains.
- Process data: cut time series to duration of interest, detrend flavin signals.
- Featurise data: use `catch22`
- Use UMAP to visualise the relationship between the data.
   - Adjust hyperparameters as appropriate to help with visualisation.
   - Potentially use the labels themselves to perform supervised UMAP.  This will hopefully separate the classes while retaining some local and global structure.

**Paradigms:**
- Use `aliby` data structures, i.e. `pandas` `DataFrames` with multi-indexing.
- Use `postprocessor` processes for featurisation
- Use `scikit-learn` and `umap` routines.
- Ultimate goal: combine all the cells into a single script that lives in `skeletons` (especially if `svm_sandbox.ipynb` and `cycle_alignment_sandbox.ipynb` share *many* cells with this one).

- Put everything into one long script so that I can generate 16 plots by pressing Return once.

In [None]:
import PyQt5
%matplotlib qt

# Import data

In [None]:
# These functions will be modules/scripts of their own, eventually
# and the whole damn thing will be a process.

import numpy as np
import pandas as pd
import csv
import matplotlib.pyplot as plt
import seaborn as sns
import umap
import umap.plot
from sklearn.preprocessing import StandardScaler
from postprocessor.core.processes.catch22 import catch22Parameters, catch22

# FIXED PARAMETERS
# Column positions (start inclusive, end exclusive) used to slice the signal
# DataFrame with `.iloc` before detrending.
interval_start = 25
interval_end = 168
# Length of the moving-average window handed to `detrend`.
window = 45

# PARAMETERS TO ITERATE/CHANGE
# Experiment identifier, kept as a string because it is concatenated into
# the names of the input CSV files.
#omeroid = '19979'
omeroid = '20016'
# 'catch22' featurises each time series with catch22; 'timepoints' uses the
# detrended values themselves as features.
featurisation_mode = 'catch22'
# featurisation_mode = 'timepoints'
# 'strain' labels each cell by the 'strain' index level; 'custom' reads the
# labels from a categories CSV file.
label_mode = 'strain'
# label_mode = 'custom'
# When True, the numerical labels are passed to the UMAP fit as targets.
umap_supervised = True


# Keeping this for when I merge cycle_alignment.ipynb into this too
def convert_df_to_aliby(signal, strainlookup_df):
    '''
    Reshape a flat table of signals into the multi-indexed layout used by aliby.

    Parameters
    ----------
    signal : pandas.DataFrame
        Table containing at least the columns 'position', 'cellID' and
        'distfromcentre', followed by the measurement columns.
    strainlookup_df : pandas.DataFrame
        Look-up table with a 'position' column and a 'strain' column.

    Returns
    -------
    pandas.DataFrame
        Measurement values indexed by a ('strain', 'cellID') MultiIndex.
        Column labels are discarded and replaced by 0, 1, 2, ...
        The input `signal` is left unmodified.
    '''
    # Position name -> strain name (strain names are more informative).
    position_to_strain = dict(
        zip(strainlookup_df.position, strainlookup_df.strain)
    )

    tidy = (
        signal
        .replace({'position': position_to_strain})
        .rename(columns={'position': 'strain'})
        .drop(['distfromcentre'], axis=1)
    )

    row_index = pd.MultiIndex.from_frame(tidy[['strain', 'cellID']])
    # NOTE(review): positional slice -- assumes the two identifier columns
    # ('strain' and 'cellID') are the first two columns once
    # 'distfromcentre' has been dropped; confirm against the CSV layout.
    measurements = tidy.iloc[:, 2:].to_numpy()

    return pd.DataFrame(measurements, index=row_index)


def import_my_data(omeroid):
    '''
    Load the flavin signals of one experiment as an aliby-style DataFrame.

    Two CSV files are read from './data/arin/': 'Omero<omeroid>_flavin.csv'
    (the signals) and 'Omero<omeroid>_strains.csv' (the position-to-strain
    look-up table).

    Parameters
    ----------
    omeroid : str
        Experiment identifier; it is concatenated into the file names, so it
        must be a string.

    Returns
    -------
    pandas.DataFrame
        Output of `convert_df_to_aliby` applied to the flavin table.
    '''
    path_stem = './data/arin/Omero' + omeroid + '_'

    flavin_table = pd.read_csv(path_stem + 'flavin.csv')
    strain_table = pd.read_csv(path_stem + 'strains.csv')

    # Every zero in the table becomes NaN -- presumably zeros mark missing
    # measurements (TODO confirm).
    # NOTE(review): this applies to all columns, so a cellID of 0 would also
    # become NaN.
    flavin_table = flavin_table.replace(0, np.nan)

    return convert_df_to_aliby(flavin_table, strain_table)


def moving_average(input_timeseries, window=3):
    '''
    Simple moving average of a series, computed from its cumulative sum.

    Only complete windows are kept, so for an input of length n (with
    n >= window) the result has n - window + 1 elements. The input is not
    modified.

    Parameters
    ----------
    input_timeseries : array-like
        Values to smooth (flattened by the cumulative sum).
    window : int
        Number of consecutive samples averaged together.

    Returns
    -------
    numpy.ndarray
        Float array of window means.
    '''
    running_total = np.cumsum(input_timeseries, dtype=float)

    # Removing the total accumulated `window` samples earlier leaves the sum
    # of just the most recent `window` samples.
    earlier_total = running_total[:-window]
    windowed_sum = running_total[window:] - earlier_total
    running_total[window:] = windowed_sum

    # The first complete window ends at position window - 1.
    return running_total[window - 1:] / window


def detrend(signal, window):
    '''
    Detrend each row of `signal` by dividing it by its own moving average.

    Each row is first divided by its mean. A moving average of length
    `window` is then computed per row, and the central part of the
    mean-scaled row is divided by that moving average.

    Parameters
    ----------
    signal : pandas.DataFrame
        One time series per row.
    window : int
        Length of the moving-average window.

    Returns
    -------
    pandas.DataFrame
        Detrended signals; `window` columns are trimmed off in total
        (window // 2 at the start, the rest at the end).
    '''
    mean_scaled = signal.div(signal.mean(axis=1), axis=0)

    trend = mean_scaled.apply(
        lambda row: pd.Series(moving_average(row.values, window)),
        axis=1,
    )

    # Unary minus binds tighter than //, so the stop index is the floor of
    # -window / 2 (e.g. -23 for window = 45): one more column is trimmed at
    # the end than at the start when `window` is odd.
    trimmed = mean_scaled.iloc[:, window // 2:(-window) // 2]

    # The moving average has one column more than the trimmed signal, so its
    # last column is dropped to make the shapes agree.
    return trimmed / trend.iloc[:, :-1].values


def featurise(signal, featurisation_mode):
    '''
    Turn each time series into a feature vector and standardise the features.

    Parameters
    ----------
    signal : pandas.DataFrame
        One time series per row.
    featurisation_mode : str
        'catch22' to compute features with the `catch22` post-process using
        its default parameters, or 'timepoints' to use the values of
        `signal` themselves as features.

    Returns
    -------
    Output of `StandardScaler().fit_transform` applied to the features.

    Raises
    ------
    ValueError
        If `featurisation_mode` is not one of the recognised modes.
    '''
    if featurisation_mode == 'catch22':
        catch22_processor = catch22(catch22Parameters.default())
        features = catch22_processor.run(signal)
    elif featurisation_mode == 'timepoints':
        features = signal
    else:
        # Without this branch an unknown mode left `features` unbound and
        # failed later with an uninformative UnboundLocalError.
        raise ValueError(
            'Unknown featurisation_mode ' + repr(featurisation_mode)
            + "; expected 'catch22' or 'timepoints'"
        )

    return StandardScaler().fit_transform(features)


def label(signal, label_mode):
    '''
    Build the labels used to colour (and optionally supervise) the embedding.

    Parameters
    ----------
    signal : pandas.DataFrame
        Signals indexed by a MultiIndex with 'strain' and 'cellID' levels.
    label_mode : str
        'strain': label each row by its 'strain' index level.
        'custom': look each cellID up in the file
        'categories_<omeroid>_detrend.csv' (no header, cellID in the first
        column, category in the second). This mode reads the module-level
        variable `omeroid`.

    Returns
    -------
    label_list
        One label per row of `signal` (a pandas Index in 'strain' mode, a
        numpy array in 'custom' mode).
    label_list_numerical
        In 'strain' mode, a list of integer codes assigned in order of first
        appearance; in 'custom' mode, the same object as `label_list`.

    Raises
    ------
    ValueError
        If `label_mode` is not one of the recognised modes.
    '''
    if label_mode == 'strain':
        label_list = signal.index.get_level_values('strain')
        # Integer code per distinct strain, in order of first appearance.
        label_map = {
            name: code
            for code, name in enumerate(label_list.unique().to_list())
        }
        label_list_numerical = [
            label_map.get(item, item) for item in label_list
        ]
    elif label_mode == 'custom':
        label_filename = 'categories_'+omeroid+'_detrend.csv'
        targets = pd.read_csv(label_filename, header = None, index_col = 0)
        targets.index.names = ['cellID']
        targets.columns = ['category']
        label_list = np.array([
            targets.loc[cellID].item()
            for cellID in signal.index.get_level_values('cellID')
        ])
        label_list_numerical = label_list
    else:
        # Without this branch an unknown mode left both return values
        # unbound and failed with an uninformative UnboundLocalError.
        raise ValueError(
            'Unknown label_mode ' + repr(label_mode)
            + "; expected 'strain' or 'custom'"
        )

    return label_list, label_list_numerical


def _from_module_scope(name, value):
    '''Return `value`, or the module-level variable `name` when `value` is None.'''
    if value is not None:
        return value
    try:
        return globals()[name]
    except KeyError:
        raise NameError(
            "draw_umap needs '" + name + "': pass it as an argument "
            '(no module-level variable of that name exists)'
        ) from None


def draw_umap(
    umap_supervised,
    scaled_features=None,
    label_list=None,
    label_list_numerical=None,
):
    '''
    Fit a 2-component UMAP embedding and plot it, coloured by label.

    Parameters
    ----------
    umap_supervised : bool
        If true, `label_list_numerical` is passed to the fit as the target
        `y`; otherwise the fit is unsupervised.
    scaled_features : array-like, optional
        Feature matrix to embed, one row per sample.
    label_list : array-like, optional
        One label per sample, used to colour the plotted points.
    label_list_numerical : array-like, optional
        Numerical labels; only needed when `umap_supervised` is true.

    Any data argument left as None is looked up as a module-level variable
    of the same name. This keeps the legacy call `draw_umap(umap_supervised)`
    working when those variables exist.

    Raises
    ------
    NameError
        If a required input is neither passed nor defined at module level.
    '''
    # Resolve the inputs up front so that a missing one fails before any
    # fitting work starts.
    scaled_features = _from_module_scope('scaled_features', scaled_features)
    label_list = _from_module_scope('label_list', label_list)
    if umap_supervised:
        y = _from_module_scope('label_list_numerical', label_list_numerical)
    else:
        y = None

    # Fit (fixed seed so that repeated runs use the same random state)
    reducer = umap.UMAP(
        random_state=0,
        n_neighbors=10,
        min_dist=0.05,
        n_components=2,
        metric='euclidean',
    )
    mapper = reducer.fit(
        scaled_features,
        y=y,
    )

    # Plot
    umap.plot.points(
        mapper,
        labels=label_list,
        color_key_cmap='Paired',
    )


def umap_altogether_pipeline(
    omeroid,
    window,
    featurisation_mode,
    label_mode,
    umap_supervised,
):
    '''
    Run the whole analysis: import, crop, detrend, featurise, label, plot.

    Parameters
    ----------
    omeroid : str
        Experiment identifier passed to `import_my_data`.
    window : int
        Moving-average window passed to `detrend`.
    featurisation_mode : str
        Mode passed to `featurise`.
    label_mode : str
        Mode passed to `label`.
    umap_supervised : bool
        Whether the UMAP fit uses the numerical labels as targets.

    The column interval to keep comes from the module-level variables
    `interval_start` and `interval_end`.
    '''
    # Import
    signal_flavin = import_my_data(omeroid)

    # Chop up time series; rows with any NaN left in the interval are dropped
    signal_flavin_processed = (
        signal_flavin.iloc[:, interval_start:interval_end].dropna()
    )

    # Detrend
    signal_flavin_processed = detrend(signal_flavin_processed, window)

    # Featurisation
    scaled_features = featurise(signal_flavin_processed, featurisation_mode)

    # Label
    label_list, label_list_numerical = label(
        signal_flavin_processed, label_mode
    )

    # UMAP: hand the results over explicitly. They are locals of this
    # function, so draw_umap cannot see them as module-level variables.
    draw_umap(
        umap_supervised,
        scaled_features=scaled_features,
        label_list=label_list,
        label_list_numerical=label_list_numerical,
    )
    
    
# Run the whole pipeline with the parameters set earlier in this cell, then
# title the resulting figure with those same parameters.
umap_altogether_pipeline(
    omeroid,
    window,
    featurisation_mode,
    label_mode,
    umap_supervised,
)
plt.title(
    ', '.join([
        'Experiment ' + omeroid,
        'featurisation using ' + featurisation_mode,
        'labelling ' + label_mode,
        # The space after 'supervision' was missing, which produced
        # 'UMAP supervisionTrue' in the title.
        'UMAP supervision ' + str(umap_supervised),
    ])
)

To do: add a way to mouse over points and see what the corresponding time series looks like.