**Purpose:** Test whether featurising the data with `catch22` affects the results of the mutual information algorithm.

**Aims:**

- Import SFP1 localisation signals (Alejandro Granados).

- Featurise data: use either `catch22` features or the raw time series (each time point treated as a feature).

- Compute the mutual information between pairs of strains, treating mutual information as any other machine learning measure.
  - Mutual information asks the question: can you tell apart a typical time series from dataset A and a typical time series from dataset B? 0 means 'no' and 1 means 'yes'; intermediate values can be used as similarity measures.

**Paradigms:**
- Use `aliby`-style data structures and `postprocessor` processes for featurisation.

In [None]:
%matplotlib inline

# Main

In [None]:
import numpy as np
import pandas as pd
import csv
import matplotlib.pyplot as plt
import seaborn as sns

from postprocessor.core.processes.catch22 import catch22Parameters, catch22
from postprocessor.core.processes.mi import miParameters, mi

# PARAMETERS
# Common path prefix of the input CSV files; convert_csv_to_aliby appends
# '<replicate>_<medium>.csv' to it to build the full file name.
filename_prefix = './data/agranados/infdata_rep'
# Media loaded for each replicate; each medium name becomes the "strain"
# label of its rows in the aliby-style index.
MEDIUM_LIST = ['rich', 'stress']

# SET UP PROCESSORS
# Processor used below to turn signals into catch22 features, built with the
# library's default parameters.
catch22_processor = catch22(catch22Parameters.default())
# Mutual information processor: start from the default parameters, then
# override two fields before constructing it.
mi_params = miParameters.default()
# NOTE(review): presumably requests a single estimate rather than one per
# time point -- confirm against the postprocessor `mi` documentation.
mi_params.overtime = False
# NOTE(review): presumably the number of bootstrap resamples behind the
# reported median and interval -- confirm against the `mi` documentation.
mi_params.n_bootstraps = 100
mi_processor = mi(mi_params)

def convert_csv_to_aliby(replicate, medium):
    """Load one replicate/medium CSV and re-index it in the `aliby` layout.

    The file read is ``<filename_prefix><replicate>_<medium>.csv``.  Its
    values are returned in a new DataFrame whose rows carry a two-level
    ("strain", "cellID") MultiIndex: "strain" is `medium` for every row and
    "cellID" is the row's position in the file.  The CSV's column labels are
    not kept; columns are numbered from 0.
    """
    csv_path = "".join([filename_prefix, str(replicate), "_", medium, ".csv"])
    raw_table = pd.read_csv(csv_path)

    n_cells = len(raw_table)
    strain_labels = [medium for _ in range(n_cells)]
    cell_ids = list(range(n_cells))
    row_index = pd.MultiIndex.from_arrays(
        [strain_labels, cell_ids],
        names=("strain", "cellID"),
    )
    return pd.DataFrame(data=raw_table.to_numpy(), index=row_index)

def convert_agranados_to_aliby(replicate, medium_list):
    """Stack the per-medium tables of one replicate into a single DataFrame.

    Each medium in `medium_list` is loaded with `convert_csv_to_aliby` and
    the resulting tables are concatenated row-wise, in list order.
    """
    per_medium_tables = []
    for medium in medium_list:
        per_medium_tables.append(convert_csv_to_aliby(replicate, medium))
    return pd.concat(per_medium_tables)

def pretty_print_mi(replicate, featurisation, mi_array):
    """Print a one-line summary of a mutual information result.

    Only the first row of `mi_array` is read.  Its first three entries are
    printed to three decimal places: the first is labelled as the median and
    the second and third as the two ends of the CI.  `replicate` and
    `featurisation` identify the run in the printed line.
    """
    first_row = mi_array[0]
    median, ci_first, ci_second = (
        format(first_row[position], ".3f") for position in range(3)
    )
    summary = "".join([
        "Replicate ", str(replicate), " ", featurisation,
        ": mutual information median = ", median,
        " (CI ", ci_first, ", ", ci_second, ")",
    ])
    print(summary)

# Run the same analysis on each replicate (numbered 1 to 5): plot the raw
# signals, estimate mutual information on the time points, featurise with
# catch22, plot the features, and estimate mutual information again.
for replicate in range(1, 6):
    # IMPORT DATA AND VISUALISE
    # Load the replicate of the current iteration, so the plots and printed
    # results below describe the data they are labelled with.
    signal = convert_agranados_to_aliby(replicate, MEDIUM_LIST)
    sns.heatmap(signal)
    plt.title(f'Time series, replicate {replicate}')
    plt.xlabel('Time point')
    plt.show()

    # COMPUTE MUTUAL INFORMATION ON TIME SERIES
    pretty_print_mi(replicate, 'timepoints', mi_processor.run(signal))

    # catch22 FEATURISATION
    features = catch22_processor.run(signal)
    # Draw first, then title, in the same order as the time-series plot.
    sns.heatmap(features)
    plt.title(f'catch22 featurisation, replicate {replicate}')
    plt.show()

    # COMPUTE MUTUAL INFORMATION ON catch22
    pretty_print_mi(replicate, 'catch22', mi_processor.run(features))