# Explore GNF normalization methods

## 0 Packages and presets

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from myImageLib import dirrec, bestcolor, bpass, wowcolor
from skimage import io, measure
import pandas as pd
from scipy.signal import savgol_filter, medfilt
import os
import corrLib
from numpy.polynomial.polynomial import polyvander
from scipy.optimize import curve_fit
from miscLib import label_slope
from scipy import signal
from scipy.interpolate import griddata
import matplotlib
import pandas as pd
from scipy.ndimage import gaussian_filter1d, uniform_filter1d
import typesetting.main as tm
from corr_utils import *
from IPython.display import clear_output
from log import experiment_log

In [2]:
# Notebook-wide presets used by the cells below.
# Shared colour / marker lookup so that the same concentration is drawn with the
# same colour and symbol in every plot.
color_dict, marker_list = unified_symbols()
# Root data folder: Google Drive for now, but may change in the future.
data_master_dir = r'E:\Google Drive'
# Apply default plotting configuration (preset name: '1-column-2-panel').
# NOTE: these parameters should really vary from plot to plot.
tm.prl('1-column-2-panel')
# Video info for the GNF raw data obtained from Aug 3rd to Aug 6th (selected via kw='aug').
dirs = data_log_mapping(kw='aug')
# Experiment log; presumably a DataFrame given the name -- TODO confirm against log.experiment_log.
log_df = experiment_log()

-------The log looks OK!--------


## 1 Large scale

In [None]:
def postprocess_gnf(gnf_data, lb, xlim=None, sparse=3, normalize='1', volume_fraction=None, mpp=0.33):
    r"""
    Postprocess raw GNF data for plotting.

    The raw data are cut to the requested range of box sizes, converted to
    x = n / lb^2 and y = d / sqrt(n), optionally normalized, and finally
    sparsified. The default behavior rescales the starting point of every curve
    to 1. `normalize` stands for the rescaling method: `'1'` rescales by the
    first point, `'small-scale'` applies the normalization described in
    Section 3.2 and requires `volume_fraction` to compute the rescaling factor.

    Args:
    gnf_data -- DataFrame containing columns ('n', 'd'), generated by df2_nobp.py or df2_kinetics.py
    lb -- size of bacteria (pixel, normalizing factor of x axis)
    xlim -- box size beyond which the data get cut off, in units of lb**2
            (rows are compared against xlim * lb**2),
            if xlim is None, no data are cut off,
            if xlim is a number, it is the upper limit, data with n >= xlim*lb**2 are cut off,
            if xlim is a list/tuple of 2 numbers, only data with
            xlim[0]*lb**2 <= n < xlim[1]*lb**2 are kept
    sparse -- the degree to sparsify the data (positive integer), 1 is doing nothing,
              3 means only keep 1/3 of the original data
    normalize -- the method to normalize the data. Choose from '1', None or 'small-scale'.
                 '1': rescale y with y[0]
                 'small-scale': rescale y with y[0] / \sqrt{1 - volume_fraction}. Additional volume_fraction arg is required.
                 None: no normalization will be applied.
    volume_fraction -- volume fraction used by the 'small-scale' normalization, must be < 1.
                       Ignored by the other normalization methods.
    mpp -- not used by this function; kept so that existing calls keep working.

    Returns:
    x, y -- a tuple that can be plotted directly using plt.plot(x, y).
            Both are empty if no data fall inside xlim.

    Raises:
    ValueError -- if xlim, sparse or normalize is invalid, or if the requirements of
                  'small-scale' are not met (volume_fraction missing or >= 1, or a
                  lower limit xlim[0] larger than 1).

    Edit:
    12022020 -- Initial commit.

    Test:
    # test new postprocess_gnf(gnf_data, lb, xlim=None, sparse=3, normalize='1', volume_fraction=None ,mpp=0.33)
    data = pd.read_csv(r'E:\moreData\08032020\df2_kinetics\01\kinetics_data.csv')
    gnf_data = data.loc[data.segment==50]
    lb = 10
    # test normalize = '1'
    x, y = postprocess_gnf(gnf_data, lb, xlim=[10, 10000], sparse=3, normalize='1')
    plt.plot(x, y, label='1')
    # test normalize = 'small-scale'
    x, y = postprocess_gnf(gnf_data, lb, xlim=[1, 10000], sparse=3, normalize='small-scale', volume_fraction= 0.064)
    plt.plot(x, y, label='small-scale')
    # test normalize = None
    x, y = postprocess_gnf(gnf_data, lb, xlim=[1, 10000], sparse=3, normalize=None)
    plt.plot(x, y, label='None')
    plt.loglog()
    plt.legend(fontsize=5)
    plt.xlabel('$l^2/l_b^2$')
    plt.ylabel('$\Delta N/\sqrt N$')
    """
    # Function-scope import so the block does not depend on the notebook's import cell.
    import numbers

    # Validate the cheap arguments first so that bad calls fail before any data work.
    if not (normalize is None or normalize in ('1', 'small-scale')):
        raise ValueError('Invalid normalize argument')
    if sparse < 1:
        raise ValueError('sparse must be a positive integer')

    # Cut the data to the requested range of box sizes.
    lower = None  # lower limit in units of lb**2, only set when xlim is a pair
    if xlim is None:
        data = gnf_data
    elif isinstance(xlim, numbers.Real):
        data = gnf_data.loc[gnf_data.n < xlim * lb**2]
    elif isinstance(xlim, (list, tuple)) and len(xlim) == 2:
        lower, upper = xlim
        data = gnf_data.loc[(gnf_data.n >= lower * lb**2) & (gnf_data.n < upper * lb**2)]
    else:
        raise ValueError('xlim must be None, a number, or a list/tuple of 2 numbers')

    if normalize == 'small-scale':
        if volume_fraction is None:
            raise ValueError("volume_fraction is required when normalize='small-scale'")
        if not volume_fraction < 1:
            raise ValueError('volume_fraction must be smaller than 1')
        # Make sure the first data point is at a smaller scale than lb. Without a
        # lower limit nothing is cut from the small-scale end, so there is nothing to check.
        if lower is not None and lower > 1:
            raise ValueError("xlim[0] must be <= 1 when normalize='small-scale'")

    xx = data.n / lb**2
    yy = data.d / data.n**0.5

    # An empty selection has no first point to rescale by; return it unchanged.
    if normalize is not None and len(yy) > 0:
        yy = yy / yy.iat[0]
        if normalize == 'small-scale':
            yy = yy * (1 - volume_fraction) ** 0.5

    # sparsify: keep every `sparse`-th point, by position
    x = xx.iloc[::sparse]
    y = yy.iloc[::sparse]

    return x, y