In [6]:
from frgpascal.analysis.processing import load_all, compress_jv, get_worklist_times
from scipy import stats
from natsort import natsorted
from natsort import index_natsorted
import copy
from mpl_toolkits.axes_grid1 import make_axes_locatable
import matplotlib.colorbar
import seaborn as sns
import warnings
from tqdm import tqdm
from matplotlib import style
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from IPython.display import set_matplotlib_formats
import os
import json
import pickle as pkl

%config InlineBackend.figure_format = 'retina'
# Reset every matplotlib rcParam to the library's built-in defaults before
# applying the notebook-wide overrides below.
mpl.rcParams.update(mpl.rcParamsDefault)
mpl.rcParams['axes.linewidth'] = 1.75 # global override: axes frame line width used by every figure

import time



In [None]:
def get_additional_params(paramdf):
    """Collect the sample name and substrate of every row into plain lists.

    Parameters
    ----------
    paramdf : table-like (typically a pandas.DataFrame)
        Must expose 'name' and 'substrate' columns. Rows are looked up by the
        integer labels 0 .. len(paramdf) - 1, so a default RangeIndex is
        expected.

    Returns
    -------
    dict
        {'sample_number': [...], 'substrate': [...]}, both lists in row order.
        Note that the 'name' column is exposed under the 'sample_number' key.
    """
    row_labels = range(len(paramdf))

    # To export further columns, add one more comprehension and dict entry.
    sample_numbers = [paramdf['name'][row] for row in row_labels]
    substrates = [paramdf['substrate'][row] for row in row_labels]

    return {
        'sample_number': sample_numbers,
        'substrate': substrates,
    }

In [None]:
def load_all_sorted(chardir):
    """Load characterization data and return both tables ordered by 'name'.

    Parameters
    ----------
    chardir : path-like
        Passed straight to ``load_all``.

    Returns
    -------
    (metricdf, rawdf)
        The two frames produced by ``load_all``, each sorted by its 'name'
        column in natural order (via ``natsort.index_natsorted``) and given a
        fresh 0..n-1 index.
    """
    def _in_natural_name_order(frame):
        # index_natsorted gives the natural-sort permutation; argsort turns it
        # into a per-row rank that sort_values can use as its key.
        ordered = frame.sort_values(
            by='name',
            key=lambda _: np.argsort(index_natsorted(frame['name'])),
        )
        return ordered.reset_index(drop=True)

    # presumably a wavelength window for the transmission fit - TODO confirm
    # against load_all's documentation
    t_window = dict(wlmin=700, wlmax=900)
    metricdf, rawdf = load_all(chardir, t_kwargs=t_window)

    rawdf = _in_natural_name_order(rawdf)
    metricdf = _in_natural_name_order(metricdf)
    return metricdf, rawdf


In [3]:
def adjust_time(timedf):
    """Replace every 'spincoat0' entry with its first element.

    Each cell of the 'spincoat0' column is expected to be an indexable
    sequence; only element ``[0]`` is kept. (Presumably that is the start
    time of the step as reported by ``get_worklist_times`` - verify against
    that helper.)

    The column is rebuilt in a single assignment instead of being written
    cell by cell through chained indexing (``df[col][n] = value``). Chained
    assignment triggers SettingWithCopyWarning and does nothing when pandas
    Copy-on-Write is active. Iterating the column positionally also removes
    the earlier requirement that the frame carry a 0..n-1 index.

    Parameters
    ----------
    timedf : pandas.DataFrame
        Must contain a 'spincoat0' column. The frame is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``timedf`` object, with 'spincoat0' collapsed to scalars.
        The column dtype is re-inferred by pandas from the extracted values.
    """
    first_entries = [entry[0] for entry in timedf['spincoat0']]
    timedf['spincoat0'] = first_entries
    return timedf

In [5]:
def rename_duplicate_cols(df):
    """Make the column labels of ``df`` unique by suffixing repeats.

    The first occurrence of a label is kept as is; the k-th repeat becomes
    ``'<label>.<k>'`` (``a, a, a`` -> ``a, a.1, a.2``). If that suffixed name
    is already present in the frame (e.g. ``a, a, a.1``), the counter is
    advanced until a free name is found, so the result is always free of
    duplicates. Labels are formatted with an f-string, so non-string labels
    no longer raise on concatenation.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are relabelled in place.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now with unique column labels.
    """
    original_labels = list(df.columns)
    taken = set(original_labels)   # every name that must not be handed out again
    times_seen = {}                # label -> next suffix number to try
    unique_labels = []

    for label in original_labels:
        suffix = times_seen.get(label, 0)
        if suffix == 0:
            # first occurrence keeps its name
            times_seen[label] = 1
            unique_labels.append(label)
            continue

        candidate = f'{label}.{suffix}'
        while candidate in taken:
            suffix += 1
            candidate = f'{label}.{suffix}'

        taken.add(candidate)
        times_seen[label] = suffix + 1
        unique_labels.append(candidate)

    df.columns = unique_labels
    return df

In [9]:
def correlation_plot(metricdf, x_col=str, y_col=str):
    """Plot one column against another with a KDE underlay and a linear fit, then save it.

    A scatter of ``y_col`` vs ``x_col`` is drawn on top of a filled 2-D kernel
    density estimate. A least-squares line (``scipy.stats.linregress``) is
    overlaid in red and its R^2 is printed in the top-left corner of the axes.

    Rows where either value is NaN are excluded before plotting and fitting;
    otherwise a single missing value turns the whole regression into NaN.
    With fewer than two complete rows the fit line is skipped and R^2 is
    reported as nan.

    Parameters
    ----------
    metricdf : pandas.DataFrame
        Table containing both columns; their values must be castable to float.
    x_col, y_col : str
        Column names for the x and y axes. NOTE(review): the defaults are the
        ``str`` type itself (kept so the signature stays unchanged), which is
        not a usable column name - callers must always pass both.

    Returns
    -------
    None

    Side effects
    ------------
    Creates a new matplotlib figure (it is not closed here) and writes
    ``<YYYYMMDD>_<x_col>_<y_col>.png`` at 300 dpi into the current working
    directory.
    """
    # Resolve the columns before creating the figure so a bad column name does
    # not leave an empty figure behind.
    x_all = metricdf[x_col].astype(float)
    y_all = metricdf[y_col].astype(float)

    complete = x_all.notna() & y_all.notna()
    x = x_all[complete]
    y = y_all[complete]

    fig, ax = plt.subplots()
    sns.scatterplot(x=x, y=y, ax=ax, color='black', alpha=1, legend=None)
    # `fill` is the current name of the deprecated `shade` argument
    sns.kdeplot(x=x, y=y, cmap="Greys_r", fill=True, bw_method='scott', ax=ax, alpha=.2)

    if len(x) >= 2:
        res = stats.linregress(x, y)
        rsq = res.rvalue**2
        ax.plot(x, res.intercept + res.slope*x, 'r')
    else:
        rsq = float('nan')

    ax.text(
        0.01, .95, f'R$^2$:{rsq:.2f}',
        horizontalalignment='left', verticalalignment='top',
        transform=ax.transAxes, color='red', weight='bold',
    )
    ax.set_ylabel(y.name, size=15)
    ax.set_xlabel(x.name, size=15)

    TodaysDate = time.strftime("%Y%m%d")
    fig.savefig(f'{TodaysDate}_{x_col}_{y_col}.png', dpi=300, bbox_inches='tight')

In [4]:
def batch_process(batch =str, chardir=str, paramdir=str, logdir=str, jvdir=None, drop_low_pl = 50, save=True):
    """Merge all tables of one batch and write the standard correlation plots.

    Steps:
      1. Create ``<YYYYMMDD>_<batch>_analysis`` in the current working
         directory (if missing) and work inside it for the duration of the
         call.
      2. Read the parameter CSV, the characterization tables
         (``load_all_sorted``) and the worklist timings
         (``get_worklist_times`` + ``adjust_time``); optionally the JV table
         (``compress_jv``).
      3. Concatenate them column-wise, de-duplicate column names, and drop
         rows whose 'pl_intensity_0' is <= ``drop_low_pl`` from the metric
         table (rows with a missing intensity are kept).
      4. Save one correlation plot per metric pair via ``correlation_plot``.

    The working directory is restored when the function exits, including on
    error. Previously the process stayed inside the output folder, so a
    second call nested its folder inside the first.
    Every JV plot used to be requested twice with identical arguments; each
    is now produced once, which writes the same set of files.

    Parameters
    ----------
    batch : str
        Label used in the output folder name.
    chardir, paramdir, logdir : path-like
        Inputs for ``load_all_sorted``, ``pd.read_csv`` and
        ``get_worklist_times``. They are opened *after* changing into the
        output folder, so relative paths are resolved against that folder
        (unchanged behavior).
    jvdir : path-like or None
        When given, JV data is merged in and the JV correlation plots are
        produced as well.
    drop_low_pl : number
        Threshold on 'pl_intensity_0' (see step 3).
    save : bool
        NOTE(review): currently unused - ``correlation_plot`` always saves.
    NOTE(review): the str-type defaults of the first four parameters are kept
    only for signature compatibility; they are not usable values.

    Returns
    -------
    (metricdf, rawdf)
        ``metricdf`` is the filtered merged table that was plotted; ``rawdf``
        is the unfiltered merge with the raw characterization columns
        appended. (Earlier versions built both but returned nothing.)
    """
    has_jv = jvdir is not None

    out_dir = "{}_{}_analysis".format(time.strftime("%Y%m%d"), batch)
    os.makedirs(out_dir, exist_ok=True)

    start_dir = os.getcwd()
    os.chdir(out_dir)  # correlation_plot saves relative to the cwd
    try:
        paramdf = pd.read_csv(paramdir)
        paramdf = paramdf.sort_values(
            by='name',
            key=lambda names: np.argsort(index_natsorted(names)),
        ).reset_index(drop=True)
        char_metricdf, char_rawdf = load_all_sorted(chardir)
        timedf = adjust_time(get_worklist_times(logdir))

        if has_jv:
            timing_table = pd.concat([timedf, compress_jv(jvdir)], axis=1)
        else:
            timing_table = timedf

        sample_table = pd.concat([paramdf, char_metricdf], axis=1)
        merged = pd.concat([sample_table, timing_table], axis=1)
        merged_with_raw = pd.concat([merged, char_rawdf], axis=1)

        # Both tables are relabelled independently, after concatenation.
        metricdf = rename_duplicate_cols(merged)
        rawdf = rename_duplicate_cols(merged_with_raw)

        # `~(<=)` rather than `>` so rows with a missing PL intensity survive.
        metricdf = metricdf[~(metricdf['pl_intensity_0'] <= drop_low_pl)]

        pl_metrics = ('pl_intensity_0', 'pl_peakev_0', 'pl_fwhm_0')
        jv_metrics = ('pce_f', 'ff_f', 'voc_f', 'jsc_f')

        # chronological plots
        for pl_metric in pl_metrics:
            correlation_plot(metricdf, x_col='spincoat0', y_col=pl_metric)

        if has_jv:
            # NOTE(review): the original issued every JV call twice with the
            # same '_f' column; the second call may have been meant for a
            # different column (e.g. a reverse-scan one) - confirm against
            # compress_jv's output before adding more metrics here.
            for x_metric in ('spincoat0',) + pl_metrics:
                for jv_metric in jv_metrics:
                    correlation_plot(metricdf, x_col=x_metric, y_col=jv_metric)
    finally:
        os.chdir(start_dir)

    return metricdf, rawdf
    
    


