# load modules

In [1]:
import os,sys,re
import glob
import pandas as pd
import scipy.io
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
import time 
import math
import tifffile as tiff
import tensorflow as tf
import pickle#save multiple variables into a pickle file

%matplotlib inline
from tqdm import tqdm_notebook as tqdm
from datetime import datetime

#add folder to the system-path at runtime, for importing file in another folder
if '../shiny_files' not in sys.path:
    sys.path.insert(0, '../shiny_files')
import func_get_data as fx

# load the data 

In [2]:
# ---------------------------------------------------------------------------
# User-specified arguments
# ---------------------------------------------------------------------------
version_name = 'true_tif'  # 'v11'
# input_dir = 'F:\emb_data'
input_dir = os.path.join(os.getcwd(), '..', '..', 'emb_data')

# The three flags below only select the name of the per-folder sub-directory
# that is read later (see the folder_tag* values at the end of this cell).
use_predict_tif = False  # (False): use true labels tif. (True or has_processed=False): use predict.tif
# default is to plot tp emb.
plot_fn = True  # (True): plot fn emb. from false_positive_index.txt
plot_fp = False
inv_c = True  # stem curved like inverse of letter C or not

# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
disk_path = os.path.join(input_dir, version_name)

# Keep sub-directories only: os.listdir() also returns plain files (stray csv,
# OS metadata files, ...) and every entry is later treated as a data folder.
all_folders_name = np.sort([
    entry for entry in os.listdir(disk_path)
    if os.path.isdir(os.path.join(disk_path, entry))
])
all_folders_dir = [os.path.join(disk_path, folder) for folder in all_folders_name]
n_folders = len(all_folders_name)  # [Diane 0522]

# Output folder name pieces derived from the user-specified arguments above
folder_tag1 = 'pred_tif' if use_predict_tif else 'true_tif'
folder_tag2 = 'has_fn' if plot_fn else 'no_fn'
folder_tag3 = 'has_fp' if plot_fp else 'no_fp'

FileNotFoundError: [WinError 3] The system cannot find the path specified: 'D:\\Diane\\Col\\research\\tian_grp_meeting\\emb_proj\\code\\embolism_project\\analysis\\..\\..\\emb_data\\true_tif'

# Define the functions

In [None]:
def compute_cc_emb_info(folder_idx):
    """Load ``shinydata.pkl`` for one folder and derive its cc-level table.

    The pickle is read from
    ``<all_folders_dir[folder_idx]>/<folder_tag1>_<folder_tag2>_<folder_tag3>/shinydata.pkl``
    and must contain a dict with a ``'plot_mat_time'`` DataFrame.

    Parameters
    ----------
    folder_idx : int
        Position of the folder in ``all_folders_name`` / ``all_folders_dir``.

    Returns
    -------
    cc_emb_info : pandas.DataFrame
        ``plot_mat_time`` without the pixel-level ``'row'`` and ``'col'``
        columns, with duplicate rows removed.
    x_mean : float
        Mean of ``plot_mat_time['col']``.
    folder_name_short : str
        ``all_folders_name[folder_idx]``.
    plot_mat_time : pandas.DataFrame
        The unmodified table loaded from the pickle (needed by
        ``compute_col_dist``).
    """
    folder_name_short = all_folders_name[folder_idx]
    input_folder_tag = '_'.join((folder_tag1, folder_tag2, folder_tag3))
    pickle_path = os.path.join(all_folders_dir[folder_idx], input_folder_tag, 'shinydata.pkl')

    # Use a context manager so the file handle is closed even if unpickling fails.
    # NOTE(review): pickle can execute arbitrary code on load; only read files
    # produced by this project.
    with open(pickle_path, 'rb') as pickle_file:
        pickle_dict = pickle.load(pickle_file)  # dictionary

    plot_mat_time = pickle_dict['plot_mat_time']
    # 'row' and 'col' are pixel level (smaller than cc level); dropping them and
    # de-duplicating leaves the unique cc-level rows.
    cc_emb_info = plot_mat_time.drop(['row', 'col'], axis=1).drop_duplicates()
    x_mean = np.mean(plot_mat_time['col'])
    # [Diane 0522] plot_mat_time is returned too, else there'll be err in compute_col_dist
    return cc_emb_info, x_mean, folder_name_short, plot_mat_time

In [None]:
def compute_col_dist(cc_emb_info, x_mean, plot_mat_time, folder_name=None):
    """Summarise one folder and add the mean centroid-column distance per time.

    Parameters
    ----------
    cc_emb_info : pandas.DataFrame
        Table with at least ``'cc_centroid_col'`` and
        ``'time_since_start(mins)'`` columns.
    x_mean : float
        Reference column position; the absolute distance of every
        ``cc_centroid_col`` to this value is computed.
    plot_mat_time : pandas.DataFrame
        Table with a ``'number_emb'`` column. After grouping by it and
        averaging, the last six columns are kept; ``'time_since_start(mins)'``
        must be among them or the merge below fails.
    folder_name : str, optional
        When given, a ``'folder_name'`` column holding ``folder_name.lower()``
        is added. Previously this value was read from an undeclared global
        (``folder_name_short``), which raised ``NameError`` on a fresh kernel
        or silently picked up a stale name from another cell.

    Returns
    -------
    pandas.DataFrame
        The averaged columns, ``'cc_cen_col_dist_mean'`` (mean absolute
        distance per ``'time_since_start(mins)'``) and, if requested,
        ``'folder_name'``.
    """
    time_col = 'time_since_start(mins)'

    # Absolute horizontal distance of each centroid to x_mean, paired
    # positionally with its time stamp, then averaged per time point.
    col_dist = pd.DataFrame({
        time_col: cc_emb_info[time_col].to_numpy(),
        'cc_cen_col_dist_mean': (cc_emb_info['cc_centroid_col'] - x_mean).abs().to_numpy(),
    })
    cc_col_dist_mean = col_dist.groupby(time_col)['cc_cen_col_dist_mean'].mean().reset_index()

    # img level; numeric_only=True keeps the historical pandas behaviour of
    # skipping non-numeric columns instead of raising on pandas >= 2.
    summary_statistics = plot_mat_time.groupby('number_emb').mean(numeric_only=True).iloc[:, -6:]
    summary_statistics = summary_statistics.merge(cc_col_dist_mean, on=time_col)

    if folder_name is not None:
        summary_statistics['folder_name'] = folder_name.lower()
    return summary_statistics

In [None]:
def summary_statistics_all(n_folders):
    """Stack the summary statistics of the first ``n_folders`` folders.

    For every folder index ``i`` in ``range(n_folders)`` the table returned by
    ``compute_col_dist`` is tagged with the lower-cased folder name and the
    tables are concatenated row-wise (original row indices are kept).

    Parameters
    ----------
    n_folders : int
        Number of folders to process.

    Returns
    -------
    pandas.DataFrame
        One block of rows per folder with a ``'folder_name'`` column; an empty
        DataFrame when ``n_folders`` is 0.
    """
    # Collect the per-folder frames and concatenate once: DataFrame.append was
    # removed in pandas 2.0 and re-copied the accumulated frame on every pass.
    per_folder_stats = []
    for i in tqdm(range(n_folders)):
        cc_emb_info, x_mean, folder_name_short, plot_mat_time = compute_cc_emb_info(i)  # [Diane 0522]
        summary_statistics = compute_col_dist(cc_emb_info, x_mean, plot_mat_time)
        summary_statistics['folder_name'] = folder_name_short.lower()  # [Diane 0522]
        per_folder_stats.append(summary_statistics)

    if not per_folder_stats:
        # pd.concat raises on an empty list; keep the old "empty frame" result.
        return pd.DataFrame()
    return pd.concat(per_folder_stats)

# Cumulative embolized area vs time

In [None]:
# For every folder: total cc_area and median number_emb per time point, plus the
# cumulative area normalised by its final value (so each curve ends at 1).
# Frames are collected in a list and concatenated once because DataFrame.append
# was removed in pandas 2.0.
cc_area_frames = []
for i in tqdm(range(n_folders)):
    cc_emb_info, x_mean, folder_name_short, _ = compute_cc_emb_info(i)  # [Diane 0522]
    summary_statistics = (
        cc_emb_info
        .groupby('time_since_start(mins)')
        .agg({'cc_area': 'sum', 'number_emb': 'median'})
        .reset_index()
    )
    summary_statistics['folder_name'] = folder_name_short.lower()
    cumulative_area = summary_statistics.cc_area.cumsum()
    summary_statistics['cumsum_cc_area'] = cumulative_area / cumulative_area.max()
    cc_area_frames.append(summary_statistics)

# pd.concat raises on an empty list, so fall back to an empty frame.
cc_area_summary = pd.concat(cc_area_frames) if cc_area_frames else pd.DataFrame()

In [None]:
# Display the stacked per-folder table (cc_area, number_emb, folder_name, cumsum_cc_area per time point).
cc_area_summary

In [None]:
import plotly.offline as pyo
import plotly.express as px
import plotly.graph_objects as go

# Enable inline plotly rendering, then draw one cumulative-area curve per folder.
pyo.init_notebook_mode()
fig = px.line(
    data_frame=cc_area_summary,
    x="time_since_start(mins)",
    y="cumsum_cc_area",
    color='folder_name',
)
fig.show()

In [None]:
# Save the figure created above as an HTML file; the relative name resolves against the current working directory.
fig.write_html("percentage.html")

# Total summary statistics for all folders

This table does not include the pixel positions of the embolisms.

In [None]:
# Build the stacked summary table for all folders, then collect the distinct
# (lower-cased) folder names it contains; these names index the plots below.
total_summary_stats = summary_statistics_all(n_folders)#[Diane 0522]
all_folders_name_short = np.unique(total_summary_stats.folder_name)#[Diane 0522]

In [None]:
# Display the stacked summary table for all folders. [Diane 0522]
total_summary_stats

# Potential Problem of the above total_summary_stats:

cc_width, cc_height, cc_area, cc_centroid_row and cc_centroid_col are NOT averaged across all the embolisms in one image.

<span style="color:red">  Therefore, the distribution of centroid position below might not be representative. </span>

# Distribution of centroid position (might be incorrect, see the reason above)

This should show the centroid position of every embolism event in an experiment.

In [None]:
# [Diane 0522] folder_idx* pick the two folders to compare (positions in
# all_folders_name_short); the folder name is used as the plot title.
folder_idx1 = 0
folder_idx2 = 1

# Rows of the summary table belonging to each selected folder
ss_1 = total_summary_stats.loc[
    total_summary_stats.folder_name == all_folders_name_short[folder_idx1]
]
ss_2 = total_summary_stats.loc[
    total_summary_stats.folder_name == all_folders_name_short[folder_idx2]
]

# KDE joint plot of centroid column (x) against centroid row (y), one per folder
fig1 = sns.jointplot(x=ss_1.cc_centroid_col, y=ss_1.cc_centroid_row, kind='kde', cmap="Reds")
fig1.fig.suptitle("%s" % all_folders_name_short[folder_idx1])  # [Diane 0522]
fig2 = sns.jointplot(x=ss_2.cc_centroid_col, y=ss_2.cc_centroid_row, kind='kde', cmap="Blues")
fig2.fig.suptitle("%s" % all_folders_name_short[folder_idx2])

In [None]:
import plotly.offline as pyo
import plotly.express as px
import plotly.graph_objects as go

# Scatter of the mean centroid-column distance over time, coloured by folder
# and with marker size taken from cc_area.
pyo.init_notebook_mode()
fig = px.scatter(
    data_frame=total_summary_stats,
    x="time_since_start(mins)",
    y="cc_cen_col_dist_mean",
    color="folder_name",
    size='cc_area',
)
fig.show()