# Visual check of the depot analysis


Measures:
1) Total signal in the field of view of the corresponding mouse/organ (sum of all pixels). This value was regarded as the total signal of the mouse (IEX+IN).

2) Number of pixels with values above a predetermined threshold (hodnota). This value was regarded as the size of the depot in the mouse (SEX+IN).

3) K10 index of depots. For this parameter, the mean value of the top 10% most intense pixels in the FOV is divided by the mean of the pixels above the threshold (hodnota). This value was used as an indicator of the distribution width of the depot.

4) Fractal analysis of the shape of the depot (pixels with values above the predetermined threshold hodnota). This value was used as an indicator of the distribution width of the depot.


----
## NOTES
1. IF date is missing -- take it from the name

2. IF shape is 2048x2048, interpolate it to 1024x1024

In [None]:
# jupyter nbconvert run_analysis-check_results.ipynb --no-input --to html

import h5py
import os, sys
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import time as timer # because I use time all over as a variable

# Importing module for functions
functions_modul =  "./functions"
sys.path.insert(0, functions_modul)

from load_h5_into_np import load_h5_into_np

# load function for analysis of the images
from process_signal import get_total_signal_and_area


# Wall-clock reference; the elapsed time is printed once the analysis is done.
start = timer.time()

# Locations of the inputs, all relative to the notebook directory.
current_path = "./"
all_files_path = "./frg"
mouse_slots_path = "./mice_idx"

# Correction matrix that is subtracted, slot by slot, from every image.
correction_matrix_slots_mice_all = np.load(
    os.path.join(current_path, "correction_matrix_slots_mice_all.npy")
)

# Every file in the foreground directory, minus the control ("kontrola")
# recordings -- per the original note these are already accounted for in the
# correction matrix.
all_files = os.listdir(all_files_path)
test_files = [fname for fname in all_files if "kontrola" not in fname]

# Slot positions of the mice from the background check (done half-manually in
# analyse_bcg-manual_check.ipynb) and the list of slots to reject.
df = pd.read_csv(os.path.join(mouse_slots_path, 'good_mice_idx.csv'))
bad_slots = pd.read_excel(os.path.join(mouse_slots_path, 'remove.xlsx')).rename(
    columns={"Full reject": "unique_labels"}
)

# Acquisition date/time table plus helper columns derived from the file name.
df_date_info = pd.read_csv(os.path.join(current_path, './GetDates-FRG.csv'))
date_and_time = df_date_info['Date'] + ' ' + df_date_info['Time']
name_tokens = df_date_info["Filename"].str.split("_")
df_date_info["Datetime"] = pd.to_datetime(date_and_time)
# everything in front of the "MWL" marker
df_date_info["strip_name"] = df_date_info["Filename"].str.split("MWL").str[0]
# the name without its last four "_"-separated tokens (i.e. without the time)
df_date_info["id_name"] = name_tokens.str[:-4].str.join('_')

# Table of known foreground/background name mismatches.
df_rename_dict = pd.read_csv(os.path.join(current_path, 'rename_dict.csv'))

# Parameters of the analysis run.
off = 70          # row offset of the first slot inside the image
background = 25   # background level (only referenced by commented-out code)
thr = 200         # threshold for depo
reduced_size = (1024, 1024)  # to which size we want to convert the bigger ones
plot_slots = True  # draw every thresholded slot while processing

# Bookkeeping lists that are filled while the files are processed.
signal_arr, missing_date, missing_date2, missing_bcg, wrong_shape = [], [], [], [], []

# Columns of the (initially empty) results table, in output order.
result_columns = [
    "name",
    "mouse_name",  # for being able to follow one mouse over time
    "slot",
    "type",
    "viewpoint",  # which part of the mouse was captured
    "area",
    "intensity_mean",
    "intensity_mean_depo",
    "intensity_max",
    "intensity_median",
    "intensity_median_depo",
    "full_signal_mean",  # done on original matrix without thr
    "full_signal_sum",
    "total_signal",
    "total_signal_depo",
    "k10",
    "date",
    "time",
    "datetime",
    "time_zero",
    "matrix_shape",
]
df_results = pd.DataFrame(columns=result_columns)

# Process every foreground file: resolve its acquisition date/time, find the
# slots occupied by mice, cut each slot out of the image, subtract the
# correction matrix and store the measures returned by
# get_total_signal_and_area.
#
# Rows are collected in a plain list and turned into a DataFrame once at the
# end: DataFrame.append was removed in pandas 2.0 and copied the whole frame
# on every call.
result_rows = []
# The set of rejected slot labels does not change while looping.
bad_slot_labels = set(bad_slots["unique_labels"].values)

for file in test_files:
    # take the name without the img and the end part
    frg_name = '_'.join((file.split('MWL')[0]).split('_')[1:])

    # check if it needs to be renamed (known mismatches); first match wins
    rename_hits = df_rename_dict.loc[df_rename_dict["frg_name"] == frg_name, "bcg_name"].values
    bcg_name = rename_hits[0] if rename_hits.size else frg_name

    # find the relevant date-time result; the name is matched literally
    # (regex=False) so special characters in a file name cannot change or
    # break the lookup, and the table is filtered only once per file
    date_rows = df_date_info[df_date_info["Filename"].str.contains(frg_name, regex=False, na=False)]
    if date_rows.empty:
        # not in the date table -> reconstruct the values from the file name
        name_parts = frg_name.split('_')
        year, *month_day = name_parts[-2].split('-')
        date = '/'.join(month_day + [year])  # mm/dd/yyyy
        time = name_parts[-1].replace('-', ':')[:-1]
        datetime = pd.to_datetime(date + ' ' + time)

        # save the name to check later
        missing_date.append(frg_name)
        missing_date2.append(file)
        print(f"\nThe following name was not found in the date_csv.\n{frg_name}.\n\n")
    else:
        first_match = date_rows.iloc[0]
        date = first_match["Date"]
        time = first_match["Time"]
        datetime = first_match["Datetime"]

    # check for starting files -- 0 time
    zero_time = "_00h_" in frg_name

    # get viewpoint
    if "richo" in file:
        # there is some Bricho and some bricho
        viewpoint = "bricho"
    elif "p_bok" in file:
        viewpoint = "p-bok"
    elif "l_bok" in file:
        viewpoint = "l-bok"
    else:
        viewpoint = "non-specified"

    # find the relevant info about which slots to take from the bcg csv
    slots = df.loc[df["name"].str.contains(bcg_name, regex=False, na=False), "slot"].values

    if slots.size == 0:
        # save the name to check later
        missing_bcg.append(frg_name)
        print(
            f"\nThe following name was not found in the df.\nfrg: {frg_name}\nbcg: {bcg_name}, slots: {slots}\n\n")
        continue

    if slots.size > 3:
        print(
            f"\nSomething fishy with file {frg_name}\nbcg: {bcg_name} since there are more than three slots ({slots}).\n")

    # load the arr
    tmp_arr = load_h5_into_np(os.path.join(all_files_path, file))

    # Bring the image to the 1024x1024 layout the slot geometry and the
    # correction matrix are defined for; any other shape is only recorded.
    if tmp_arr.shape == (1024, 1024):
        matrix_shape = 1024
    elif tmp_arr.shape == (2048, 2048):
        matrix_shape = 2048
        # NOTE(review): bicubic interpolation is what the original analysis
        # used; it is kept so results stay comparable.
        tmp_arr = cv2.resize(tmp_arr, dsize=reduced_size, interpolation=cv2.INTER_CUBIC)
    else:
        wrong_shape.append(file)
        continue

    file_tokens = file.split("_")
    image_id, sample_type = file_tokens[0], file_tokens[3]
    mouse_name = '_'.join(frg_name.split('_')[:-3])

    # go over slots and do the job
    for i in slots:
        # skip slots that were rejected during the manual check
        unique_label = f"{sample_type}_m{i}_{image_id}_slot{i}"
        if unique_label in bad_slot_labels:
            continue

        # Cut out the slot. The copy keeps the loaded image untouched, so a
        # slot index that occurs twice is not corrected twice.
        y = tmp_arr[off + (200 * i): off + (200 * (i + 1)), 250:-1].copy()
        # correct for the given slot
        y -= correction_matrix_slots_mice_all[(200 * i): (200 * (i + 1)), :]
        # remove negative values
        y[y < 0] = 0

        slot_mean = y.mean()
        print(f"mean {slot_mean} for slot {i} in {file}")
        signal_arr.append(slot_mean)

        # get the measures
        area, intensity, intensity_max, intensity_depo, intensity_med, intensity_med_depo, total_signal, \
        total_signal_depo, k_ten = get_total_signal_and_area(y, thr)
        print(
            f"area: {area}, intensity: {intensity}, int_depo: {intensity_depo}, max_int: {intensity_max}, int_med: {intensity_med}, int_med_depo: {intensity_med_depo}")

        # plot the thresholded slot
        if plot_slots:
            temp_arr = y.copy()
            temp_arr[temp_arr < thr] = 0
            plt.figure(figsize=(8, 2))
            sns.heatmap(temp_arr)
            plt.show()
            plt.imshow(temp_arr)
            plt.show()

        result_rows.append({
            "name": file,
            "mouse_name": mouse_name,
            "unique_label": unique_label,
            "slot": i,
            "type": sample_type,
            "viewpoint": viewpoint,
            "area": area,
            "intensity_mean": intensity,
            "intensity_mean_depo": intensity_depo,
            "intensity_max": intensity_max,
            "intensity_median": intensity_med,
            "intensity_median_depo": intensity_med_depo,
            "full_signal_mean": slot_mean,
            "full_signal_sum": y.sum(),
            "total_signal": total_signal,
            "total_signal_depo": total_signal_depo,
            "k10": k_ten,
            "date": date,
            "time": time,
            "datetime": datetime,
            "time_zero": zero_time,
            "matrix_shape": matrix_shape,
        })

# Build the results table in one go. The predefined column order is kept and
# columns that only occur in the rows ("unique_label") follow at the end, as
# they did with row-wise appending.
if result_rows:
    new_results = pd.DataFrame(result_rows)
    ordered_columns = list(df_results.columns) + [
        col for col in new_results.columns if col not in df_results.columns
    ]
    new_results = new_results.reindex(columns=ordered_columns)
    if df_results.empty:
        df_results = new_results
    else:
        df_results = pd.concat([df_results, new_results], ignore_index=True)

# Make the time-zero flag a real boolean, then let pandas pick the best
# dtype for every column.
df_results = df_results.assign(
    time_zero=df_results["time_zero"].astype(bool)
).convert_dtypes()

# Write the table next to the notebook.
df_results.to_csv("results_mice.csv", index=False)

# Stop the timer and report how long the run took.
end = timer.time()
print(f"\n------\n Processend took {end - start} s.")

In [None]:
# Run times noted down from earlier executions of the analysis cell.
for line in (
    "Without plotting:",
    "Processend took 38.66309714317322 s.",
    "With plotting:",
    f"Processend took 1167.670970916748 s. ({1167.670970916748/60} min)",
):
    print(line)

In [None]:
# List the files that could not be processed normally, grouped by reason.
problem_reports = (
    ("missing bcg:", missing_bcg),
    ("\nmissing dates:", missing_date),
    ("\nvery wrong size (not 1024 nor 2048):", wrong_shape),
)
for heading, entries in problem_reports:
    print(heading)
    for entry in entries:
        print(entry)

In [None]:
# Summary statistics and histogram of the per-slot mean signal, once for all
# slots and once for the slots whose mean exceeds the cutoff.
c = 150  # cutoff
signals = np.array(signal_arr)
signals_above_cutoff = signals[signals > c]

print(f"Mean signal: {signals.mean()}, std: {signals.std()}")
print(f"Max signal: {signals.max()}, min: {signals.min()}")
print(f"Mean with cut off: {signals_above_cutoff.mean()} and std: {signals_above_cutoff.std()}")

bins = 30
sns.histplot(signals, bins=bins, kde=True, label="no cutoff")
sns.histplot(signals_above_cutoff, bins=bins, kde=True, color="orange", label=f"cutoff: {c}")

plt.legend()
plt.xlabel('Intensity')
plt.title('Distribution of intensities with and without a cutoff')

In [None]:
# 4x2 grid of scatter plots (measure against date, coloured by type);
# the panels are filled row by row in the order listed here.
overview_columns = [
    "area", "intensity_max",
    "full_signal_mean", "full_signal_sum",
    "intensity_mean", "intensity_mean_depo",
    "intensity_median", "intensity_median_depo",
]
fig, axs = plt.subplots(4, 2, figsize=(14, 28))
for panel, column in zip(axs.flatten(), overview_columns):
    sns.scatterplot(x="date", y=column, data=df_results, hue="type", ax=panel)

In [None]:
# One scatter panel per measure (against date, coloured by type), laid out in
# two columns; with an odd number of measures the last panel stays empty.
measures = [
    'area',
    'intensity_mean',
    'intensity_mean_depo',
    'intensity_max',
    'intensity_median',
    'intensity_median_depo',
    'full_signal_mean',
    'full_signal_sum',
    'total_signal',
    'total_signal_depo',
    'k10',
]

n_rows = (len(measures) + 1) // 2
fig, axs = plt.subplots(n_rows, 2, figsize=(14, 28))

for panel, column in zip(axs.flatten(), measures):
    sns.scatterplot(x="date", y=column, data=df_results, hue="type", ax=panel)
    

In [None]:
# Same grid as the plain scatter overview, with a line plot drawn on top of
# the points in every panel.
measures = [
    'area',
    'intensity_mean',
    'intensity_mean_depo',
    'intensity_max',
    'intensity_median',
    'intensity_median_depo',
    'full_signal_mean',
    'full_signal_sum',
    'total_signal',
    'total_signal_depo',
    'k10',
]

n_rows = (len(measures) + 1) // 2
fig, axs = plt.subplots(n_rows, 2, figsize=(14, 28))

for panel, column in zip(axs.flatten(), measures):
    shared_kwargs = dict(x="date", y=column, data=df_results, hue="type", ax=panel)
    sns.scatterplot(**shared_kwargs)
    sns.lineplot(**shared_kwargs)
    

In [None]:
# Show the results table as the output of this notebook cell.
df_results

In [None]:
# Mean depot intensity over date for a single type.
pol_type = "p2"
rows_of_type = df_results[df_results["type"] == pol_type]
g = sns.scatterplot(data=rows_of_type, x="date", y="intensity_mean_depo", label=pol_type)

# dates are long labels -> turn them so they stay readable
plt.xticks(rotation=90)
plt.legend(title="Type")

plt.show()

In [None]:
# K10 index over date for a single type.
pol_type = "p2"
rows_of_type = df_results[df_results["type"] == pol_type]
g = sns.scatterplot(data=rows_of_type, x="date", y="k10", label=pol_type)

# dates are long labels -> turn them so they stay readable
plt.xticks(rotation=90)
plt.legend(title="Type")

plt.show()

In [None]:
# Correlation between the numeric measures, drawn as an annotated heatmap and
# shown as a table.
# Only numeric columns are used: since pandas 2.0 DataFrame.corr raises on
# text columns instead of dropping them silently. Boolean flags are excluded
# as well. The matrix is computed once and reused for plot and table.
corr_matrix = df_results.select_dtypes(include="number").corr()

plt.figure(figsize=(10, 10))
sns.heatmap(corr_matrix, square=True, annot=True)
corr_matrix

In [None]:
# Repair corrupt values in the datetime column: whatever cannot be parsed is
# replaced by a timestamp rebuilt from the separate date and time columns.

# fallback series: "<date> <time>" parsed as a timestamp
date_time_text = df_results[['date', 'time']].agg(' '.join, axis=1)
fallback_datetime = pd.to_datetime(date_time_text)

# keep valid entries, fill the unparsable ones from the fallback
parsed_datetime = pd.to_datetime(df_results['datetime'], errors='coerce')
df_results['datetime'] = parsed_datetime.fillna(fallback_datetime)

# check: rows that still have no valid datetime
print("problems with datetime type:")
#https://stackoverflow.com/questions/34207339/how-to-get-all-rows-with-invalid-np-datetime64-dates-in-a-pandas-dataframe
still_invalid = pd.to_datetime(df_results['datetime'], errors='coerce').isnull()
df_results.loc[still_invalid, ["datetime", "mouse_name", "time", "date"]]


### Add time duration

In [None]:
# Recordings in which the mice were shifted (per the original note, within one
# day), so the slot stored with the recording does not fit; the start-time
# lookup uses slot + 1 for these files.
shifted_mice = [
    "im75_pondeli_02_i1_polymer_vsechny_bricho_d70_2020-11-16_14-46-49-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im87_pondeli_02_i1_polymer_vsechny_l_bok_d70_2020-11-16_14-51-04-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im111_pondeli_02_i1_polymer_vsechny_p_bok_d70_2020-11-16_14-54-53-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im112_pondeli_02_i1_polymer_vsechny_p_bok_d70_2020-11-16_14-59-39-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im151_pondeli_03_f2_polymer_vsechny_Bricho_d70_2020-11-16_16-17-17-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im164_pondeli_03_f2_polymer_vsechny_l_bok_d70_2020-11-16_16-21-13-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im188_pondeli_03_f2_polymer_vsechny_p_bok_d70_2020-11-16_16-25-01-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im189_pondeli_03_f2_polymer_vsechny_p_bok_d70_2020-11-16_16-28-59-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im227_pondeli_04_p2_polymer_vsechny_Bricho_d70_2020-11-16_19-09-38-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im240_pondeli_04_p2_polymer_vsechny_l_bok_d70_2020-11-16_19-12-59-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im264_pondeli_04_p2_polymer_vsechny_p_bok_d70_2020-11-16_19-16-41-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im265_pondeli_04_p2_polymer_vsechny_p_bok_d70_2020-11-16_19-20-25-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im304_pondeli_05_e1_polymer_vsechny_Bricho_d70_2020-11-16_19-30-32-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im317_pondeli_05_e1_polymer_vsechny_l_bok_d70_2020-11-16_19-34-17-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im341_pondeli_05_e1_polymer_vsechny_p_bok_d70_2020-11-16_19-41-01-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im342_pondeli_05_e1_polymer_vsechny_p_bok_d70_2020-11-16_19-58-29-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im437_streda_13_fyzak_vsechny_p_bok_d70_2020-11-16_20-47-19-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im438_streda_13_fyzak_vsechny_p_bok_d70_2020-11-16_20-51-23-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im115_pondeli_02_i1_polymer_vsechny_p_bok_d85_2020-12-02_11-05-46-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im192_pondeli_03_f2_polymer_vsechny_p_bok_d85_2020-12-02_11-28-13-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im268_pondeli_04_p2_polymer_vsechny_p_bok_d85_2020-12-02_12-11-18-MWL-Ex(750)-Em(830)_Frgrnd.h5",
    "im345_pondeli_05_e1_polymer_vsechny_p_bok_d85_2020-12-02_14-10-31-MWL-Ex(750)-Em(830)_Frgrnd.h5",
]

In [None]:
# Attach the administration (start) time to every result row and derive the
# time elapsed between administration and image acquisition.

# load the start times from an external file
df_start_time = pd.read_csv(os.path.join(current_path, 'seznam_skupin_OG.csv'))
df_start_time['datetime'] = pd.to_datetime(
    df_start_time[['DayAdministered', 'TimeAdministered']].agg(' '.join, axis=1)
)

# Set lookup instead of scanning the list for every row.
shifted_names = set(shifted_mice)

# The start times are gathered in a list and assigned as one column, rather
# than written row by row through a boolean mask over the whole frame
# (quadratic, and the column dtype depended on assignment order).
start_times = []
for name, mouse, slot in zip(df_results["name"], df_results["mouse_name"], df_results["slot"]):
    # shifted recordings are looked up under the next slot
    lookup_slot = slot + 1 if name in shifted_names else slot

    candidates = df_start_time.loc[
        (df_start_time["mouse_name_unique"] == mouse) & (df_start_time["Slot"] == lookup_slot),
        "datetime",
    ]
    if candidates.empty:
        # no start time known for this mouse/slot -> report the file, leave NaT
        print(name)
        start_times.append(pd.NaT)
    else:
        start_times.append(candidates.iloc[0])

df_results["start_time2"] = pd.Series(start_times, index=df_results.index, dtype="datetime64[ns]")

elapsed = df_results["datetime"] - df_results["start_time2"]
df_results["elapsed_time"] = elapsed
df_results["elapsed_time_sec"] = elapsed.dt.total_seconds()


# check: mice for which no elapsed time could be computed
print("This should be empty")
print(df_results[df_results["elapsed_time"].isnull()].mouse_name.unique())

In [None]:
# original way

# no_00h_mouse = []
# for mouse in df_results.name:
#     start_mouse = df_results.loc[df_results["name"]==mouse, "mouse_name"].values[0]
#     try:
#         df_results.loc[df_results["name"]==mouse, "start_time"] = df_results.loc[(df_results["mouse_name"]==start_mouse)&(df_results["time_zero"]),"datetime"].values[0]# minute
#     except IndexError:
#         # if it doesn't have time 0
#         no_00h_mouse.append(mouse)
# #         print(mouse)
# print("Mice not having 00h source")
# for x in set(no_00h_mouse):
#     print(x)


# # add elapsed time as a difference
# df_results["elapsed_time"] = (df_results["datetime"]-df_results["start_time"])

In [None]:
# Write the final table (including the elapsed-time columns) to disk.
results_csv = os.path.join(current_path, "results_mice.csv")
df_results.to_csv(results_csv, index=False)
print("saved")

In [None]:
# Load the watermark IPython extension and run its line magic with the author
# name; these are IPython magics and only work inside a notebook/IPython session.
%load_ext watermark

%watermark -a 'Jan Kadlec' -nmvu -iv