$\textbf{An example of working with TableHyperImg}$

In [2]:
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

import hyper_img as hyper

$\textbf{Paths and constants}$

In [4]:
# Directory prefix for the image files named in the table's 'Image Name' column.
# It is joined to those names with plain `+`, so the trailing slash is required.
IMG_PATH = '/home/igor/projects/Seed/data/hyper_img_data/'
# Address of the Google spreadsheet that holds the annotation table.
URL = 'https://docs.google.com/spreadsheets/some_table'
# Table column used as the target variable in the pigment determination task.
TARGET_VARIABLE = 'Pigment'

$\textbf{Read a google table}$ 

In [5]:
# Load the annotation table into `df`.
# NOTE(review): the URL constant defined in the constants cell is not passed here —
# confirm how get_google_table_sheets locates the spreadsheet.
df = hyper.get_google_table_sheets()

In [6]:
df.head()

Unnamed: 0,PlantNumber,Image Name,Mutation,Голозерность/Пленчатость,Хим. анализ (пигмент 1),Хим. анализ (пигмент 2),ID партии,Линия/Сорт,Pigment,Black calibration data,White calibration data
0,10А-9-1,2022_09_02_session_000_001_snapshot_cube.tiff,-4,Пленчатые,,,2022_09_02,Myc2,only anthocyanins,2022_09_02_calibr_session_000_026_snapshot_cub...,2022_09_02_calibr_session_000_024_snapshot_cub...
1,10А-9-1,2022_09_02_session_000_002_snapshot_cube.tiff,-4,Пленчатые,,,2022_09_02,Myc2,only anthocyanins,2022_09_02_calibr_session_000_026_snapshot_cub...,2022_09_02_calibr_session_000_024_snapshot_cub...
2,10А-9-3,2022_09_02_session_000_003_cube.tiff,-4,Пленчатые,,,2022_09_02,Myc2,only anthocyanins,2022_09_02_calibr_session_000_026_snapshot_cub...,2022_09_02_calibr_session_000_024_snapshot_cub...
3,10А-9-3,2022_09_02_session_000_004_snapshot_cube.tiff,-4,Пленчатые,,,2022_09_02,Myc2,only anthocyanins,2022_09_02_calibr_session_000_026_snapshot_cub...,2022_09_02_calibr_session_000_024_snapshot_cub...
4,10А-9-4,2022_09_02_session_000_005_snapshot_cube.tiff,-4,Пленчатые,,,2022_09_02,Myc2,only anthocyanins,2022_09_02_calibr_session_000_026_snapshot_cub...,2022_09_02_calibr_session_000_024_snapshot_cub...


$\textbf{Get a list of TableHyperImg}$

With normalization 

Mutation determination task

In [38]:
# Mutation task: build one list of TableHyperImg objects, batch by batch.
# Rows with an empty 'Mutation' cell are excluded up front.
has_target = df['Mutation'] != ''
img_lst: list[hyper.TableHyperImg] = []
# `batch_id` rather than `id`, so the builtin id() is not shadowed in the kernel.
for batch_id in df.loc[has_target, 'ID партии'].unique():
    # Skip rows whose batch ID cell is blank.
    if batch_id in ['', ' ']:
        continue
    # Combine both conditions into one mask aligned with df's index. Chaining
    # df[mask_a][mask_b] forces pandas to reindex mask_b and emits a UserWarning.
    in_batch = has_target & (df['ID партии'] == batch_id)
    img_lst.extend(
        hyper.get_list_hyper_img(
            seq_names=IMG_PATH + df.loc[in_batch, 'Image Name'],
            class_name=hyper.TableHyperImg,
            segmenter=hyper.PlainCv2Segmenter(25),
            filter=lambda x: x != '',
            same_samples=['purple', 'light', 'blue'],
            norm_seq_tg_name=['-1', '-4'],
            table=df,
            # Keyword spelling kept exactly as in the original call.
            target_varible_name='Mutation',
        )
    )
len(img_lst)

116

In [39]:
hyper.get_count_group(img_lst)

{'-4': 36,
 '-1': 42,
 'wt': 14,
 'light': 2,
 'purple': 2,
 'blue': 2,
 '-11': 10,
 '1': 8}

Without normalization

Pigment determination task (we will work on this issue in the future)

In [40]:
# Pigment task (no normalization arguments): build the TableHyperImg list batch by batch.
# Rows with an empty target cell are excluded up front.
has_target = df[TARGET_VARIABLE] != ''
img_lst: list[hyper.TableHyperImg] = []
# `batch_id` rather than `id`, so the builtin id() is not shadowed in the kernel.
for batch_id in df.loc[has_target, 'ID партии'].unique():
    # Skip rows whose batch ID cell is blank.
    if batch_id in ['', ' ']:
        continue
    # Combine both conditions into one mask aligned with df's index. Chaining
    # df[mask_a][mask_b] forces pandas to reindex mask_b and emits a UserWarning.
    in_batch = has_target & (df['ID партии'] == batch_id)
    img_lst.extend(
        hyper.get_list_hyper_img(
            seq_names=IMG_PATH + df.loc[in_batch, 'Image Name'],
            class_name=hyper.TableHyperImg,
            segmenter=hyper.PlainCv2Segmenter(28.276),
            filter=lambda x: x != '',
            table=df,
            # Keyword spelling kept exactly as in the original call.
            target_varible_name=TARGET_VARIABLE,
        )
    )
len(img_lst)

313

In [41]:
hyper.get_count_group(img_lst)

{'only anthocyanins': 108,
 'white': 88,
 'only melanin': 91,
 'melanin and anthocyanins': 26}

$\textbf{Plots}$

Plot colors

In [13]:
# Color assigned to each group in the plots below. The keys are the Pigment
# group names reported by get_count_group for the pigment task.
color={'only anthocyanins': 'black',
       'white': 'green',
       'only melanin': 'red',
       'melanin and anthocyanins': 'yellow'}

Plot of medians versus wavelength

In [None]:
# NOTE(review): every plot cell in this notebook passes download_path='graph.html';
# if that is the output file, each call presumably overwrites the previous graph —
# confirm, or give each plot its own file name.
hyper.get_medians_wavelenght_graph(img_lst, color=color, download_path='graph.html')

Visualization in two-dimensional space using PCA

In [None]:
hyper.get_2_pca_graph(img_lst, color=color, download_path='graph.html')

Visualization in two-dimensional space using UMAP

In [None]:
hyper.get_2_umap_graph(img_lst, color=color, n_neighbors=15, download_path='graph.html')

Visualization in two-dimensional space using ISOMAP

In [None]:
hyper.get_2_isomap_graph(img_lst, color=color, n_neighbors=5, download_path='graph.html')

Clustering with the EM algorithm using UMAP for dimensionality reduction

In [None]:
# Cluster img_lst into n_clusters=5 groups after a UMAP reduction.
# NOTE(review): presumably dim_clusterization=10 is the dimensionality in which the
# clustering is run, and download_path_table receives a table of results — confirm.
hyper.get_em_algorithm_clustering_graph(img_lst, downscaling_method='UMAP', dim_clusterization=10, color=color, n_clusters=5,
                                        download_path='graph.html', download_path_table='table.xlsx')

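Statistical analysis (the groups '-1' and 'wt' compared below are Mutation labels, so `img_lst` must be the list built for the mutation determination task)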

95% confidence interval

In [None]:
hyper.get_mean_diff_and_confident_interval_graph(img_lst, '-1', 'wt', 0.95, download_path='graph.html')

Mann-Whitney U test

In [None]:
hyper.get_mannwhitneyu_p_value_graph(img_lst, '-1', 'wt', download_path='graph.html')

Chi-square test

In [None]:
hyper.get_chi2_p_value_graph(img_lst, 'wt', '-1', number_bins=5, download_path='graph.html')

$\textbf{Get all plots}$

In [None]:
hyper.create_folder_with_all_graphs(img_lst, 'FolderName')

$\textbf{Classification in 15-dimensional space using PCA for dimensionality reduction}$

Create a function that converts the target variable to an integer sequence

In [15]:
def get_class(group_name: str) -> tuple[int, int]:
    """Convert a pigment group name into a pair of 0/1 labels.

    Args:
        group_name: Name of the group, e.g. 'only melanin'.

    Returns:
        (1, 0) for 'only melanin', (0, 1) for 'only anthocyanins',
        (1, 1) for 'melanin and anthocyanins', and (0, 0) for any other name.
    """
    # Exact-match lookup. The previous `group_name in 'only anthocyanins'` was a
    # substring test, so '', 'only' or 'anthocyanins' were also labelled (0, 1).
    labels = {
        'only anthocyanins': (0, 1),
        'only melanin': (1, 0),
        'melanin and anthocyanins': (1, 1),
    }
    return labels.get(group_name, (0, 0))

Classification

In [None]:
# Run the classifiers on a 15-component PCA projection of img_lst.
# `tables` and `matrex` are indexed by name in the cells that follow.
tables, matrex = hyper.get_table_res_and_confusion_matrix(img_lst,
                                                          downscaling_method='PCA',
                                                          n_components=15,
                                                          parameters_catboost={'learning_rate': np.arange(0.33, 1, 0.2),
                                                                               'iterations': [35],
                                                                               'depth': [2, 4]},
                                                          # min_samples_split must be an int >= 2 (or a float fraction) in
                                                          # scikit-learn's random forest; the previous value 1 is rejected.
                                                          # Assumes this grid is forwarded to scikit-learn — TODO confirm.
                                                          parameters_random_forest={'max_depth': [2, 4],
                                                                                    'min_samples_split': [2, 4],
                                                                                    'min_samples_leaf': [1, 4],
                                                                                    'n_estimators': [150]},
                                                          class_function=get_class,
                                                          shuffle_test=True,
                                                          save_path_folder='classification_PCA_15')

Confusion matrices

In [None]:
matrex['Logistic regression']

In [None]:
matrex['Ridge regression']

In [None]:
matrex['Random forest']

In [None]:
matrex['Catboost']

Tables with results

In [None]:
tables['macro_train']

In [None]:
tables['macro_test']

In [None]:
tables['micro_train']

In [None]:
tables['micro_test']

In [None]:
tables['average_train']

In [None]:
tables['average_test']

Some examples of getting useful pd.DataFrame (see more in get_data_funcs.py)

In [20]:
# NOTE: this rebinds `df`, which until here held the Google table. The cells that
# build img_lst read that table from `df`, so reload it before re-running them.
df = hyper.get_df_graphics_medians_wavelenght(img_lst)
df.head()

Unnamed: 0,Wavelength,Median,Sample,Mutation,Object name
0,450,-0.904163,0,-4,10А-9-1
1,454,-0.963002,0,-4,10А-9-1
2,458,-1.038734,0,-4,10А-9-1
3,462,-1.114622,0,-4,10А-9-1
4,466,-1.173933,0,-4,10А-9-1


In [None]:
# Median versus wavelength, one line per target-variable group, drawn on explicit axes.
fig, ax = plt.subplots(figsize=(7, 5))
ax.grid()
sns.lineplot(data=df, x='Wavelength', y='Median', hue=TARGET_VARIABLE, ax=ax)

Create annotation

In [36]:
hyper.create_table_annotation_df(img_lst).head()

Unnamed: 0,Image Name,PlantNumber,Black calibration data,White calibration data,Pigment
0,2022_09_02_session_000_001_snapshot_cube.tiff,10А-9-1,2022_09_02_calibr_session_000_026_snapshot_cub...,2022_09_02_calibr_session_000_024_snapshot_cub...,only anthocyanins
1,2022_09_02_session_000_002_snapshot_cube.tiff,10А-9-1,2022_09_02_calibr_session_000_026_snapshot_cub...,2022_09_02_calibr_session_000_024_snapshot_cub...,only anthocyanins
2,2022_09_02_session_000_003_cube.tiff,10А-9-3,2022_09_02_calibr_session_000_026_snapshot_cub...,2022_09_02_calibr_session_000_024_snapshot_cub...,only anthocyanins
3,2022_09_02_session_000_004_snapshot_cube.tiff,10А-9-3,2022_09_02_calibr_session_000_026_snapshot_cub...,2022_09_02_calibr_session_000_024_snapshot_cub...,only anthocyanins
4,2022_09_02_session_000_005_snapshot_cube.tiff,10А-9-4,2022_09_02_calibr_session_000_026_snapshot_cub...,2022_09_02_calibr_session_000_024_snapshot_cub...,only anthocyanins
