In [None]:
import os

import pandas as pd
import numpy as np

import seaborn as sns
import matplotlib.pyplot as plt

import tifffile as tiff

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

import hyper_img as hyper

Пути

In [None]:
# Root folder of the pigment data set. The historical location is kept as the
# default; set the PIGMENT_DATA_DIR environment variable to run the notebook on
# another machine without editing this cell.
# os.path.join(..., '') guarantees the trailing separator that the plain string
# concatenations below (and the '/'-split path checks later on) rely on.
IMG_PATH = os.path.join(
    os.environ.get('PIGMENT_DATA_DIR') or '/home/igor/projects/Seed/data/pigment/',
    '',
)

# One sub-folder per acquisition session.
IMG_PATH_WHITE = IMG_PATH + 'white/'
IMG_PATH_COLORED = IMG_PATH + 'colored/'
IMG_PATH_NEW_DATA = IMG_PATH + 'new_data/'

# White / black calibration cubes of each session.
CALIBR_WHITE_PATH_WHITE = IMG_PATH_WHITE + 'White_session_000_000_cube.tiff'
CALIBR_BLACK_PATH_WHITE = IMG_PATH_WHITE + 'Black_session_000_004_snapshot_cube.tiff'
CALIBR_WHITE_PATH_COLORED = IMG_PATH_COLORED + 'White_session_000_000_snapshot_cube.tiff'
CALIBR_BLACK_PATH_COLORED = IMG_PATH_COLORED + 'Black_session_000_001_snapshot_cube.tiff'
CALIBR_BLACK_PATH_NEW_DATA = IMG_PATH_NEW_DATA + 'Black_session_000_001_snapshot_cube.tiff'
CALIBR_WHITE_PATH_NEW_DATA = IMG_PATH_NEW_DATA + 'White_session_000_000_snapshot_cube.tiff'

Таблица с информацией о пигментном составе цветных изображений

In [None]:
# Raw pigment annotation sheet for the coloured-seed session; preview the first rows.
colored_pigment = pd.read_csv(f'{IMG_PATH}colored_pigment.csv')
colored_pigment.head()

In [None]:
# Columns of the raw pigment sheet that are kept for the analysis
# (they are renamed to English identifiers in the next cell).
needed_columns = [
    'меланин',
    'Unnamed: 3',
    'антоцианы',
    'Unnamed: 5',
    'Unnamed: 8',
]

In [None]:
# Keep only the annotated columns, drop the first data row (presumably a
# sub-header row of the sheet - TODO confirm), switch to English column names
# and recode the yes/no answers ('да'/'нет') as 1/0.
# NOTE: this cell overwrites `colored_pigment`, so it can only be run once per
# fresh read of the CSV.
colored_pigment = (
    colored_pigment
    .loc[:, needed_columns]
    .iloc[1:]
    .rename(columns={
        'меланин': 'melanin_scales',
        'Unnamed: 3': 'melanin_pericarp',
        'антоцианы': 'anthocyanins_scales',
        'Unnamed: 5': 'anthocyanins_pericarp',
        'Unnamed: 8': 'file_name',
    })
    .reset_index(drop=True)
    .replace(to_replace=['нет', 'да'], value=[0, 1])
)
colored_pigment.head()

In [None]:
# A pigment counts as present (1) when it is found in the scales or in the
# pericarp: the two 0/1 flags are summed and the sum is capped to the 0..1 range.
colored_pigment['melanin'] = (
    colored_pigment['melanin_scales']
    .add(colored_pigment['melanin_pericarp'])
    .clip(lower=0, upper=1)
)
colored_pigment['anthocyanins'] = (
    colored_pigment['anthocyanins_scales']
    .add(colored_pigment['anthocyanins_pericarp'])
    .clip(lower=0, upper=1)
)
colored_pigment.head()

In [None]:
# File names of the new-data session: read the sheet and keep its single
# relevant column under the English name 'file_name'.
# The trailing space in 'имя файла ' is part of the original column header.
new_data_table_name = (
    pd.read_excel(IMG_PATH + 'new_data_table_name.xlsx')
    .loc[:, ['имя файла ']]
    .rename(columns={'имя файла ': 'file_name'})
)
new_data_table_name.head()

In [None]:
# Pigment annotations of the new-data session, joined on the row index with
# the file-name sheet; descriptive columns not used later are dropped.
new_data_table = (
    pd.read_excel(IMG_PATH + 'new_data_table.xlsx')
    .join(new_data_table_name)
    .drop(columns=[
        'Нумерация',
        'Цвет зерна (на вид)',
        'Черный пигмент (на вид)',
        'Blp ген (генотипирование)',
    ])
)
new_data_table.head()

In [None]:
# English column names, yes/no ('да'/'нет') recoded as 1/0, incomplete rows
# removed, and both pigment flags stored as integers.
new_data_table = (
    new_data_table
    .rename(columns={'Антоцианы': 'anthocyanins', 'Меланины': 'melanin'})
    .replace(to_replace=['да', 'нет'], value=[1, 0])
    .dropna()
    .reset_index(drop=True)
    .astype({'melanin': 'int', 'anthocyanins': 'int'})
)
new_data_table.head()

In [None]:
# Stack the new-data annotations on top of the coloured-session annotations.
# ignore_index=True gives the combined table a fresh 0..n-1 index; without it
# both source frames contribute their own 0-based labels and every label-based
# lookup (.loc) could silently return several rows.
# NOTE: this cell rebinds `colored_pigment`; re-running it without re-running the
# cells above appends `new_data_table` a second time.
colored_pigment = pd.concat([new_data_table, colored_pigment], ignore_index=True)
colored_pigment.sample(7)

Класс и графики

In [None]:
class PigmentHyperImg(hyper.HyperImg):
    """Hyperspectral seed image labelled by its pigment content.

    Overrides two hooks of ``hyper.HyperImg``: loading the calibrated cube and
    deriving the class label. Both rely on the module-level path constants
    (``IMG_PATH*``, ``CALIBR_*``); the label additionally relies on the global
    ``colored_pigment`` table, which must already be built when instances are
    created.
    """

    def _get_tiff(self) -> np.ndarray:
        """Read the cube at ``self.path`` and normalise it with the session's references.

        The session is chosen from the folder names contained in ``self.path``
        ('white', 'colored', anything else -> new data). The result is
        ``max(img - black, 0) / (white - black)``, computed element-wise.

        Returns:
            The calibrated cube as a floating-point array. Elements where the
            white and black references are equal come out as ``inf``/``nan``
            (a ``nan`` median is turned into the 'nan' label by
            ``_get_target_varible``).
        """
        img = tiff.imread(self.path)

        # The check is on the set of path components, not on a prefix: every
        # component of the session folder must occur somewhere in self.path.
        path_parts = set(self.path.split('/'))
        if set(IMG_PATH_WHITE.split('/')).issubset(path_parts):
            black_path, white_path = CALIBR_BLACK_PATH_WHITE, CALIBR_WHITE_PATH_WHITE
        elif set(IMG_PATH_COLORED.split('/')).issubset(path_parts):
            black_path, white_path = CALIBR_BLACK_PATH_COLORED, CALIBR_WHITE_PATH_COLORED
        else:
            black_path, white_path = CALIBR_BLACK_PATH_NEW_DATA, CALIBR_WHITE_PATH_NEW_DATA
        bl_img = tiff.imread(black_path)
        wh_img = tiff.imread(white_path)

        # Negative differences are clamped to 0; np.where picks the constant
        # exactly where an unsigned `img - bl_img` would have wrapped around.
        new_img = np.where(bl_img > img, 0, img - bl_img)

        # With unsigned integer cubes `wh_img - bl_img` wraps to a huge positive
        # number wherever the black frame is brighter than the white one, which
        # yields a bogus near-zero reflectance. Widen to a signed type first so
        # the dynamic range keeps its true sign.
        if np.issubdtype(wh_img.dtype, np.integer):
            wh_img = wh_img.astype(np.int64)
        return new_img / (wh_img - bl_img)

    def _get_target_varible(self) -> str:
        """Return the pigment label of this image.

        (The method name keeps its original spelling; it is presumably a hook
        called by ``hyper.HyperImg`` - verify before renaming.)

        Returns:
            'nan'          - a median is NaN, or the file is not listed in
                             ``colored_pigment``;
            'calibration'  - all medians are ~1 or ~0 (presumably a reference
                             frame normalised against itself - TODO confirm);
            'white'        - the image lies in the white session folder, or is
                             listed with neither pigment;
            'melanin and anthocyanins' / 'only melanin' / 'only anthocyanins'
                           - according to the flags in ``colored_pigment``.

        Raises:
            NameError: if ``self.path`` does not contain every component of
                ``IMG_PATH``.
        """
        stem_parts = set(self.path.split('.')[0].split('/'))
        if not set(IMG_PATH.split('/')).issubset(stem_parts):
            raise NameError('Error in path')

        if np.any(np.isnan(self.medians)):
            return 'nan'

        if np.allclose(self.medians, 1.0) or np.allclose(self.medians, 0.0):
            return 'calibration'

        if set(IMG_PATH_WHITE.split('/')).issubset(set(self.path.split('/'))):
            return 'white'

        # Key used in the annotation table: the file name without the
        # 'snapshot' and 'cube.tiff' tokens.
        file_name = self.path.split('/')[-1]
        name: str = '_'.join(part for part in file_name.split('_')
                             if part not in ('snapshot', 'cube.tiff'))

        # A single boolean-mask lookup replaces `np.unique(...)` plus repeated
        # filtering: np.unique sorts the column and raises TypeError when it
        # mixes strings with NaN, and the table was scanned up to four times.
        rows = colored_pigment[colored_pigment.file_name == name]
        if rows.empty:
            return 'nan'

        has_melanin = bool(rows.melanin.iloc[0])
        has_anthocyanins = bool(rows.anthocyanins.iloc[0])

        if has_melanin and has_anthocyanins:
            return 'melanin and anthocyanins'
        if has_melanin:
            return 'only melanin'
        if has_anthocyanins:
            return 'only anthocyanins'
        return 'white'

In [None]:
# Load every image under IMG_PATH as a PigmentHyperImg; images labelled 'nan'
# or 'calibration' are rejected by the filter predicate.
img_lst: list[PigmentHyperImg] = hyper.get_list_hyper_img(
    path=IMG_PATH,
    class_name=PigmentHyperImg,
    filter=lambda label: label not in ('nan', 'calibration'),
    threshold_value=19.55076654,
    target_varible_name='Pigment',
)

In [None]:
# Number of images that made it into img_lst.
len(img_lst)

In [None]:
# Display the first image object as a quick sanity check of the loaded list.
img_lst[0]

In [None]:
# Median value versus wavelength, one line per pigment class, on a gridded 7x5 figure.
plt.figure(figsize=(7, 5))
plt.grid()
sns.lineplot(
    x='Wavelength',
    y='Median',
    hue='Pigment',
    data=hyper.get_df_graphics_medians_wavelenght(img_lst),
    ax=plt.gca(),
)

In [None]:
# Scatter of the two components returned by the project's 2-component PCA
# helper, coloured by pigment class.
plt.figure(figsize=(15, 7))
sns.scatterplot(
    x='1',
    y='2',
    hue='Pigment',
    data=pd.DataFrame(hyper.get_df_2_pca(img_lst), columns=['1', '2', 'Pigment']),
    ax=plt.gca(),
)

In [None]:
# Standardise every feature, then project onto the first five principal components.
pipe = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('pca', PCA(n_components=5)),
])

In [None]:
# Build the table of medians for the loaded images via the project helper and
# preview it. The 'Pigment' column of this table is used as the label below
# (assumes one row per image - TODO confirm against hyper.get_df_medians).
df = hyper.get_df_medians(img_lst)
df.head()

In [None]:
# Labels stay as a one-column frame; every other column is a feature that goes
# through the scaler + PCA pipeline. The cell output is the share of variance
# explained by each retained component.
y = df[['Pigment']]
X = pipe.fit_transform(df.drop(columns=['Pigment']))
pipe.named_steps['pca'].explained_variance_ratio_