# Load libraries

In [None]:
pip install -r https://github.com/corinef/Automated-root-classification/blob/main/requirements.txt

In [1]:
import os
import re
import pandas as pd
import numpy as np
from tqdm import tqdm
from pathlib import Path
from spectral import envi, imshow, kmeans
import matplotlib.pyplot as plt

# Load data

Insert the path of the folder that contains the `Data_files` folder and the scripts here (leave it empty to use the current working directory)

In [29]:
# Path of the project folder; an empty string means the current working directory
folder = ''

In [30]:
# Resolve to an absolute path: the notebook later calls os.chdir() into the
# results folder, and a relative (or empty) `folder` would then make these
# paths point inside the results folder instead of the project folder.
data_path = Path(folder).resolve()

# Sub-folder that holds the '*_processed.hdr' images
data_files = 'Data_files'
data_files_path = data_path / data_files

### Set results path

In [35]:
# Create the results folder inside the data folder (if it does not exist yet)
# and make it the current working directory, so that files written with a bare
# file name end up there.
results_folder = 'Data_classification_results/KMeans data'
results_path = data_path.joinpath(results_folder)
results_path.mkdir(exist_ok=True, parents=True)
os.chdir(results_path)

### Processing functions

In [17]:
def process_hdr_files(data_path, data_files='Data_files'):
    """Load every processed ENVI image found in the data folder.

    Parameters
    ----------
    data_path : str or pathlib.Path
        Folder that contains the `data_files` sub-folder.
    data_files : str, optional
        Name of the sub-folder holding the '.hdr' files (default 'Data_files').

    Returns
    -------
    dict
        Maps each sample name (the part of the file stem before '_processed')
        to the loaded image array in which every value greater than 1 has been
        replaced by 0. Files that fail to load are reported and left out.
    """
    hdr_dir = Path(data_path) / data_files

    # Sorted so that the order of the samples (and of everything derived from
    # it) does not depend on the file system's listing order.
    hdr_files = sorted(
        path for path in hdr_dir.iterdir()
        if path.suffix == '.hdr' and '_processed' in path.stem
    )

    data = {}
    for hdr_file in tqdm(hdr_files):
        name = " ".join(re.findall(r'(.*?)_processed', hdr_file.stem))

        try:
            X = envi.open(hdr_file.as_posix()).load()
            # np.where allocates a new array, so X needs no explicit copy.
            data[name] = np.where(X > 1, 0, X)

        except Exception as e:
            # Best effort: report the file and carry on with the remaining ones.
            print(f"Error while loading data from '{hdr_file}': {e}")

    return data

### Process data

In [18]:
# Load every '*_processed.hdr' image into a dictionary keyed by sample name
data = process_hdr_files(data_path)

100%|██████████| 2/2 [00:09<00:00,  4.81s/it]


In [19]:
# Sample names that were loaded
data.keys()

dict_keys(['A2', 'D2'])

### Load one file for metadata and wavelengths

In [20]:
def get_wl(data_path, data_files='Data_files'):
    """Return the band wavelengths listed in the first processed '.hdr' file.

    Parameters
    ----------
    data_path : str or pathlib.Path
        Folder that contains the `data_files` sub-folder.
    data_files : str, optional
        Name of the sub-folder holding the '.hdr' files (default 'Data_files').

    Returns
    -------
    numpy.ndarray
        The header's 'wavelength' entries converted to float.

    Raises
    ------
    FileNotFoundError
        If the folder contains no '*_processed.hdr' file.
    """
    hdr_dir = Path(data_path) / data_files
    hdr_files = sorted(
        path for path in hdr_dir.iterdir()
        if path.suffix == '.hdr' and path.stem.endswith('_processed')
    )
    if not hdr_files:
        raise FileNotFoundError(f"No '*_processed.hdr' file found in '{hdr_dir}'")

    # Opening the file parses the header, so the metadata is available without
    # loading the (large) image cube into memory.
    metadata = envi.open(hdr_files[0].as_posix()).metadata

    return np.array([float(wavelength) for wavelength in metadata['wavelength']])

In [21]:
# Band wavelengths read from the header of one processed image
wl = get_wl(data_path)

In [22]:
# Show the wavelengths to check that they were read correctly
print(wl)

[470.        472.885906  475.7718121 478.6577181 481.5436242 484.4295302
 487.3154362 490.2013423 493.0872483 495.9731544 498.8590604 501.7449664
 504.6308725 507.5167785 510.4026846 513.2885906 516.1744966 519.0604027
 521.9463087 524.8322148 527.7181208 530.6040268 533.4899329 536.3758389
 539.261745  542.147651  545.033557  547.9194631 550.8053691 553.6912752
 556.5771812 559.4630872 562.3489933 565.2348993 568.1208054 571.0067114
 573.8926174 576.7785235 579.6644295 582.5503356 585.4362416 588.3221477
 591.2080537 594.0939597 596.9798658 599.8657718 602.7516779 605.6375839
 608.5234899 611.409396  614.295302  617.1812081 620.0671141 622.9530201
 625.8389262 628.7248322 631.6107383 634.4966443 637.3825503 640.2684564
 643.1543624 646.0402685 648.9261745 651.8120805 654.6979866 657.5838926
 660.4697987 663.3557047 666.2416107 669.1275168 672.0134228 674.8993289
 677.7852349 680.6711409 683.557047  686.442953  689.3288591 692.2147651
 695.1006711 697.9865772 700.8724832 703.7583893 70

# Pre-processing with KMeans<a class="anchor" id="kmeans"></a>

Run K-means on each image with 3 clusters and up to 20 iterations

    1. Convert all K-Means Spectra to one dataframe with correct labels
    
    2. Export dataframe to excel
    
Show the K-means class images

    -  Save as .PNG file

In [23]:
def run_kmeans(data, num_clusters, num_iterations, wavelengths=None, class_labels=None):
    """Run spectral's k-means on every image in `data`.

    Parameters
    ----------
    data : dict
        Maps a sample name to its image array, e.g. {'key1': data1, 'key2': data2}.
    num_clusters : int
        Number of clusters requested from k-means.
    num_iterations : int
        Iteration limit passed to k-means.
    wavelengths : sequence of float, optional
        Column labels of the cluster-centre spectra. Defaults to the
        notebook-level `wl` array.
    class_labels : sequence, optional
        Row labels of the cluster-centre spectra. Defaults to '1', '2', ...,
        one label per cluster centre actually returned.

    Returns
    -------
    dict
        For every sample name `x`: 'kmeans_img_x' holds the class map and
        'kmeans_spectra_x' a DataFrame with one cluster-centre spectrum per row.

    Example
    -------
    run_kmeans(data, num_clusters=3, num_iterations=20)
    """
    if wavelengths is None:
        # Backward-compatible default: the wavelength array defined in the notebook.
        wavelengths = wl

    results_dict = {}

    for x in tqdm(data.keys()):
        print(x)
        class_map, centers = kmeans(data[x], num_clusters, num_iterations)

        # k-means may return fewer clusters than requested, so the default row
        # labels follow the number of centres that actually came back.
        if class_labels is None:
            labels = [str(number) for number in range(1, len(centers) + 1)]
        else:
            labels = class_labels

        results_dict[f"kmeans_img_{x}"] = class_map
        results_dict[f"kmeans_spectra_{x}"] = pd.DataFrame(centers, columns=wavelengths, index=labels)

    return results_dict

In [24]:
# Labels of the three K-means classes (one per cluster)
k_index = ['1', '2', '3'] 

# K-means class map and cluster-centre spectra for every loaded image
all_kmeans = run_kmeans(data, num_clusters = 3, num_iterations = 20)

  0%|          | 0/2 [00:00<?, ?it/s]

A2


spectral:INFO: k-means iteration 1 - 626102 pixels reassigned.
spectral:INFO: k-means iteration 2 - 69754 pixels reassigned.
spectral:INFO: k-means iteration 3 - 47047 pixels reassigned.
spectral:INFO: k-means iteration 4 - 68560 pixels reassigned.
spectral:INFO: k-means iteration 5 - 72704 pixels reassigned.
spectral:INFO: k-means iteration 6 - 69711 pixels reassigned.
spectral:INFO: k-means iteration 7 - 64478 pixels reassigned.
spectral:INFO: k-means iteration 8 - 58730 pixels reassigned.
spectral:INFO: k-means iteration 9 - 52608 pixels reassigned.
spectral:INFO: k-means iteration 10 - 47008 pixels reassigned.
spectral:INFO: k-means iteration 11 - 41412 pixels reassigned.
spectral:INFO: k-means iteration 12 - 35983 pixels reassigned.
spectral:INFO: k-means iteration 13 - 30671 pixels reassigned.
spectral:INFO: k-means iteration 14 - 25870 pixels reassigned.
spectral:INFO: k-means iteration 15 - 21942 pixels reassigned.
spectral:INFO: k-means iteration 16 - 18561 pixels reassigned.


D2


spectral:INFO: k-means iteration 1 - 63327 pixels reassigned.
spectral:INFO: k-means iteration 2 - 10177 pixels reassigned.
spectral:INFO: k-means iteration 3 - 12986 pixels reassigned.
spectral:INFO: k-means iteration 4 - 26940 pixels reassigned.
spectral:INFO: k-means iteration 5 - 51104 pixels reassigned.
spectral:INFO: k-means iteration 6 - 91458 pixels reassigned.
spectral:INFO: k-means iteration 7 - 141309 pixels reassigned.
spectral:INFO: k-means iteration 8 - 182011 pixels reassigned.
spectral:INFO: k-means iteration 9 - 190392 pixels reassigned.
spectral:INFO: k-means iteration 10 - 168819 pixels reassigned.
spectral:INFO: k-means iteration 11 - 134416 pixels reassigned.
spectral:INFO: k-means iteration 12 - 101101 pixels reassigned.
spectral:INFO: k-means iteration 13 - 73148 pixels reassigned.
spectral:INFO: k-means iteration 14 - 51869 pixels reassigned.
spectral:INFO: k-means iteration 15 - 36395 pixels reassigned.
spectral:INFO: k-means iteration 16 - 25404 pixels reassig

In [25]:
# One 'kmeans_img_*' and one 'kmeans_spectra_*' entry per image
all_kmeans.keys()

dict_keys(['kmeans_img_A2', 'kmeans_spectra_A2', 'kmeans_img_D2', 'kmeans_spectra_D2'])

In [27]:
# Cluster-centre spectra of sample A2: one row per class, one column per wavelength
all_kmeans['kmeans_spectra_A2']

Unnamed: 0,470.000000,472.885906,475.771812,478.657718,481.543624,484.429530,487.315436,490.201342,493.087248,495.973154,...,874.026846,876.912752,879.798658,882.684564,885.570470,888.456376,891.342282,894.228188,897.114094,900.000000
1,0.061338,0.061254,0.059574,0.058025,0.056806,0.056754,0.058234,0.056709,0.055121,0.053163,...,0.158653,0.160488,0.162443,0.164511,0.166369,0.168284,0.170045,0.171925,0.172775,0.172775
2,0.153077,0.153369,0.151087,0.149361,0.148657,0.149288,0.151144,0.150332,0.149569,0.148828,...,0.312734,0.314156,0.315511,0.316741,0.317716,0.318706,0.31953,0.320536,0.321034,0.321034
3,0.323643,0.32393,0.320425,0.318705,0.319375,0.321596,0.324095,0.324095,0.324723,0.326374,...,0.532199,0.532583,0.532808,0.532669,0.532146,0.531469,0.530619,0.530017,0.529852,0.529852


## Training spectra dataframe<a class="anchor" id="training_df"></a>

In [32]:
# Collect the cluster-centre spectra of all samples in one table indexed by
# (Class, Species) and export it to Excel.
spectra_prefix = 'kmeans_spectra_'

# Match on the key prefix rather than on a substring, so that a sample whose
# name happens to contain 'spectra' cannot pull a class map into the table.
spectra_frames = []
for key, item in all_kmeans.items():
    if not key.startswith(spectra_prefix):
        continue

    species = key[len(spectra_prefix):]
    spectra_frames.append(
        item
        .rename_axis('Class')
        .assign(Species=species)
        .set_index('Species', append=True)
    )

# Concatenate once instead of growing the frame inside the loop; fall back to
# an empty frame when there is nothing to concatenate.
all_kmeans_data = pd.concat(spectra_frames) if spectra_frames else pd.DataFrame()

all_kmeans_data.to_excel('kmeans_data.xlsx')

In [16]:
# Display the combined table of cluster-centre spectra
all_kmeans_data

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,470.000000,472.885906,475.771812,478.657718,481.543624,484.429530,487.315436,490.201342,493.087248,495.973154,...,874.026846,876.912752,879.798658,882.684564,885.570470,888.456376,891.342282,894.228188,897.114094,900.000000
Class,Species,Replicate,Day,Treatment,Species_Groups,Treatment_Groups,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1
1,A2,1,40,f11,1,1,0.061338,0.061254,0.059574,0.058025,0.056806,0.056754,0.058234,0.056709,0.055121,0.053163,...,0.158653,0.160488,0.162443,0.164511,0.166369,0.168284,0.170045,0.171925,0.172775,0.172775
2,A2,1,40,f11,1,1,0.153077,0.153369,0.151087,0.149361,0.148657,0.149288,0.151144,0.150332,0.149569,0.148828,...,0.312734,0.314156,0.315511,0.316741,0.317716,0.318706,0.31953,0.320536,0.321034,0.321034
3,A2,1,40,f11,1,1,0.323643,0.32393,0.320425,0.318705,0.319375,0.321596,0.324095,0.324095,0.324723,0.326374,...,0.532199,0.532583,0.532808,0.532669,0.532146,0.531469,0.530619,0.530017,0.529852,0.529852
1,D2,1,40,f11,2,1,0.034746,0.034982,0.033824,0.032761,0.032229,0.033073,0.034601,0.033345,0.032017,0.030329,...,0.08953,0.091078,0.092743,0.094471,0.096128,0.097807,0.09926,0.100929,0.101721,0.101721
2,D2,1,40,f11,2,1,0.052583,0.052743,0.050775,0.048993,0.048145,0.04903,0.050414,0.048865,0.047481,0.046036,...,0.173969,0.176027,0.178144,0.180264,0.182202,0.184132,0.18581,0.187686,0.188546,0.188546
3,D2,1,40,f11,2,1,0.183949,0.184184,0.181188,0.179525,0.179744,0.181277,0.182725,0.181675,0.181109,0.18114,...,0.350809,0.35189,0.352825,0.353542,0.353946,0.354262,0.354422,0.354926,0.355201,0.355201


## KMeans class data<a class="anchor" id="class_data"></a>

In [33]:
# Save every K-means class map as '<key>.csv' in the current working directory.
# Keys are matched by prefix: a plain "'img' in key" test would also select the
# spectra entry of any sample whose name contains 'img'.
for key, item in all_kmeans.items():
    if key.startswith('kmeans_img_'):
        df = pd.DataFrame(item)
        df.to_csv(key + '.csv')

## Show KMeans classes<a class="anchor" id="show_classes"></a>

In [34]:
# Render every K-means class map with one fixed colour per class and save it
# as '<key>.png' in the current working directory.
# Keys are matched by prefix: a plain "'img' in key" test would also select the
# spectra entry of any sample whose name contains 'img'.
for key, item in all_kmeans.items():
    if key.startswith('kmeans_img_'):
        imshow(classes = item, colors = [(0, 0, 0), (212, 17, 89), (255, 215, 0)])
        plt.axis('off')
        plt.savefig(key + '.png', dpi=300, bbox_inches='tight')
        # Close the figure so that figures do not pile up in memory across the loop
        plt.close()