# Setup

In [None]:
pip install -r requirements.txt

In [None]:
from PIL import Image
from tqdm import tqdm
from sklearn.decomposition import PCA
from matplotlib.pyplot import imshow, figure
from tifffile import imsave
import numpy as np
import os

In [None]:
# Directory whose entries are listed and read as input images.
input_path = './input'
# Directory the generated .tif files are written to.
output_path = './output'

In [None]:
# Collect the names of the input images.
# Only regular files are kept: os.listdir also returns sub-directories
# (for example .ipynb_checkpoints), which cannot be opened as images.
# The names are sorted because os.listdir returns them in arbitrary order.
input_files = sorted(
    filename
    for filename in os.listdir(input_path)
    if os.path.isfile(os.path.join(input_path, filename))
)
print(f'input_files ({len(input_files)}): {input_files}')

# Parsing the files
For each file:

1. read file
1. get image size
1. "parse" the image with PCA: fit a 3-component PCA on the pixel values and project every pixel onto those components
1. save the output to a .tif file

In [None]:
# Names of the .tif files written by this cell.
new_files = []
# Per-file results keyed by input filename. Each entry receives:
#   'input'  - the original pixels as a NumPy array
#   'size'   - the (height, width) pair
#   'pca'    - the fitted PCA model
#   'output' - the PCA-projected pixels reshaped to (height, width, 3)
images = {key: {} for key in input_files}

# Make sure the output directory exists before the first file is written.
os.makedirs(output_path, exist_ok=True)

for image_file in tqdm(input_files, desc='Parsing files'):
    # Read image. The context manager closes the file handle; np.array
    # copies the pixel data so it stays valid after the image is closed.
    image_path = os.path.join(input_path, image_file)
    with Image.open(image_path) as image:
        pixels = np.array(image)

    # A 3-component PCA needs at least 3 values per pixel; fail with a clear
    # message instead of a cryptic unpacking error on grayscale/palette images.
    if pixels.ndim != 3 or pixels.shape[2] < 3:
        raise ValueError(
            f'{image_file}: expected an image with at least 3 channels, '
            f'got an array of shape {pixels.shape}'
        )
    h, w, channels = pixels.shape
    images[image_file]['input'] = pixels
    images[image_file]['size'] = (h, w)

    # Use pca transformation to parse image.
    # One row per pixel and one column per channel, in row-major pixel order.
    image_array = pixels.reshape(h * w, channels).astype(np.float64)
    pca = PCA(n_components=3)
    pca.fit(image_array)
    output = pca.transform(image_array).reshape(h, w, 3)
    images[image_file]['pca'] = pca

    # Save output as tif. splitext strips only the final extension, so a name
    # such as "scan.v2.png" becomes "scan.v2.tif" rather than "scan.tif".
    new_filename = f'{os.path.splitext(image_file)[0]}.tif'
    imsave(os.path.join(output_path, new_filename), output)
    new_files.append(new_filename)
    images[image_file]['output'] = output
print(f'Parsed files: {new_files}')