# Landsat Image Compression using PCA

### 1. Load the 5 images and plot them. Make sure the file name is displayed in the title of the corresponding image. (4 points)

In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
import os
from PIL import Image
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
import plotly.express as px

In [None]:
path = 'Data_export/Landsat Images/'
X = []
# os.listdir() returns entries in arbitrary order; sort them so that the
# image order (and therefore the PCA feature order) is reproducible.
for imgFile in sorted(os.listdir(path)):
    # Read the image; the context manager closes the underlying file as
    # soon as the pixel data has been copied into the array.
    with Image.open(os.path.join(path, imgFile)) as img:
        imgArray = np.array(img)
    # Plot the image with its file name as the title
    imgshow = px.imshow(imgArray, binary_string = True, title = imgFile)
    imgshow.show()
    # Collect the arrays so they can be stacked later
    X.append(imgArray)

### 2. Set up a pipeline that scales the values linearly between 0 and 1 and applies PCA such that 5 images from different wavelengths are combined to 1 channel data. (2 points)

In [None]:
# Turn the list of per-image arrays into one ndarray and report its shape
X = np.array(X)
print(np.shape(X))

In [None]:
# Two-stage pipeline: min-max scaling first, then PCA.
# Keeping a single principal component combines the 5 images into
# 1 channel of data.
pipeline_steps = [
    ('scale', MinMaxScaler()),
    ('pca', PCA(n_components=1)),
]
my_pca_pipeline = Pipeline(steps=pipeline_steps)

In [None]:
# Flatten every image into one row, then transpose: each pixel becomes a
# sample (row) and each image becomes a feature (column).
n_images = X.shape[0]
pixels_per_image = X.shape[1] * X.shape[2]
X_train = X.reshape(n_images, pixels_per_image).T
X_train.shape

In [None]:
# Fit the scaler and the PCA on the pixel-by-image matrix
my_pca_pipeline.fit(X_train)

### 3. Report the number of features and the number of samples. (2 points)

In [None]:
# The last pipeline step is the fitted PCA estimator; read the data
# dimensions it recorded during fit.
fitted_pca = my_pca_pipeline[-1]
print('Number of features:', fitted_pca.n_features_in_)
print('Number of samples:', fitted_pca.n_samples_)

### 4. What are the PCA directions? (2 points)

In [None]:
# Pull the fitted PCA step out of the pipeline and show its components
fitted_pca = my_pca_pipeline[-1]
print('PCA direction:', fitted_pca.components_)

### 5. What is the explained variance ratio when using only this 1 image? (2 points)

In [None]:
# Explained variance ratio of the single retained component
fitted_pca = my_pca_pipeline[-1]
print('ratio:', fitted_pca.explained_variance_ratio_)

### 6. What is the value of the last pixel in the new image? (2 points)

In [None]:
# Run every pixel through the fitted pipeline (scaling, then PCA)
pcaX = my_pca_pipeline.transform(X_train)
# The last row holds the value of the last pixel
pcaX[-1, :]

The value of the last pixel in the new image is 1.10282599.

### 7. Display the new image. (6 points)

In [None]:
# pcaX holds one value per pixel; fold it back into the original image
# grid so it can be displayed as a single image.
new_image = pcaX.reshape(X.shape[1], X.shape[2])
finalfig = px.imshow(new_image, binary_string=True)
finalfig.update_layout(title='New image')
finalfig.show()