# Location Based Assignment

### **Principal Landmarks**
- At least 20 photos of your favorite landmark
- taken during the day and over a period of at least 4 hours
- Something that creates a time-lapse of your favorite landmark would be ideal, but is not necessary.

### Core assignment: 
#### write code that does the following:

1. Processes all photos down to a size not exceeding 512 pixels in either width or height
2. Using principal components analysis (PCA) project your images down to a 2 dimensional representation
3. Visually inspect the 2D locations of each photo in the new space
4. Show the reconstruction from each low-dimensional representation
5. Finally, pick a point that is far away from any known location and plot its reconstruction

- If you perform any other image processing steps, please clearly note them in your report. This is meant to be a reasonably light-hearted assignment, so feel free to pick irreverent landmarks or use interesting image processing techniques to find artistic reconstructions. You may also try to conduct this assignment in three dimensions, but no higher!

- Hand in a single PDF containing all your results and a short discussion of your findings.

#### Make sure to include:

- at least 10 of your small photos and their associated reconstructions,
- the scatterplot of all images in the 2D space, and where your new point is,
- the reconstruction from your new point.

Import all relevant frameworks

In [1]:
from glob import glob
from PIL import Image
from resizeimage import resizeimage
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

### Step 1. 
Processes all photos down to a size not exceeding 512 pixels in either width or height

In [None]:
# resize and crop images
def resize_images(images_path, pixel):
    '''
    Load each image, crop/resize it to a `pixel` x `pixel` square and
    flatten it into one row of a data matrix.

    Based on the function given in the Session 7 repo (load_images.py).

    Parameters
    ----------
    images_path : iterable of str
        Paths of the image files to load.
    pixel : int
        Side length, in pixels, of the square every image is resized to.

    Returns
    -------
    X : np.ndarray
        One flattened RGB image per row, shape (n_images, pixel * pixel * 3).
    Y : np.ndarray
        Placeholder labels (all zeros), shape (n_images,).
    '''
    rows = []

    # for each image path
    for path in images_path:
        # read-only binary mode is enough: the file is never written to
        with open(path, 'rb') as f:
            # open it as an image
            with Image.open(f) as image:
                # resize the image to be more manageable
                cover = resizeimage.resize_cover(image, [pixel, pixel])
                # force 3 channels so every flattened row has the same
                # length, whatever mode the file was stored in
                rows.append(np.asarray(cover.convert('RGB')).flatten())

    # nothing to stack: return empty arrays of the documented shapes
    if not rows:
        empty_X = np.empty((0, pixel * pixel * 3), dtype=np.uint8)
        return empty_X, np.zeros(0, dtype=int)

    # stack the rows directly; going through a list of (row, label) tuples
    # would build a ragged object array, which NumPy no longer accepts
    X = np.stack(rows)
    Y = np.zeros(len(rows), dtype=int)

    # return resized images
    return X, Y
# glob() returns matches in arbitrary order; sorting keeps the image indices
# (and therefore the rows of X) reproducible from run to run
path_j = sorted(glob("/Users/ey08/Documents/Minerva/Fall 2022/cs156-pcw-e-yang08/06/Jersey/*"))
# side length in pixels of the resized square images
pixel = 512
X, Y = resize_images(path_j, pixel)

In [None]:
# preview the resized images returned by resize_images
images = X

print(f'Total Images: {len(images)}')
print(f'Images Shape: {images.shape}')

# plot samples of original images
fig = plt.figure(figsize=(15, 6))

# show at most 10 images, fewer if fewer were loaded
for i in range(min(10, len(images))):
    ax = fig.add_subplot(2, 5, i+1, xticks=[], yticks=[])
    # each row is a flattened pixel x pixel RGB image, so the reshape must
    # use the same side length that was passed to resize_images
    img = Image.fromarray(images[i].reshape(pixel, pixel, 3), 'RGB')
    ax.imshow(img, interpolation='nearest')

plt.suptitle('Original Images', y=0.95)

plt.show()

### Step 2. 
Using principal components analysis (PCA) project your images down to a 2 dimensional representation

In [None]:
from sklearn import decomposition

# keep as many components as there are images
n_components = images.shape[0]
pca = decomposition.PCA(n_components=n_components)

# fit the PCA on the images and project them onto the components
transformed = pca.fit_transform(images)
print(transformed.shape)

# running total of the explained-variance ratio, one value per component
components = np.arange(1, n_components + 1)
var = pca.explained_variance_ratio_.cumsum()

# plot number of components vs. explained variance
plt.figure(figsize=(8, 6))
plt.plot(components, var, color='pink')
plt.axhline(y=0.95, color='teal', label='95% Var Explained')

plt.title('Number of Components Needed to Explain Variance')
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Variance (%)')

plt.legend(loc='lower right')
plt.show()


In [None]:
# calculate PCA results and save one reconstructed image for each dimension

samples = []
expl_var = []
dims = []

for i in range(1, len(images) + 1):

    # apply PCA keeping i components
    pca = decomposition.PCA(n_components=i)

    # fit images to PCA to reduce dimensions
    transformed = pca.fit_transform(images)

    # map the low-dimensional points back to pixel space and bring the
    # values back into the valid 0-255 range of an 8-bit image
    inverse = pca.inverse_transform(transformed)
    formatted = np.clip(inverse, 0, 255).astype(np.uint8)

    # save one random sample from each dimension
    random_row = np.random.randint(len(images), size=1)
    sample = formatted[random_row]

    # total explained variance of the i components kept
    pca_var_expl = \
    np.round(np.cumsum(pca.explained_variance_ratio_)[-1], 2)

    # save results
    samples.append(sample)
    expl_var.append(pca_var_expl)
    dims.append(i)

print("finished applying PCA")

# reshape formatted samples; the side length must match the one the images
# were resized to, otherwise the reshape raises ValueError
reshaped = []
for i in samples:
    new_img = Image.fromarray(i.reshape(pixel, pixel, 3), 'RGB')
    reshaped.append(new_img)

# plot sample from each dimension on a 4-column grid; keep the original
# 6 rows as a minimum and add rows when there are more than 24 samples
n_cols = 4
n_rows = max(6, -(-len(samples) // n_cols))
fig = plt.figure(figsize=(16, 20 * n_rows / 6))

for i in range(len(samples)):
    ax = fig.add_subplot(n_rows, n_cols, i+1, xticks=[], yticks=[])
    ax.imshow(reshaped[i], interpolation='nearest')
    ax.set_title(f'Dim: {dims[i]}, Var: {expl_var[i]}')

plt.suptitle('Reconstructed Images (All Dimensions)', y=0.92)

plt.show()

### Discussion of Findings


###  Optional extension:

#### Extend your previous code in the following way:
- Using the first few PCA components, train a linear regression model to predict the time of day the photos were taken (you can pull the timestamps directly from your phone to get your target feature).
- Using the first few PCA components, train any of the classifiers we've used so far in class (using k-fold cross-validation) to predict whether the photo was taken in the first half of the photos or in the second half of the photos.
- Evaluate these models using any of the performance metrics we've discussed in class and produce a visualization of their accuracy.

- weather
- time point
- shade
- cloud

### Evaluate models

In [None]:
#1
# NB
# Gaussian Naive Bayes model
# reference: https://www.datacamp.com/tutorial/naive-bayes-scikit-learn
# NOTE(review): X_train, y_train, X_test and y_test are not defined in the
# visible part of this notebook; they must be created by an earlier cell.
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    classification_report,
    confusion_matrix,
)

#Create a Gaussian Classifier
gnb = GaussianNB()

#Train the model using the training sets
gnb.fit(X_train, y_train)

#Predict the response for test dataset
y_pred = gnb.predict(X_test)

# confusion matrix of actual vs. predicted labels
cm = confusion_matrix(y_test, y_pred)

# draw the matrix with scikit-learn's own display instead of seaborn,
# which this notebook never imports; counts are integers, hence 'd'
fig, ax = plt.subplots(figsize=(9, 9))
ConfusionMatrixDisplay(confusion_matrix=cm).plot(
    ax=ax, cmap='Blues_r', values_format='d'
)
ax.set_ylabel('Actual label')
ax.set_xlabel('Predicted label')
all_sample_title = 'Accuracy Score: {0}'.format(accuracy_score(y_test, y_pred))
ax.set_title(all_sample_title, size=15)

# put the report on its own lines so the table columns stay aligned
print("Classifier Report")
print(classification_report(y_test, y_pred))

### Assignment Information
Weight:
10%

#### Learning Outcomes Added
- cs156-MLCode: Produce working, readable, and performant Python implementations of a variety of machine learning systems using appropriate libraries and software tools.

- cs156-MLDevelopment: Contribute to the quality of ML resources for current and future students by compiling learning resources, sharing code, creating study groups, and supporting other students' learning.

- cs156-MLExplaination: Clearly articulate machine learning systems, algorithms, and techniques using appropriate oral and written descriptions, mathematical notation, and visualizations.

- cs156-MLFlexibility: Reason flexibly, apply information in new contexts, produce novel work, and articulate meta-knowledge about machine learning.

fit PCA
--> classify cats/dogs.

train on clothes
-> classify the different

rotating around the landmark --> assume which result
run on notebooks locally
not necessarily upload the images

code comments---audience. professor reads it!
steps, procedure within the code cell book

#### idea
different time points of that small stuff putting cigarette.

- point cloud
- 3D -- depth and lighting
- iOS14
- Use cases: predict the price of NFT ---> collect data from online?

#### DRAFT

In [None]:
# !pip install python-resize-image # install new python resize

In [None]:
"""
reference https://sle-collaboration.minervaproject.com/?id=6e1e4824-ce66-4824-8536-
aa32dd64ad24&userId=10914&name=Erela+Yang&avatar=https%3A//s3.amazonaws.com/picasso.
fixtures/Youqi_Yang_10914_2021-02-21T10%3A15%3A28.132Z&isInstructor=0&signature=4a0e
398d189bc7552fbd57bfd85393c51aa63a25d9b9aa364468b2b2856acc87

"""
from glob import glob
from PIL import Image
from resizeimage import resizeimage
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# resize and crop images
def resize_images(images_path, pixel=100):
    '''
    Load each image, crop/resize it to a `pixel` x `pixel` square and
    flatten it into one row of a data matrix.

    Based on the function given in the Session 7 repo (load_images.py).

    Parameters
    ----------
    images_path : iterable of str
        Paths of the image files to load.
    pixel : int, optional
        Side length, in pixels, of the square every image is resized to.
        Defaults to 100, the size this draft originally hard-coded.

    Returns
    -------
    X : np.ndarray
        One flattened RGB image per row, shape (n_images, pixel * pixel * 3).
    Y : np.ndarray
        Placeholder labels (all zeros), shape (n_images,).
    '''
    rows = []

    # for each image path
    for path in images_path:
        # read-only binary mode is enough: the file is never written to
        with open(path, 'rb') as f:
            # open it as an image
            with Image.open(f) as image:
                # resize the image to be more manageable
                cover = resizeimage.resize_cover(image, [pixel, pixel])
                # force 3 channels so every flattened row has the same
                # length, whatever mode the file was stored in
                rows.append(np.asarray(cover.convert('RGB')).flatten())

    # nothing to stack: return empty arrays of the documented shapes
    if not rows:
        empty_X = np.empty((0, pixel * pixel * 3), dtype=np.uint8)
        return empty_X, np.zeros(0, dtype=int)

    # stack the rows directly; going through a list of (row, label) tuples
    # would build a ragged object array, which NumPy no longer accepts
    X = np.stack(rows)
    Y = np.zeros(len(rows), dtype=int)

    # return resized images
    return X, Y
# glob() returns matches in arbitrary order; sorting keeps the image indices
# (and therefore the rows of X) reproducible from run to run
path_j = sorted(glob("/Users/ey08/Documents/Minerva/Fall 2022/cs156-pcw-e-yang08/06/Jersey/*"))
X, Y = resize_images(path_j)
# images = resize_images(file)[0]
# path_s = "/Users/ey08/Documents/Minerva/Fall 2022/cs156-pcw-e-yang08/06/Shirt"

In [None]:
# work on the flattened image matrix loaded above
images = X

print(f'Total Images: {len(images)}')
print(f'Images Shape: {images.shape}')

# 2 x 5 grid of tick-less axes for the first ten images
fig, axes = plt.subplots(
    2, 5, figsize=(15, 6), subplot_kw={'xticks': [], 'yticks': []}
)

for i, ax in enumerate(axes.ravel()):
    # undo the flattening: each row is reshaped to 100 x 100 x 3
    pixels = images[i].reshape(100, 100, 3)
    img = Image.fromarray(pixels, 'RGB')
    ax.imshow(img, interpolation='nearest')

plt.suptitle('Original Images', y=0.95)

plt.show()

In [None]:
from sklearn import decomposition

# keep as many components as there are images
n_components = images.shape[0]
pca = decomposition.PCA(n_components=n_components)

# fit the PCA on the images and project them onto the components
transformed = pca.fit_transform(images)
print(transformed.shape)

# running total of the explained-variance ratio, one value per component
components = np.arange(1, n_components + 1)
var = pca.explained_variance_ratio_.cumsum()

# plot number of components vs. explained variance
plt.figure(figsize=(8, 6))
plt.plot(components, var, color='pink')
plt.axhline(y=0.95, color='teal', label='95% Var Explained')

plt.title('Number of Components Needed to Explain Variance')
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Variance (%)')

plt.legend(loc='lower right')
plt.show()


In [None]:
# calculate PCA results and save one reconstructed image for each dimension

samples = []
expl_var = []
dims = []

for i in range(1, len(images) + 1):

    # apply PCA keeping i components
    pca = decomposition.PCA(n_components=i)

    # fit images to PCA to reduce dimensions
    transformed = pca.fit_transform(images)

    # map the low-dimensional points back to pixel space and bring the
    # values back into the valid 0-255 range of an 8-bit image
    inverse = pca.inverse_transform(transformed)
    formatted = np.clip(inverse, 0, 255).astype(np.uint8)

    # save one random sample from each dimension
    random_row = np.random.randint(len(images), size=1)
    sample = formatted[random_row]

    # total explained variance of the i components kept
    pca_var_expl = \
    np.round(np.cumsum(pca.explained_variance_ratio_)[-1], 2)

    # save results
    samples.append(sample)
    expl_var.append(pca_var_expl)
    dims.append(i)

print("finished applying PCA")

# reshape formatted samples back into 100 x 100 RGB images
reshaped = []
for i in samples:
    new_img = Image.fromarray(i.reshape(100, 100, 3), 'RGB')
    reshaped.append(new_img)

# plot sample from each dimension on a 4-column grid; keep the original
# 6 rows as a minimum and add rows when there are more than 24 samples
n_cols = 4
n_rows = max(6, -(-len(samples) // n_cols))
fig = plt.figure(figsize=(16, 20 * n_rows / 6))

for i in range(len(samples)):
    ax = fig.add_subplot(n_rows, n_cols, i+1, xticks=[], yticks=[])
    ax.imshow(reshaped[i], interpolation='nearest')
    ax.set_title(f'Dim: {dims[i]}, Var: {expl_var[i]}')

plt.suptitle('Reconstructed Images (All Dimensions)', y=0.92)

plt.show()