In [None]:
# install the required packages (uncomment the lines below to run them)
# ! pip install nbformat==5.7.3
# ! pip install plotly==4.14.3
# ! pip install mlxtend
# ! pip install scikit-learn
# ! pip install umap-learn

# download and unpack the MNIST training images and labels (uncomment to run)
# ! curl -O http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
# ! curl -O http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
# ! gunzip t*-ubyte.gz


In [None]:
import numpy as np
import pandas as pd
import time

# import plot
import plotly.io as plt_io
import plotly.graph_objects as go
import matplotlib

from sklearn.preprocessing import StandardScaler
# import tool - PCA
from sklearn.decomposition import PCA
# import tool - TSNE
from sklearn.manifold import TSNE
# import tool - UMAP
import umap
# import tool - LDA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA

from sklearn import datasets

import platform
from mlxtend.data import loadlocal_mnist


In [None]:

def Loaddataset(name='digits'):
    """Load one of the supported datasets as a feature matrix and label vector.

    Parameters
    ----------
    name : str, default 'digits'
        Which dataset to load:

        * ``'sign_mnist'`` - reads ``dataset/sign_mnist_test.csv`` and keeps
          only the rows whose ``label`` column is below 10.
        * ``'digits'`` - ``sklearn.datasets.load_digits()``, with every image
          flattened to a row of 64 values.
        * ``'mnist'`` - the local idx files ``train-images-idx3-ubyte`` and
          ``train-labels-idx1-ubyte`` read through ``loadlocal_mnist``,
          truncated to the first 10000 samples.

    Returns
    -------
    x, y
        The features and the labels of the selected dataset.

    Raises
    ------
    ValueError
        If ``name`` is not one of the supported dataset names.
    """
    if name == 'sign_mnist':
        # load the sign_mnist data
        # https://www.kaggle.com/datasets/datamunge/sign-language-mnist?resource=download
        frame = pd.read_csv('dataset/sign_mnist_test.csv')
        # picking only the first 10 labels
        frame = frame[frame['label'] < 10]
        # Setting the label and the feature columns
        y = frame.loc[:, 'label'].values
        x = frame.loc[:, 'pixel1':].values

    elif name == 'digits':
        # load the digits data
        # https://scikit-learn.org/stable/auto_examples/datasets/plot_digits_last_image.html
        digits = datasets.load_digits()
        # Setting the label and the feature columns
        x = np.array(digits.images).reshape((-1, 64))
        y = np.array(digits.target)

    elif name == 'mnist':
        # load the MNIST data from the local idx files
        x, y = loadlocal_mnist(
            images_path='train-images-idx3-ubyte',
            labels_path='train-labels-idx1-ubyte')

        # select a subset for time saving
        x = x[:10000]
        y = y[:10000]

    else:
        # Without this branch an unknown name reached `return x, y` with both
        # locals unbound and failed with an unhelpful UnboundLocalError.
        raise ValueError(
            "Unknown dataset name {!r}; expected one of "
            "'sign_mnist', 'digits', 'mnist'".format(name))

    return x, y

In [None]:

def plot_2d(component1, component2, labels=None):
    """Show a 2D plotly scatter plot of two embedding components.

    Parameters
    ----------
    component1, component2 :
        Values plotted on the x and y axis respectively.
    labels : optional
        Values used to color the markers (mapped through the 'Rainbow'
        colorscale). When omitted, the notebook-level variable ``y`` is used,
        which is what this function always did before the parameter existed.

    The figure is rendered with ``fig.show()``; nothing is returned.
    """
    if labels is None:
        # Backward-compatible fallback: existing calls pass only the two
        # components and rely on the global label vector `y`.
        labels = y

    fig = go.Figure(data=go.Scatter(
        x=component1,
        y=component2,
        mode='markers',
        marker=dict(
            size=5,
            color=labels,          # set color equal to a variable
            colorscale='Rainbow',  # one of plotly colorscales
            showscale=True,
            line_width=1
        )
    ))
    fig.update_layout(margin=dict(l=100, r=100, b=100, t=100), width=500, height=500)
    fig.layout.template = 'plotly_dark'

    fig.show()


In [None]:

def plot_3d(component1, component2, component3, labels=None):
    """Show a 3D plotly scatter plot of three embedding components.

    Parameters
    ----------
    component1, component2, component3 :
        Values plotted on the x, y and z axis respectively.
    labels : optional
        Values used to color the markers (mapped through the 'Rainbow'
        colorscale). When omitted, the notebook-level variable ``y`` is used,
        which is what this function always did before the parameter existed.

    The figure is rendered with ``fig.show()``; nothing is returned.
    """
    if labels is None:
        # Backward-compatible fallback: existing calls pass only the three
        # components and rely on the global label vector `y`.
        labels = y

    fig = go.Figure(data=[go.Scatter3d(
        x=component1,
        y=component2,
        z=component3,
        mode='markers',
        marker=dict(
            size=10,
            color=labels,          # set color to an array/list of desired values
            colorscale='Rainbow',  # choose a colorscale
            opacity=1,
            line_width=1
        )
    )])
    fig.update_layout(margin=dict(l=100, r=100, b=100, t=100), width=2000, height=1200)
    fig.layout.template = 'plotly_dark'
    fig.show()


In [None]:

x, y = Loaddataset()

# Standardize the features (StandardScaler: zero mean, unit variance per column)
x = StandardScaler().fit_transform(x)

# PCA
start = time.time()

# Project the standardized data onto its first two principal components.
# Two components are required: the DataFrame below names exactly two columns
# and plot_2d consumes columns 0 and 1.
# more information in https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html
principalComponents = PCA(n_components=2).fit_transform(x)

print('Duration: {} seconds'.format(time.time() - start))
principal = pd.DataFrame(data=principalComponents,
                         columns=['principal component 1', 'principal component 2'])

plot_2d(principalComponents[:, 0], principalComponents[:, 1])


In [None]:

x, y = Loaddataset()

# tSNE
start = time.time()

# First reduce to 50 dimensions with PCA, then run t-SNE on that smaller input.
pca_50 = PCA(n_components=50)
pca_result_50 = pca_50.fit_transform(x)

# Embed the 50-dimensional PCA output into 2D. random_state is fixed so that
# re-running the cell produces the same layout.
# more information in https://scikit-learn.org/stable/modules/generated/sklearn.manifold.TSNE.html
tsne = TSNE(n_components=2, random_state=42).fit_transform(pca_result_50)

# Note: the reported duration covers both the PCA pre-reduction and t-SNE.
print('Duration: {} seconds'.format(time.time() - start))

# plot the 2d vis results
plot_2d(tsne[:, 0], tsne[:, 1])
# For a 3D view, fit with n_components=3 and then call:
# plot_3d(tsne[:, 0],tsne[:, 1],tsne[:,2])


In [None]:

x, y = Loaddataset()

# UMAP
start = time.time()

# Fit UMAP and embed the data into 2D. The reducer object is kept because the
# optional 3D plot below reads reducer.embedding_. random_state is fixed so
# that re-running the cell produces the same layout.
# more information in https://github.com/lmcinnes/umap
reducer = umap.UMAP(n_components=2, random_state=42)
embedding = reducer.fit_transform(x)

print('Duration: {} seconds'.format(time.time() - start))

plot_2d(embedding[:, 0], embedding[:, 1])
# For a 3D view, fit with n_components=3 and then call:
# plot_3d(reducer.embedding_[:, 0],reducer.embedding_[:, 1],reducer.embedding_[:, 2])

