In [None]:
## Setting up my workstation and (virtual) working environment
# Recall that an environment consists of: (1) an interpreter (a specific Python version) AND (2) a set of installed packages or libraries

# From the conda terminal OR python terminal
# check conda environments and create one called ml
#conda info --envs
#conda create -n ml python=3.7.6
#conda activate ml

# Next, from the conda terminal, install all the packages this notebook imports
# conda install matplotlib numpy pandas seaborn scikit-learn


In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn import datasets
from sklearn import manifold

In [None]:
# Fetch the 'mnist_784' dataset (version 1) from OpenML using sklearn datasets.
# as_frame=False forces NumPy arrays: with scikit-learn >= 0.24 the default
# (as_frame="auto") returns a pandas DataFrame/Series for this dataset, which
# breaks the positional indexing used in later cells (e.g. pixel_values[1, :]).
data = datasets.fetch_openml(
    'mnist_784',
    version=1,
    return_X_y=True,
    as_frame=False,
)
# return_X_y=True yields a (features, labels) pair
pixel_values, targets = data
# Convert target variable from string to integer
targets = targets.astype(int)

In [None]:
# Visualize one sample of the pixel_values dataset by reshaping its flattened
# row back to the original 28x28 image size and plotting it with matplotlib.
sample_index = 1  # NOTE: index 1 is the second row, not the first

# Slice out the single row first and then convert, so this works whether
# pixel_values is a NumPy array or a pandas DataFrame (and avoids converting
# the whole dataset just to show one image).
single_image = np.asarray(pixel_values[sample_index:sample_index + 1]).reshape(28, 28)

plt.imshow(single_image, cmap='gray')
plt.title(f"Sample {sample_index} (label: {targets[sample_index]})")
plt.show()  # render the figure without echoing the AxesImage repr

In [None]:
# Create the t-SNE transformation of the data with 2 components.
# Only the first N_SAMPLES rows are embedded (presumably to keep the t-SNE
# runtime manageable); the same count is reused for the labels so the two
# always stay aligned.
N_SAMPLES = 3000

tsne = manifold.TSNE(n_components=2, random_state=42)  # fixed seed for reproducibility
# np.asarray + a plain row slice works for both NumPy arrays and DataFrames
transformed_data = tsne.fit_transform(np.asarray(pixel_values[:N_SAMPLES]))

# Convert to a DataFrame from a NumPy array
tsne_df = pd.DataFrame(
    np.column_stack((transformed_data, targets[:N_SAMPLES])),  # stacks columns into a 2D array
    columns=["x", "y", "targets"]
)

# column_stack produces one homogeneous float array, so the labels come out as
# floats. Convert them back to integers with astype on the frame: assigning via
# `tsne_df.loc[:, "targets"] = ...` sets values in place on pandas >= 2.0 and
# silently keeps the float dtype.
tsne_df = tsne_df.astype({"targets": int})



In [None]:
# Show the first 10 rows. A bare expression is only rendered when it is the
# last line of a cell, so use the notebook's built-in display() here.
display(tsne_df.head(n=10))

# Plot using the seaborn and matplotlib libraries: one scatter layer per digit
# class, coloured by the "targets" column.
# `height` replaces the `size` keyword, which seaborn renamed in 0.9 and later
# removed.
grid = sns.FacetGrid(tsne_df, hue="targets", height=8)
# add_legend must be *called*; referencing the method alone draws no legend.
# The trailing semicolon suppresses the FacetGrid repr in the cell output.
grid.map(plt.scatter, "x", "y").add_legend();