In [48]:
from sklearn.datasets import fetch_openml
from sklearn.ensemble import RandomForestClassifier
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE, LocallyLinearEmbedding, MDS
import matplotlib.pyplot as plt
import plotly.express as px

In [49]:
# Download (or read from the local cache) the 784-feature MNIST dataset from OpenML.
# as_frame=False pins the return type to NumPy arrays: without it, newer
# scikit-learn releases return a pandas DataFrame/Series while older ones
# return arrays, so downstream slicing and plotting would depend on the
# installed version.
X, y = fetch_openml('mnist_784', version=1, cache=True, return_X_y=True, as_frame=False)
# First 60000 rows are used for training, the remainder for testing.
train_x, test_x = X[:60000], X[60000:] # total is 70000
train_y, test_y = y[:60000], y[60000:]

In [10]:
%%time
# EXERCISE 9
rfc = RandomForestClassifier(n_estimators=200, max_leaf_nodes=5000, n_jobs=-1)
rfc.fit(train_x, train_y)
print(rfc.score(train_x, train_y), end=" ")
print(rfc.score(test_x,test_y))

1.0 0.972
CPU times: user 2min 10s, sys: 427 ms, total: 2min 11s
Wall time: 9.43 s


In [5]:
# A fractional n_components asks PCA to keep as many components as are needed
# to explain that share (here 95%) of the training-set variance.
pca = PCA(n_components=0.95)
# Tuple elements are evaluated left to right, so the PCA is fitted on the
# training data before the test data is projected with the same components.
train_reduced_x, test_reduced_x = (
    pca.fit_transform(train_x),
    pca.transform(test_x),
)

In [6]:
%%time
rfc = RandomForestClassifier(n_estimators=200, max_leaf_nodes=5000, n_jobs=-1)
rfc.fit(train_reduced_x, train_y)
print(rfc.score(train_reduced_x, train_y), end=" ")
print(rfc.score(test_reduced_x,test_y))

0.9997166666666667 0.9506
CPU times: user 3min 12s, sys: 254 ms, total: 3min 12s
Wall time: 13 s


In [36]:
# EXERCISE 10
# Embed a subset of the digits in 2-D with t-SNE and colour each point by its label.
n_samples = 5000  # subset size; used for both the features and the labels
tsne = TSNE(n_components=2, perplexity=75.0, learning_rate=1000, random_state=42, n_jobs=-1)
X_embedded = tsne.fit_transform(X[:n_samples])
# x/y/color are passed as array-likes, so no separate data_frame argument is needed.
fig = px.scatter(x=X_embedded[:, 0], y=X_embedded[:, 1], color=y[:n_samples])
# Label the figure so it can be read on its own when the notebook is skimmed.
fig.update_layout(
    title=f"t-SNE embedding of the first {n_samples} MNIST digits",
    xaxis_title="t-SNE dimension 1",
    yaxis_title="t-SNE dimension 2",
    legend_title_text="digit",
)
fig.show()

In [38]:
# 2-D PCA projection of the same subset, for comparison with the other embeddings.
# Bound to its own name (pca_2d) so that re-running this cell does not overwrite
# a previously fitted `pca` object that other cells may still rely on.
n_samples = 5000  # subset size; used for both the features and the labels
pca_2d = PCA(n_components=2, random_state=42)
X_embedded = pca_2d.fit_transform(X[:n_samples])
# x/y/color are passed as array-likes, so no separate data_frame argument is needed.
fig = px.scatter(x=X_embedded[:, 0], y=X_embedded[:, 1], color=y[:n_samples])
# Label the figure so it can be read on its own when the notebook is skimmed.
fig.update_layout(
    title=f"PCA projection of the first {n_samples} MNIST digits",
    xaxis_title="principal component 1",
    yaxis_title="principal component 2",
    legend_title_text="digit",
)
fig.show()

In [44]:
# 2-D Locally Linear Embedding of a subset of the digits, coloured by label.
n_samples = 5000  # subset size; used for both the features and the labels
lle = LocallyLinearEmbedding(n_components=2, n_neighbors=50, reg=0.1,
                             random_state=42, n_jobs=-1)
X_embedded = lle.fit_transform(X[:n_samples])
# x/y/color are passed as array-likes, so no separate data_frame argument is needed.
fig = px.scatter(x=X_embedded[:, 0], y=X_embedded[:, 1], color=y[:n_samples])
# Label the figure so it can be read on its own when the notebook is skimmed.
fig.update_layout(
    title=f"LLE embedding of the first {n_samples} MNIST digits",
    xaxis_title="LLE dimension 1",
    yaxis_title="LLE dimension 2",
    legend_title_text="digit",
)
fig.show()

In [50]:
# 2-D multidimensional scaling of a subset of the digits, coloured by label.
# This cell uses a smaller subset (1000 rows) than the other embedding cells.
n_samples = 1000  # subset size; used for both the features and the labels
mds = MDS(n_components=2, n_init=10, n_jobs=-1, random_state=42)
X_embedded = mds.fit_transform(X[:n_samples])
# x/y/color are passed as array-likes, so no separate data_frame argument is needed.
fig = px.scatter(x=X_embedded[:, 0], y=X_embedded[:, 1], color=y[:n_samples])
# Label the figure so it can be read on its own when the notebook is skimmed.
fig.update_layout(
    title=f"MDS embedding of the first {n_samples} MNIST digits",
    xaxis_title="MDS dimension 1",
    yaxis_title="MDS dimension 2",
    legend_title_text="digit",
)
fig.show()