# Dependencies

In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

# Part 1: Prepare the Data

In [None]:
# Read the myopia dataset from a CSV file given by relative path.
file = "myopia.csv"
df = pd.read_csv(filepath_or_buffer=file)

In [None]:
# Display the loaded DataFrame as the cell output.
df

In [None]:
# Remove the MYOPIC label column so only feature columns remain, then display the result.
df_unsupervised = df.drop(columns="MYOPIC")
df_unsupervised

In [None]:
# X: the feature frame without the label; y: the MYOPIC label column.
X, y = df_unsupervised, df["MYOPIC"]

In [None]:
# Show the (rows, columns) shape of the feature frame.
X.shape

In [None]:
# Show the shape of the label Series.
y.shape

In [None]:
# Shape of the MYOPIC column taken straight from df (the same column y was built from).
df["MYOPIC"].shape

In [None]:
# Partition features and labels into train/test subsets; random_state fixes the
# shuffle so the same split is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

In [None]:
# Standardize the features. The scaler is fitted on the training split only and
# the fitted parameters are then reused to transform the test split.
X_scaler = StandardScaler()
X_train_scaled = X_scaler.fit_transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [None]:
# Display the scaled training features.
X_train_scaled

# Part 2: Apply Dimensionality Reduction

## Perform PCA

In [None]:
# import PCA
from sklearn.decomposition import PCA

In [None]:
# Configure PCA with n_components given as a fraction (0.9), i.e. a target share
# of explained variance rather than a fixed component count.
pca = PCA(n_components=0.9)

In [None]:
# Fit PCA on the scaled training features and transform them in one step.
myopia_pca = pca.fit_transform(X_train_scaled)

In [None]:
# Show the shape of the PCA output (rows, retained components).
myopia_pca.shape

In [None]:
# Wrap the PCA output array in a DataFrame and display it.
df_unsupervised_myopia = pd.DataFrame(myopia_pca)
df_unsupervised_myopia

In [None]:
# NOTE: the original statement here assigned `pca.labels_` to a "class" column.
# PCA is a dimensionality-reduction transformer and exposes no `labels_`
# attribute, so that line raised AttributeError and stopped the notebook.
# Cluster labels only exist once a clustering model has been fitted (the KMeans
# `model` in Part 3), so no label column is added to the PCA frame at this point.

## Perform TSNE

In [None]:
#import TSNE
from sklearn.manifold import TSNE

In [None]:
# Embed the PCA-reduced features with t-SNE.
# The model is fitted on the `myopia_pca` array rather than on
# `df_unsupervised_myopia`, so that any non-feature column attached to that
# frame (such as a "class" label) cannot end up in the embedding, and so the
# rows line up with the KMeans input, which is also `myopia_pca`.
# t-SNE is stochastic; a fixed random_state makes the embedding reproducible.
tsne = TSNE(learning_rate=250, random_state=0)
tsne_features = tsne.fit_transform(myopia_pca)

In [None]:
# Show the shape of the t-SNE output array.
tsne_features.shape

In [None]:
# Bring in matplotlib and draw the first t-SNE column against the second.
import matplotlib.pyplot as plt

plt.scatter(x=tsne_features[:, 0], y=tsne_features[:, 1])
plt.show()

# Part 3: Perform a Cluster Analysis with K-means

In [None]:
# import KMeans
from sklearn.cluster import KMeans

In [None]:
# Elbow search: fit KMeans for k = 1..10 and record each model's inertia.
# The models are fitted on `myopia_pca`, the same PCA-reduced data the final
# KMeans model is trained on, instead of the raw, unscaled `df_unsupervised`,
# so the elbow reflects the feature space that is actually clustered.
inertia = []
k = list(range(1, 11))
for i in k:
    km = KMeans(n_clusters=i, random_state=0)
    km.fit(myopia_pca)
    inertia.append(km.inertia_)

In [None]:
# Collect the candidate cluster counts and their inertias into a DataFrame
# and preview the first rows.
elbow_data = {
    "k": k,
    "inertia": inertia,
}
df_elbow = pd.DataFrame(data=elbow_data)
df_elbow.head()

In [None]:
# Draw inertia as a function of the number of clusters, with one tick per
# candidate k, then label and show the figure.
plt.plot(df_elbow['k'], df_elbow['inertia'])
plt.title('Elbow curve of Myopic data')
plt.xlabel('Number of Clusters')
plt.ylabel('Inertia')
plt.xticks(range(1, 11))
plt.show()

In [None]:
# Instantiate the final KMeans model with 3 clusters.
# random_state is fixed (matching the elbow loop, which uses random_state=0) so
# that centroid initialisation, and therefore the cluster assignments, are
# reproducible between runs.
model = KMeans(n_clusters=3, random_state=0)

In [None]:
# Fit the model on the PCA output, then assign each PCA row to a cluster.
predicted_clusters = model.fit(myopia_pca).predict(myopia_pca)

In [None]:
# Scatter the two t-SNE columns, colouring each point by its predicted cluster.
plt.scatter(
    x=tsne_features[:, 0],
    y=tsne_features[:, 1],
    c=predicted_clusters,
)
plt.show()

# Part 4: Make a Recommendation