In [None]:
import mglearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.svm import SVR
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score
from sklearn.decomposition import PCA
from sklearn.datasets import load_breast_cancer, load_digits, load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN

In [None]:
# 1. Supervised_Learning_1_Basics
# 1. Supervised_Learning_2_k_Nearest_Neighbors
# 1. Supervised_Learning_3_Linear_Models
# 1. Supervised_Learning_4_Decision_Trees_Ensembles

In [None]:
# 1. Supervised_Learning_5_Support_Vector_Machines

########## SVR

X, y = mglearn.datasets.load_extended_boston()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Fit the feature scaler on the training split only; the test split reuses the
# same statistics (exploring the test dataset is prohibited).
scalerX = StandardScaler()
scalerX.fit(X_train)
X_train_scaled = scalerX.transform(X_train)
X_test_scaled = scalerX.transform(X_test)

# The slack variables in SVR are affected by the scale of the target value, so the target is scaled as well.
# (Prof. Kang Seok-ho) When training a predictive model for a regression problem, scaling the target label
# is recommended (except for models such as Decision Tree and k-NN, which are invariant to target scaling).

# StandardScaler operates on 2-D arrays, hence the reshape to a single column.
scalerY = StandardScaler()
scalerY.fit(y_train.reshape(-1, 1))
y_train_scaled = scalerY.transform(y_train.reshape(-1, 1))
y_test_scaled = scalerY.transform(y_test.reshape(-1, 1))

reg = SVR()
# SVR expects a 1-D target; flatten the scaled column to avoid a DataConversionWarning.
reg.fit(X_train_scaled, y_train_scaled.ravel())

# For evaluation, predictions are inverse-transformed back to the original target units.
y_train_hat_scaled = reg.predict(X_train_scaled)
y_train_hat = scalerY.inverse_transform(y_train_hat_scaled.reshape(-1, 1))

# Training-set metrics: MAE, RMSE, R^2
print(mean_absolute_error(y_train, y_train_hat))
print(mean_squared_error(y_train, y_train_hat) ** 0.5) # Root for RMSE
print(r2_score(y_train, y_train_hat))


y_test_hat_scaled = reg.predict(X_test_scaled)
y_test_hat = scalerY.inverse_transform(y_test_hat_scaled.reshape(-1, 1))

# Test-set metrics, in the same order as above: MAE, RMSE, R^2
print(mean_absolute_error(y_test, y_test_hat))
print(mean_squared_error(y_test, y_test_hat) ** 0.5) # Root for RMSE
print(r2_score(y_test, y_test_hat))

In [None]:
# 1. Supervised_Learning_6_Neural_Networks

# Like SVR, MLPRegressor also needs the target (y) values to be scaled.

In [None]:
# 2. Unsupervised_Learning_1_Basics

In [None]:
# 2. Unsupervised_Learning_2_PCA

########## PCA as a preprocessing step for supervised learning (with k-NN)

cancer = load_breast_cancer()
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, random_state=42
)

# Standardize using statistics learned from the training split only.
scaler = StandardScaler().fit(X_train)
X_train_scaled, X_test_scaled = (scaler.transform(part) for part in (X_train, X_test))

# n_components must be given explicitly; if it is omitted, no dimensionality reduction takes place.
pca = PCA(n_components=2).fit(X_train_scaled)
X_train_pca, X_test_pca = (pca.transform(part) for part in (X_train_scaled, X_test_scaled))

# Train the classifier on the 2-component representation.
clf = KNeighborsClassifier(n_neighbors=3).fit(X_train_pca, y_train)

y_train_hat, y_test_hat = (clf.predict(part) for part in (X_train_pca, X_test_pca))

# PCA provides an inverse transformation, so projected data can be mapped back
# to the (scaled) feature space and evaluated.
# Note: the input here is X_test_pca, NOT X_test_scaled.
X_test_rec = pca.inverse_transform(X_test_pca)

In [None]:
# 2. Unsupervised_Learning_3_tSNE

# Unlike PCA, t-SNE does not support transforming new (test) data:
# the embedding is computed only for the data it is fitted on.

digits = load_digits()

# Fixed random_state so the embedding is reproducible across runs.
tsne = TSNE(random_state=42)
digits_tsne = tsne.fit_transform(digits.data) # use fit_transform instead of fit + transform, as t-SNE has no transform method

In [None]:
# 2. Unsupervised_Learning_4_kMeans_HC (lower priority)

# K-means can assign clusters to new (unseen) data via predict().

iris = load_iris()
X_train, y_train = iris.data, iris.target

scaler = StandardScaler()
# Fit the scaler on the raw iris features loaded above (not on a stale
# X_train_scaled left over from an earlier cell).
# For new data, call scaler.transform() only -- never refit the scaler.
X_train_scaled = scaler.fit_transform(X_train)

# Fixed seed so the cluster assignments are reproducible across runs.
kmeans = KMeans(n_clusters=3, random_state=42)
kmeans.fit(X_train_scaled)

# Cluster centers mapped back to the original (unscaled) feature units.
print(scaler.inverse_transform(kmeans.cluster_centers_))

assignments_X_train_scaled = kmeans.labels_

# Illustrative unseen sample with the four iris features; it must go through
# the already-fitted scaler before being assigned to a cluster.
X_new = [[5.0, 3.4, 1.5, 0.2]]
X_new_scaled = scaler.transform(X_new)
assignments_X_new = kmeans.predict(X_new_scaled)


# Agglomerative Clustering, the representative hierarchical clustering method,
# cannot handle new data (it has no predict method).
# Note: it is fitted on the unscaled features here, unlike K-means above.
agg = AgglomerativeClustering(n_clusters=3, linkage="ward")
agg.fit(X_train)

assignments_X_train = agg.labels_

In [None]:
# 2. Unsupervised_Learning_5_DBC
# Rescale every feature into the range [-1, 1] before density-based clustering.
# X_train is the array defined in the preceding clustering cell.
scaler = MinMaxScaler(feature_range=(-1, 1))
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)

# DBSCAN with its default parameters; fit() returns the fitted estimator.
dbscan = DBSCAN().fit(X_train_scaled)

# Cluster label assigned to each training sample.
assignments_X_train_scaled = dbscan.labels_