# Libraries

In [None]:
import pandas as pd
import numpy as np
import umap.umap_ as umap  # pip install umap-learn
import umap.plot  # pip install umap-learn[plot]
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import QuantileTransformer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score

# Load and examine our data

In [None]:
# Read an 800,000-row window of the raw CSV, skipping the first 1,700,000
# lines. The file is read without a header row, so columns get integer labels.
df = pd.read_csv(
    "D:/DataAskiseis/New_Disq_Functions/malakas_2.csv",
    header=None,
    skiprows=1700000,
    nrows=800000,
)
df.head()

In [None]:
# Remove columns 113, 114 and 115 in place before any modelling.
df.drop(columns=[113, 114, 115], inplace=True)

In [None]:
# Summary statistics of the loaded columns.
df.describe()

In [None]:
# (rows, columns) of the frame after the column drop above.
df.shape

In [None]:
# Count of each distinct value in column 112, which is used as the target `y` below.
df[112].value_counts()

In [None]:
# Draw 2000 distinct rows; column 112 is the target, every other column is a feature.
df_sample = df.sample(n=2000, replace=False)
X = df_sample.drop(columns=112)
y = df_sample[112].to_numpy()

# Preprocess

In [None]:
# Two-step scaling pipeline: a quantile transform followed by standardisation,
# both with their default settings. It is fitted on the whole 2000-row sample,
# which is fine here because this section only visualises the sample (no
# train/test split).
pipe = make_pipeline(QuantileTransformer(),StandardScaler())
X = pipe.fit_transform(X.copy())
X.shape

# Supervised UMAP transformation on 2000 points

In [None]:
# Supervised UMAP: the labels `y` are handed to fit() together with the
# features, then the same points are projected into the 2-D embedding.
manifold = umap.UMAP(
    n_neighbors=1000,
    min_dist=0.1,
    n_components=2,
    metric="manhattan",
)
manifold.fit(X, y)
X_reduced = manifold.transform(X)

In [None]:
# Scatter of the two UMAP components, coloured by label.
# The trailing semicolon suppresses the notebook's text echo of the last call.
emb_x, emb_y = X_reduced[:, 0], X_reduced[:, 1]
plt.scatter(emb_x, emb_y, c=y, s=11, cmap='Spectral')
plt.title('Embedding of the malakas dataset by UMAP', fontsize=14);

In [None]:
# Embedding drawn with umap.plot, points coloured by label `y`.
umap.plot.points(manifold, labels = y, theme = "fire");

In [None]:
# Connectivity plot of the fitted model, coloured by label `y`.
umap.plot.connectivity(manifold, labels=y, theme="fire");

In [None]:
# Connectivity plot again, this time without labels and with the points shown.
umap.plot.connectivity(manifold, show_points=True);

# Build a UMAP model on the training set and apply it to the test and validation sets

In [None]:
# our dataframe and our target
df_sample = df.sample(2000, replace = False)
X, y = df_sample.drop(112, axis = 1), df_sample[[112]].values.flatten()

In [None]:
# Preprocess
pipe = make_pipeline(QuantileTransformer(), StandardScaler())
X = pipe.fit_transform(X.copy())

In [None]:
# train and test split
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify = y, random_state = 42)

X_train.shape

In [None]:
# Supervised UMAP transformation on the training set
manifold = umap.UMAP(n_neighbors = 1499, min_dist = 0.1, n_components = 2, metric = "manhattan").fit(X_train, y_train)

In [None]:
# Make the new training dataframe with the UMAP features as predictors
X_reduced = manifold.transform(X_train)

In [None]:
# Visualize our new training dataframe
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c = y_train, s = 11, cmap = 'Spectral');
plt.title('Embedding of the training dataset by UMAP', fontsize = 14);

In [None]:
# Apply the UMAP transformation on the test set and make the new testing dataframe
X_reduced = manifold.transform(X_test)

In [None]:
# Visualize our new testing dataframe
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c = y_test, s = 11, cmap = 'Spectral');
plt.title('Embedding of the testing dataset by UMAP', fontsize = 14);

# Load our validation sets

In [None]:
# Read the four header-less validation CSVs in one pass.
# Note that shock_3 comes from "gausian.csv".
validation_files = ("shock1.csv", "shock2.csv", "gausian.csv", "shock_4.csv")
shock_1, shock_2, shock_3, shock_4 = (
    pd.read_csv(csv_path, header=None) for csv_path in validation_files
)

# Preprocess shock_1 dataset

In [None]:
shock_1_sample = shock_1.sample(800, replace = False)
X, y = shock_1_sample.drop(112, axis = 1), shock_1_sample[[112]].values.flatten()

In [None]:
pipe = make_pipeline(QuantileTransformer(), StandardScaler())
X = pipe.fit_transform(X.copy())

# Apply the UMAP transformation on the shock_1 dataset and make our new shock_1 dataframe with UMAP features as predictors


In [None]:
X_reduced = manifold.transform(X)

# Visualize our new dataframe

In [None]:
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c = y, s = 11, cmap = 'Spectral');
plt.title('Embedding of the shock_1 dataset by UMAP', fontsize = 14);

# Preprocess shock_2 dataset

In [None]:
shock_2_sample = shock_2.sample(800, replace = False)
X, y = shock_2_sample.drop(112, axis = 1), shock_2_sample[[112]].values.flatten()

In [None]:
pipe = make_pipeline(QuantileTransformer(), StandardScaler())
X = pipe.fit_transform(X.copy())

# Apply the UMAP transformation on the shock_2 dataset and make our new shock_2 dataframe with UMAP features as predictors

In [None]:
X_reduced = manifold.transform(X)

# Visualize our new dataframe

In [None]:
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c = y, s = 11, cmap = 'Spectral');
plt.title('Embedding of the shock_2 dataset by UMAP', fontsize = 14);

# Preprocess shock_3 dataset

In [None]:
shock_3_sample = shock_3.sample(800, replace=False)
X, y = shock_3_sample.drop(112, axis=1), shock_3_sample[[112]].values.flatten()

In [None]:
pipe = make_pipeline(QuantileTransformer(), StandardScaler())
X = pipe.fit_transform(X.copy())

# Apply the UMAP transformation on the shock_3 dataset and make our new shock_3 dataframe with UMAP features as predictors

In [None]:
X_reduced = manifold.transform(X)

# Visualize our new dataframe

In [None]:
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c = y, s = 11, cmap = 'Spectral');
plt.title('Embedding of the shock_3 dataset by UMAP', fontsize=14);

# Preprocess shock_4 dataset

In [None]:
shock_4_sample = shock_4.sample(800, replace=False)
X, y = shock_4_sample.drop(112, axis = 1), shock_4_sample[[112]].values.flatten()

In [None]:
pipe = make_pipeline(QuantileTransformer(), StandardScaler())
X = pipe.fit_transform(X.copy())

# Apply the UMAP transformation on the shock_4 dataset and make our new shock_4 dataframe with UMAP features as predictors

In [None]:
X_reduced = manifold.transform(X)

# Visualize our new dataframe

In [None]:
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c = y, s = 11, cmap = 'Spectral');
plt.title('Embedding of the shock_4 dataset by UMAP', fontsize=14);

# Build a model

In [None]:
df_sample = df.sample(2000, replace=False)
X, y = df_sample.drop(112, axis=1), df_sample[[112]].values.flatten()

In [None]:
pipe = make_pipeline(QuantileTransformer(), StandardScaler())
X = pipe.fit_transform(X.copy())

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify = y, random_state = 42)

In [None]:
# fit a model without UMAP transformation
svc = SVC().fit(X_train, y_train)

In [None]:
svc.score(X_train, y_train)

In [None]:
svc.score(X_test, y_test)

In [None]:
X_train. shape

In [None]:
# Now apply the UMAP transformation
# Supervised UMAP transformation on the training set
manifold = umap.UMAP(n_neighbors = 1499, min_dist = 0.1, n_components = 2, metric = "manhattan").fit(X_train, y_train)
X_reduced = manifold.transform(X_train)

In [None]:
# Fit a model with UMAP transformation applied
svc = SVC().fit(X_reduced, y_train)

In [None]:
svc.score(X_reduced, y_train)

In [None]:
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c = y_train, s = 11, cmap = 'Spectral');
plt.title('Embedding of the training dataset by UMAP', fontsize = 14);

# Predictions on the test set with the UMAP features as predictors

In [None]:
X_reduced = manifold.transform(X_test)
svc.score(X_reduced, y_test)

In [None]:
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c = y_test, s = 11, cmap = 'Spectral');
plt.title('Embedding of the testing dataset by UMAP', fontsize = 14);

# Predictions on the shock_4 dataset

In [None]:
shock_4_sample = shock_4.sample(800, replace=False)
X, y = shock_4_sample.drop(112, axis=1), shock_4_sample[[112]].values.flatten()

In [None]:
pipe = make_pipeline(QuantileTransformer(),StandardScaler())
X = pipe.fit_transform(X.copy())

In [None]:
X_reduced = manifold.transform(X)
svc.score(X_reduced,y)

In [None]:
plt.scatter(X_reduced[:, 0], X_reduced[:, 1], c = y, s = 11, cmap = 'Spectral');
plt.title('Embedding of the shock_4 dataset by UMAP', fontsize = 14);