# Example for semi-supervised SOMClassifier

In [4]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_digits
from sklearn import metrics

# --- for running the script without pip
import sys
sys.path.append("../")
# ---

import susi

## Get data

We modify the `load_digits` dataset of scikit-learn for this semi-supervised application.
Therefore, we randomly set the labels of some datapoints in the training dataset (and only there!) to the placeholder -1.
This placeholder value also has to be passed to the `SOMClassifier` via the hyperparameter `missing_label_placeholder=-1`.

In [5]:
### define ratios (between 0 and 1)
test_size = 0.5       # handed to train_test_split as `test_size`
unlabeled_size = 0.9  # uniform draws below this threshold lose their label

# load the digits restricted to 5 classes and split them into train / test
data = load_digits(n_class=5)
X_train, X_test, y_train, y_test = train_test_split(
    data.data,
    data.target,
    test_size=test_size,
    random_state=1,
)

# preprocessing: the scaler is fitted on the training split only and the
# same fitted scaler is then applied to both splits
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# data with missing labels -> semi-supervised
# one seeded uniform draw per training sample decides whether its label is
# overwritten with the placeholder -1
rng = np.random.RandomState(2)
random_unlabeled_points = rng.rand(y_train.shape[0]) < unlabeled_size
y_train[random_unlabeled_points] = -1

# summary: all training samples, training samples that kept a label, test samples
print("Datapoints for training with label (supervised):\t", len(y_train))
print("Datapoints for training with label (semi-supervised):\t", np.sum(y_train != -1))
print("Datapoints for testing:\t\t\t\t\t", len(y_test))

Datapoints for training with label (supervised):	 450
Datapoints for training with label (semi-supervised):	 39
Datapoints for testing:					 451


## Semi-supervised Classification

In [6]:
# NBVAL_IGNORE_OUTPUT

# Hyperparameters of the classifier, collected in one place.
# `missing_label_placeholder` has to equal the value that marks the
# unlabeled samples in y_train (-1).
som_params = {
    "n_rows": 15,
    "n_columns": 15,
    "n_iter_unsupervised": 5000,
    "n_iter_supervised": 5000,
    "missing_label_placeholder": -1,
    "random_state": 42,
}

# fit on the training data, whose labels may contain the placeholder
som_semi = susi.SOMClassifier(**som_params)
som_semi.fit(X_train, y_train)

# score the predictions on the held-out test data
y_pred = som_semi.predict(X_test)
accuracy = metrics.accuracy_score(y_test, y_pred)

print("Accuracy = {0:.1f} %".format(accuracy*100))

Accuracy = 86.5 %
