# Активное обучение

## Библиотеки

In [None]:
!pip install modAL

In [None]:
from abc import abstractmethod

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
from matplotlib import animation

from IPython.display import HTML

from modAL.models import ActiveLearner
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.gaussian_process import GaussianProcessRegressor

from sklearn.svm import SVR

In [None]:
import warnings
# Suppress every warning for the rest of the session to keep the notebook output clean.
# NOTE(review): this also hides deprecation and convergence warnings from the libraries used below.
warnings.filterwarnings("ignore")

## Пример активного обучения

### Определим асессора, который будет размечать данные, нужные в рамках алгоритма активного обучения

In [None]:
class Assessor:
    """Labelling oracle that answers queries by delegating to a callable.

    :param function: callable invoked as ``function(x, epsilon)``
    :param epsilon: value forwarded to ``function`` as its second argument
    """

    def __init__(self, function, epsilon=0.1):
        self.function, self.epsilon = function, epsilon

    def __call__(self, x):
        """Return ``self.function(x, self.epsilon)``.

        :param x: Array
        """
        label = self.function(x, self.epsilon)
        return label

### Данные

In [None]:
# Fix the global NumPy seed so the noisy labels below are reproducible.
np.random.seed(42)

# l: number of points in the pool grid, m: size of the initial labelled set,
# n_queries: how many points the active learner is allowed to request.
l, m, n_queries = 500, 5, 150

# Initial labelled inputs: m evenly spaced points on [0, 10] as a column vector.
X = np.linspace(0, 10, m).reshape(-1, 1)

# Initial labels: sin(X) plus Gaussian noise scaled by 0.5, flattened to 1-D.
initial_noise = 0.5 * np.random.randn(m, 1)
y = (np.sin(X) + initial_noise).reshape(-1)

# Pool of candidate points (also used as the prediction grid), column vector.
_X = np.linspace(0, 10, l).reshape(-1, 1)

In [None]:
def _noisy_sine(x, epsilon):
    """Return ``sin(x)`` plus Gaussian noise scaled by ``epsilon``, flattened to 1-D."""
    noise = epsilon * np.random.randn(*x.shape)
    return (np.sin(x) + noise).reshape(-1)


# Oracle used to label the queried points; epsilon keeps the Assessor default.
assessor = Assessor(_noisy_sine)

### Рассмотрим наш любимый синус

In [None]:
# Draw the noise-free target sin(x) on [0, 10] and overlay the initial labelled points.
plt.figure(figsize=(8, 4))

true_x = np.linspace(0, 10, 100)
plt.plot(true_x, np.sin(true_x.reshape(-1, 1)))

plt.scatter(X, y, color='red')
plt.title('Initial dataset')
plt.show()

### Определим активное обучение, которое семплирует случайные объекты

In [None]:
def random_sampling(classifier, X_pool):
    """Query strategy that picks one pool entry uniformly at random.

    :param classifier: the learner issuing the query; not used by this strategy
    :param X_pool: indexable pool of candidate instances
    :return: tuple ``(index, X_pool[index])``
    """
    candidate_indices = range(len(X_pool))
    chosen = np.random.choice(candidate_indices)
    return chosen, X_pool[chosen]

# Active learner: an SVR with default settings, fitted on the initial labelled
# set and asking for new points via random_sampling.
learner = ActiveLearner(estimator=SVR(),
                        X_training=X,
                        y_training=y,
                        query_strategy=random_sampling)

### Посмотрим на начальную инициализацию

In [None]:
# Prediction of the freshly initialised learner over the whole pool grid.
pred = learner.predict(_X.reshape(-1, 1))

plt.figure(figsize=(8, 4))
plt.plot(_X, pred)
plt.scatter(X, y, color='red')  # initial labelled points
plt.title('Initial estimation based on %d points' % len(X))
plt.show()

### Начнем обучать и собирать историю

In [None]:
# learning_history starts with the learner's prediction on the pool before any
# query; learning_history_point will collect (queried instance, label) pairs.
pred = learner.predict(_X.reshape(-1, 1))
learning_history = [pred]
learning_history_point = []

In [None]:
for step in range(n_queries):
    # Let the strategy pick a pool point, label it with the oracle and refit.
    query_idx, query_instance = learner.query(_X)
    assessor_pred = assessor(query_instance)
    learner.teach(query_instance.reshape(-1, 1), assessor_pred.reshape(-1))

    # Record the new labelled point and the updated prediction over the pool.
    learning_history_point.append((query_instance, assessor_pred))
    pred = learner.predict(_X.reshape(-1, 1))
    learning_history.append(pred)

In [None]:
# Final prediction together with the initial (red) and the queried points.
pred = learner.predict(_X.reshape(-1, 1))
queried_x = [item[0] for item in learning_history_point]
queried_y = [item[1] for item in learning_history_point]

plt.figure(figsize=(8, 4))
plt.plot(_X, pred)
plt.scatter(X, y, color='red')
plt.scatter(queried_x, queried_y)
plt.title('Estimated after %d points' % n_queries)
plt.show()

### Посмотрим на визуализацию

In [None]:
fps = 1 # frames per second of the animation
frn = len(learning_history) # number of animation frames: one per stored prediction

In [None]:
def update_plot(frame_number, learning_history, learning_history_point, plot):
    """Draw one animation frame on the module-level axes ``ax``.

    The prediction curve of the previous frame is removed before the curve for
    ``frame_number`` is drawn, so only the current model estimate is visible
    (previously every frame stacked one more curve on the axes).  The point
    stored at ``learning_history_point[frame_number]`` is added to the plot;
    points added by earlier frames stay visible.

    :param frame_number: index of the frame to draw
    :param learning_history: sequence of predictions over ``_X``, one per frame
    :param learning_history_point: sequence of ``(instance, label)`` pairs
    :param plot: mutable list of artist handles; ``plot[0]`` (current curve) and
        ``plot[2]`` (latest scatter) are overwritten in place
    """
    stale_lines = plot[0] or []
    # Reuse the colour of the curve being replaced so it does not cycle
    # through the colour palette from frame to frame.
    line_style = {}
    if len(stale_lines) == 1:
        line_style['color'] = stale_lines[0].get_color()
    for stale_line in stale_lines:
        stale_line.remove()
    plot[0] = ax.plot(_X, learning_history[frame_number], **line_style)

    # The slice is empty on the last frame (there is one more prediction than
    # there are queried points), in which case nothing new is scattered.
    new_points = learning_history_point[frame_number:frame_number + 1]
    plot[2] = ax.scatter([item[0] for item in new_points],
                         [item[1] for item in new_points])

fig = plt.figure(figsize=(8.0, 4.0), frameon=False)
ax = fig.add_subplot(111)

# Artist handles shared with update_plot: [prediction curve, initial labelled
# points, most recently scattered query point (filled in by update_plot)].
initial_curve = ax.plot(_X, learning_history[0])
initial_points = ax.scatter(X, y, color='red')
plot = [initial_curve, initial_points, None]

plt.autoscale(tight=True)

ani = animation.FuncAnimation(
    fig,
    update_plot,
    frames=frn,
    fargs=(learning_history, learning_history_point, plot),
    interval=1000 / fps,
    blit=False,
)

In [None]:
# Convert the animation to its JavaScript/HTML representation and wrap it in an
# IPython HTML object; as the last expression of the cell it is shown inline.
HTML(ani.to_jshtml())

### Пример выбора точки, для которой максимальная дисперсия

In [None]:
def max_std_sampling(classifier, X_pool):
    """Query strategy that picks the pool point with the largest predictive std.

    :param classifier: learner whose ``predict(X, return_std=True)`` returns a
        ``(mean, std)`` pair
    :param X_pool: pool of candidate instances
    :return: index of the instance with the maximal standard deviation
    """
    predictive_std = classifier.predict(X_pool, return_std=True)[1]
    return np.argmax(predictive_std)

# Active learner backed by a Gaussian-process regressor with default settings;
# it is fitted on the initial labelled set and queries via max_std_sampling.
learner = ActiveLearner(estimator=GaussianProcessRegressor(),
                        X_training=X,
                        y_training=y,
                        query_strategy=max_std_sampling)

In [None]:
# Prediction of the newly created learner over the whole pool grid.
pred = learner.predict(_X.reshape(-1, 1))

plt.figure(figsize=(8, 4))
plt.plot(_X, pred)
plt.scatter(X, y, color='red')  # initial labelled points
plt.title('Initial estimation based on %d points' % len(X))
plt.show()

In [None]:
# Restart the history: the first entry is the prediction on the pool before any
# query; learning_history_point will collect (queried instance, label) pairs.
pred = learner.predict(_X.reshape(-1, 1))
learning_history = [pred]
learning_history_point = []

In [None]:
for step in range(n_queries):
    # Query the pool, obtain the label from the oracle and refit the learner.
    query_idx, query_instance = learner.query(_X)
    assessor_pred = assessor(query_instance)
    learner.teach(query_instance.reshape(-1, 1), assessor_pred.reshape(-1))

    # Keep the labelled point and the refreshed prediction for the animation.
    learning_history_point.append((query_instance, assessor_pred))
    pred = learner.predict(_X.reshape(-1, 1))
    learning_history.append(pred)

In [None]:
# Final prediction with the initial points in red and the queried points on top.
pred = learner.predict(_X.reshape(-1, 1))
queried_x = [item[0] for item in learning_history_point]
queried_y = [item[1] for item in learning_history_point]

plt.figure(figsize=(8, 4))
plt.plot(_X, pred)
plt.scatter(X, y, color='red')
plt.scatter(queried_x, queried_y)
plt.title('Estimated after %d points' % n_queries)
plt.show()

### Посмотрим на визуализацию

In [None]:
fps = 1 # frames per second of the animation
frn = len(learning_history) # number of animation frames: one per stored prediction

In [None]:
def update_plot(frame_number, learning_history, learning_history_point, plot):
    """Draw one animation frame on the module-level axes ``ax``.

    The prediction curve of the previous frame is removed before the curve for
    ``frame_number`` is drawn, so only the current model estimate is visible
    (previously every frame stacked one more curve on the axes).  The point
    stored at ``learning_history_point[frame_number]`` is added to the plot;
    points added by earlier frames stay visible.

    :param frame_number: index of the frame to draw
    :param learning_history: sequence of predictions over ``_X``, one per frame
    :param learning_history_point: sequence of ``(instance, label)`` pairs
    :param plot: mutable list of artist handles; ``plot[0]`` (current curve) and
        ``plot[2]`` (latest scatter) are overwritten in place
    """
    stale_lines = plot[0] or []
    # Reuse the colour of the curve being replaced so it does not cycle
    # through the colour palette from frame to frame.
    line_style = {}
    if len(stale_lines) == 1:
        line_style['color'] = stale_lines[0].get_color()
    for stale_line in stale_lines:
        stale_line.remove()
    plot[0] = ax.plot(_X, learning_history[frame_number], **line_style)

    # The slice is empty on the last frame (there is one more prediction than
    # there are queried points), in which case nothing new is scattered.
    new_points = learning_history_point[frame_number:frame_number + 1]
    plot[2] = ax.scatter([item[0] for item in new_points],
                         [item[1] for item in new_points])

fig = plt.figure(figsize=(8.0, 4.0), frameon=False)
ax = fig.add_subplot(111)

# Artist handles shared with update_plot: [prediction curve, initial labelled
# points, most recently scattered query point (filled in by update_plot)].
initial_curve = ax.plot(_X, learning_history[0])
initial_points = ax.scatter(X, y, color='red')
plot = [initial_curve, initial_points, None]

plt.autoscale(tight=True)

ani = animation.FuncAnimation(
    fig,
    update_plot,
    frames=frn,
    fargs=(learning_history, learning_history_point, plot),
    interval=1000 / fps,
    blit=False,
)

In [None]:
# Convert the animation to its JavaScript/HTML representation and wrap it in an
# IPython HTML object; as the last expression of the cell it is shown inline.
HTML(ani.to_jshtml())