In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_blobs, make_classification
from sklearn.preprocessing import StandardScaler
from modAL.models import ActiveLearner
from modAL.uncertainty import entropy_sampling
from modAL.batch import uncertainty_batch_sampling
from modAL.uncertainty import classifier_uncertainty, classifier_margin, classifier_entropy, entropy_sampling
from modAL.models import Committee
from modAL.disagreement import vote_entropy_sampling
from Models import models, plot, sampling
from Data import datasets
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from mpl_toolkits.mplot3d import Axes3D

Import experiment data

In [None]:
# Show every ammonium iodide listed in datasets.Amine_done
print(datasets.Amine_done)

# Import experiment data and select the ammonium iodide type
df_ammonium = datasets.df_dict['Ethylammonium Iodide']
#########################################################

# Generate input / output dataframes: the run identifier, the crystal score
# (the target) and the raw inchikey columns are removed from the inputs.
X = df_ammonium.drop(['RunID_vial', '_out_crystalscore',
                      '_raw_acid_0_inchikey', '_raw_inorganic_0_inchikey',
                      '_raw_organic_0_inchikey', '_raw_solvent_0_inchikey'], axis=1)
y = df_ammonium.filter(['_out_crystalscore'], axis=1)

# Standardize the input. StandardScaler returns a plain array, so it is wrapped
# back into a DataFrame that keeps the original index and column labels.
x = StandardScaler().fit_transform(X)
X = pd.DataFrame(x, index=X.index, columns=X.columns)
# y = (y.replace([1,2,3],0)).replace(4,1) # turned into binary classification

# Select the initial training pool and the testing pool.
# replace=False is required: np.random.choice samples WITH replacement by
# default, which could pick the same label several times and silently shrink
# the number of distinct seed experiments below 24.
# NOTE: raises ValueError if the dataset has fewer than 24 rows.
init_idx = np.random.choice(np.array(X.index), size=24, replace=False)

# It is complicated to feed a pd.DataFrame directly to modAL's ActiveLearner
# because the DataFrame index is kept unchanged after query instances are
# dropped. Converting to np.array is simpler; the column labels can be
# retrieved from X afterwards.
X_training = np.array(X.filter(init_idx, axis=0))
y_training = np.array(y.filter(init_idx, axis=0))
y_training = y_training.reshape(len(y_training),)  # (n, 1) -> (n,)
X_test = np.array(X.drop(init_idx, axis=0))
y_test = np.array(y.drop(init_idx, axis=0))
y_test = y_test.reshape(len(y_test),)  # (n, 1) -> (n,)

Import synthetic data

In [None]:
## Import synthetic data
X = datasets.X_syn1
y = datasets.y_syn1
## Generate initial training and testing pool
# Draw 5 distinct row positions. replace=False is required: sampling with
# replacement (the np.random.choice default) can repeat a position, which would
# duplicate rows in the training set while np.delete below removes each
# position only once, leaving the two pools inconsistent.
# NOTE: raises ValueError if the dataset has fewer than 5 rows.
init_idx = np.random.choice(len(X), size=5, replace=False)
X_training, y_training = X[init_idx], y[init_idx]
y_training = y_training.reshape(len(y_training),)  # flatten to 1-D
# Everything that was not selected for training becomes the test pool.
X_test, y_test = np.delete(X, init_idx, axis=0), np.delete(y, init_idx)
y_test = y_test.reshape(len(y_test),)  # flatten to 1-D

Estimator choices: ['SVC_rbf', 'SVC_Pearson', 'RF', 'xgboost', 'kNN', 'GPC','GPR']

In [None]:
## Active learning process
# ---- learner configuration -------------------------------------------------
# `classifier` is the key used to look up the estimator in models.estimator.
classifier = 'GPR'
# Per the original notes: the custom query strategy is based on uncertainty
# sampling and is meant for classification; regression_std is the one to use
# when doing regression.
query_strategy = models.regression_std
n_instances = 1
n_queries = 100
#########################################################
learner = ActiveLearner(
    estimator=models.estimator[classifier],
    X_training=X_training,
    y_training=y_training,
    query_strategy=query_strategy,
)

# ---- learning curve ----------------------------------------------------------
# models.actlearn_perf produces the learning curve of the active learner.
# Per the original notes: in each iteration of active learning, new training
# samples are taken from X_test and moved to X_training, while (X, y) are used
# to compute the testing score.
train_size, scores = models.actlearn_perf(
    learner=learner,
    X=X,
    y=y,
    X_training=X_training,
    y_training=y_training,
    X_test=X_test,
    y_test=y_test,
    n_queries=n_queries,
    n_instances=n_instances,
)

In [None]:
# Plot the learning curve of the above active-learning process
fig = plt.figure()
ax = fig.add_subplot(111)
ax.plot(train_size, scores, 'o-', c='black',
        fillstyle='none', label='active-learning')
ax.set_ylim(-2, 1)
ax.legend(loc='best')
ax.set_xlabel('Number of training dataset')
ax.set_ylabel('Prediction accuracy')
# Name the saved figure after the estimator that was actually used (the
# `classifier` key set in the active-learning cell) instead of a hard-coded
# "kNN", so runs with different estimators do not overwrite each other or end
# up mislabelled. The 'Graphs/' directory must already exist.
plt.savefig('Graphs/' + 'Learning curve of ' + classifier + ' active learner' + '.png')