In [None]:
# Load the autoreload extension and set it to mode 2 (reload modules before
# executing code), so edits to the project modules are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
import sys, os
sys.path.append('..')

import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.grid_search import GridSearchCV
from matplotlib import pyplot as plt

from get_data import get_data, get_splitted_data
from models.active_model import ActiveLearningExperiment
from models.strategy import *
from models.utils import ObstructedY

from misc.config import c
data_dir = c["DATA_DIR"]

import kaggle_ninja

In [None]:
# Switch kaggle_ninja into "force reload all" mode
# (presumably bypasses previously cached results; confirm in kaggle_ninja).
kaggle_ninja.turn_on_force_reload_all()

# Loader specification handed to get_data: [loader name, loader keyword arguments].
loader = [
    "get_splitted_uniform_data",
    {"n_folds": 1, "seed": 666, "test_size": 0.0},
]
preprocess_fncs = []

# Fetch the single ('5ht7', 'ExtFP') dataset and take the first element at
# each nesting level of the result.
tiles = list(
    get_data([['5ht7', 'ExtFP']], loader, preprocess_fncs).values()
)[0][0][0]

X, y = tiles['X_train']['data'], tiles['Y_train']['data']
print(X.shape)
print(y.shape)

# Wrap the labels in ObstructedY
# (presumably hides them until they are queried; confirm in models.utils).
y = ObstructedY(y)

# Warm start: pick a batch of 300 samples with random_query and query their labels.
warm_start, _ = random_query(X, y, None, batch_size=300, seed=666)
y.query(warm_start)

In [None]:
# Build the distance structure for X with the project helper
# construct_normalized_euc (going by its name, a normalized Euclidean
# distance; confirm in models.strategy).
# NOTE(review): euc_dist is not referenced by any other cell visible here.
euc_dist = construct_normalized_euc(X)

In [None]:
# Linear SVM trained only on the warm-start samples whose labels were queried.
model = SVC(kernel='linear', C=1)
model.fit(X=X[warm_start], y=y[warm_start])

In [None]:
# Select a batch of 20 samples with each strategy, using the same model and seed,
# so the two selections can be compared in the plots below.
unc_pick, _ = uncertanity_sampling(
    X, y, model,
    seed=666,
    batch_size=20,
)
greedy_pick, greedy_score = quasi_greedy_batch(
    X, y, model,
    seed=666,
    batch_size=20,
    dist='exp_euc',
    c=0.5,
)

In [None]:
# Scatter of the first two columns of X, coloured by label, with the
# quasi-greedy picks recoloured through the extra label value 2.
plt.figure(figsize=(10, 10))

greedy_y = np.copy(y._y)  # copy so the stored labels are left untouched
greedy_y[greedy_pick] = 2

plt.scatter(
    X[:, 0], X[:, 1],
    c=greedy_y,
    cmap=plt.cm.coolwarm,
    s=100,
    linewidths=0,
)

In [None]:
# Scatter of the first two columns of X, coloured by label, with the
# uncertainty-sampling picks recoloured through the extra label value 2.
plt.figure(figsize=(10, 10))

unc_y = np.copy(y._y)  # copy so the stored labels are left untouched
unc_y[unc_pick] = 2

plt.scatter(
    X[:, 0], X[:, 1],
    c=unc_y,
    cmap=plt.cm.coolwarm,
    s=100,
    linewidths=0,
)

In [None]:
from experiments.experiment_runner import run_experiment, run_experiment_grid
import experiments
from experiments import experiment_runner, fit_active_learning, fit_grid
from experiments.utils import get_best
from models.strategy import cosine_distance_normalized

from kaggle_ninja import *

# Passive baseline: grid over the SVC's C (linear kernel only) with the
# "random_query" strategy as the base active-learning experiment.
grid_result_passive = run_experiment(
    "fit_grid",
    experiment_detailed_name="fit_svm_passive_tiles",
    base_experiment="fit_active_learning",
    base_experiment_kwargs={
        "strategy": "random_query",
        "loader_function": "get_splitted_uniform_data",
        "batch_size": 20,
        "base_model": "SVC",
        "loader_args": {"n_folds": 2},
    },
    grid_params={
        # 10 values of C, log-spaced between 1e-5 and 1e5
        "base_model_kwargs:C": list(np.logspace(-5, 5, 10)),
        "base_model_kwargs:kernel": ['linear'],
    },
    seed=666,
    n_jobs=1,
    recalculate_experiments=False,
)

In [None]:
# Uncertainty sampling: same grid over the SVC's C (linear kernel only),
# with the "uncertanity_sampling" strategy as the base experiment.
grid_result_uncertainty = run_experiment(
    "fit_grid",
    experiment_detailed_name="fit_svm_uncertainty_tiles",
    base_experiment="fit_active_learning",
    base_experiment_kwargs={
        "strategy": "uncertanity_sampling",
        "loader_function": "get_splitted_uniform_data",
        "batch_size": 20,
        "base_model": "SVC",
        "loader_args": {"n_folds": 2},
    },
    grid_params={
        # 10 values of C, log-spaced between 1e-5 and 1e5
        "base_model_kwargs:C": list(np.logspace(-5, 5, 10)),
        "base_model_kwargs:kernel": ['linear'],
    },
    seed=666,
    n_jobs=1,
    recalculate_experiments=False,
)

In [None]:
from experiments.utils import *

# Select one experiment per grid with get_best on the "mean_mcc_valid" key.
passive_exp = get_best(grid_result_passive.experiments, "mean_mcc_valid")
print(len(passive_exp.monitors))  # 2, bc there were 2 folds

unc_exp = get_best(grid_result_uncertainty.experiments, "mean_mcc_valid")
print(len(unc_exp.monitors))  # 2, bc there were 2 folds

# List the keys available in the first monitor of the passive experiment.
pas_mon = passive_exp.monitors[0]
pas_mon.keys()

In [None]:
# Plot the uncertainty-sampling and passive experiments together with
# folds='mean' (presumably averaging the per-fold monitors; confirm in
# experiments.utils.plot_monitors).
plot_monitors([unc_exp, passive_exp], folds='mean')

In [None]:
# Show the 'precision_score_concept' entry of the second monitor
# (index 1) of the best passive experiment.
passive_exp.monitors[1]['precision_score_concept']

In [None]:
from experiments.utils import *

# Re-select the passive experiment via get_best on "mean_mcc_valid" and
# plot it with folds='all'.
passive_exp = get_best(grid_result_passive.experiments, "mean_mcc_valid")
print(len(passive_exp.monitors))  # 2, bc there were 2 folds

plot_monitors(passive_exp, folds='all')

In [None]:
# Quasi-greedy batch strategy: grid over the SVC's C (linear kernel only)
# and over the strategy's own `c` parameter, with dist fixed to "exp_euc".
# Runs with n_jobs=8, unlike the passive and uncertainty grids (n_jobs=1).
grid_result_greedy = run_experiment(
    "fit_grid",
    experiment_detailed_name="fit_svm_greedy_tiles",
    base_experiment="fit_active_learning",
    base_experiment_kwargs={
        "strategy": "quasi_greedy_batch",
        "loader_function": "get_splitted_uniform_data",
        "batch_size": 20,
        "base_model": "SVC",
        "loader_args": {"n_folds": 2},
    },
    grid_params={
        # 10 values of C, log-spaced between 1e-5 and 1e5
        "base_model_kwargs:C": list(np.logspace(-5, 5, 10)),
        "base_model_kwargs:kernel": ['linear'],
        # 9 evenly spaced values of the strategy's c between 0.1 and 0.9
        "strategy_kwargs:c": list(np.linspace(0.1, 0.9, 9)),
        "strategy_kwargs:dist": ["exp_euc"],
    },
    seed=666,
    n_jobs=8,
    recalculate_experiments=False,
)

In [None]:
# Select the quasi-greedy experiment from its grid with get_best on "mean_mcc_valid".
greedy_exp = get_best(grid_result_greedy.experiments, "mean_mcc_valid")
# Report the monitors of the experiment just selected. This line previously
# printed passive_exp's monitor count, a copy-paste slip from the passive cell.
print(len(greedy_exp.monitors))  # expected 2, bc the grid was run with 2 folds

In [None]:
# Plot the best quasi-greedy experiment with folds='all'. This cell
# previously re-plotted passive_exp, repeating an earlier cell verbatim
# while greedy_exp was never visualised.
plot_monitors(greedy_exp, folds='all')

In [None]:
from sklearn.metrics import precision_score

# Sanity check: precision when every prediction is -1 although the true
# labels contain two samples of class 1.
precision_score(y_true=[1, 1, -1, -1], y_pred=[-1, -1, -1, -1])