# Um estudo sobre aprendizado ativo

Nesse noteboook implementei minha própria classe de um algoritmo de aprendizado ativo.

Esse algoritmo, *MyActLearning*, usa como base o LR e pensei em usar ele com 10% dos dados de treinamento.

Usei o BC dataset para uma primeira comparação desse algorimo com o LR. Os resultados foram interessantes.

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.impute import SimpleImputer
from sklearn.datasets import fetch_openml, load_breast_cancer

from sklearn.manifold  import TSNE
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.manifold import MDS
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis


from sklearn.model_selection import train_test_split,  KFold, cross_val_score, cross_val_predict
from sklearn.preprocessing import LabelEncoder

from sklearn.metrics import accuracy_score, f1_score,confusion_matrix

#Logistic Regression:
from sklearn.linear_model import LogisticRegression
#Support Vector Machines (SVM):
from sklearn.svm import SVC
#Random Forest:
from sklearn.ensemble import RandomForestClassifier
#K-Nearest Neighbors (KNN):
from sklearn.neighbors import KNeighborsClassifier

#Decision Tree:
from sklearn.tree import DecisionTreeClassifier
#Gradient Boosting:
from sklearn.ensemble import GradientBoostingClassifier
#Gaussian Naive Bayes:
from sklearn.naive_bayes import GaussianNB
#AdaBoost:
from sklearn.ensemble import AdaBoostClassifier
#ICA
from sklearn.decomposition import FastICA

import math



In [3]:
X = load_breast_cancer().data
y = load_breast_cancer().target

In [4]:
X.shape

(569, 30)

In [5]:
scaler = StandardScaler()
Xs = scaler.fit_transform(X)

In [57]:
class MyActLearning():

    def __init__(self, n_iterations=5, k_samples=10):
        self.n_iterations = n_iterations
        self.k_samples = k_samples
        self.classifier = LogisticRegression(random_state=42, max_iter=1000)
    
    def fit(self, X_pool, y_pool):
        test_size_aux = X_pool.shape[0] - (0+1)*10
        X_train, X_val, y_train, y_val = train_test_split(X_pool, y_pool, test_size=test_size_aux, random_state=42)

        for i in range(self.n_iterations):
            model = self.classifier.fit(X_train, y_train)
            y_pred = self.classifier.predict(X_val)

            acc = accuracy_score(y_pred, y_val)

            #print(f'Interation {i}: acc = {round(acc,3)*100.}')

            # Obter as probabilidades das classes para o conjunto de teste
            probabilities = model.predict_proba(X_val)

            # Calcular a diferença entre as probabilidades das classes
            differences = np.abs(probabilities[:, 0] - probabilities[:, 1])

            # Ordenar os exemplos pelo valor absoluto da diferença (do menor para o maior)
            sorted_indices = np.argsort(differences)

            # Exibir os 10 exemplos mais incertos
            most_uncertain_indices = sorted_indices[:self.k_samples]

            X_train = np.concatenate((X_train, X_val[most_uncertain_indices]))
            y_train = np.concatenate((y_train, y_val[most_uncertain_indices]))

            X_val = np.delete(X_val, most_uncertain_indices, axis=0)
            y_val = np.delete(y_val, most_uncertain_indices)
    
    def get_params(self, deep=True):
        # Retorna os parâmetros do estimador em um dicionário
        return {"n_iterations": self.n_iterations, "k_samples": self.k_samples}
    
    def predict(self, X):
        y_pred = self.classifier.predict(X)
        return y_pred

In [13]:
pca = PCA(n_components=2)
pca.fit(Xs)
Xs_pca = pca.transform(Xs)

In [58]:
my_model = MyActLearning()


In [30]:
X_train, X_test, y_train, y_test,  = train_test_split(Xs, y, random_state=42, test_size=0.2)
my_model = MyActLearning()
my_model.fit(X_train, y_train)

Interation 0: acc = 93.89999999999999
Interation 1: acc = 96.8
Interation 2: acc = 97.39999999999999
Interation 3: acc = 98.8
Interation 4: acc = 98.8


In [31]:
y_pred = my_model.predict(X_test)
acc = round(accuracy_score(y_pred, y_test),3)*100

In [62]:
lr = LogisticRegression(random_state=42, max_iter=1000)
lr.fit(X_train, y_train)
y_pred2 = lr.predict(X_test)
acc2 = round(accuracy_score(y_pred2, y_test), 3)*100

In [61]:
print('Comparação dos dois valores calculados:')
print(f'LR: {acc2}\nMyActLearning: {acc}')

Comparação dos dois valores calculados:
LR: 97.39999999999999
MyActLearning: 97.39999999999999


In [63]:
cv = KFold(n_splits=5, random_state=0, shuffle=True)
#Executar a validação cruzada e obter as pontuações de validação cruzada
scores2 = cross_val_score(lr, Xs, y, cv=cv, scoring="accuracy")
scores = cross_val_score(my_model, Xs, y, cv=cv, scoring="accuracy")

print(f'Resultado da validção cruzada:\nLRegression:{round(scores2.mean(),4)*100}\nAtivo: {round(scores.mean(),4)*100} ')


Resultado da validção cruzada:
LRegression:97.37
Ativo: 95.78999999999999 


In [64]:
scores2_pca = cross_val_score(lr, Xs_pca, y, cv=cv, scoring="accuracy")
scores_pca = cross_val_score(my_model, Xs_pca, y, cv=cv, scoring="accuracy")



In [65]:
print(f'Após o PCA:\nLRegression:{round(scores2_pca.mean(),4)*100}\nAtivo: {round(scores_pca.mean(),4)*100} ')


Após o PCA:
LRegression:94.74000000000001
Ativo: 93.15 
