In [34]:
import numpy as np
#import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score, log_loss

In [35]:
# Notebook-wide NumPy random Generator with a fixed seed (42).
# The streaming data generator below draws from this shared object, so its
# state advances every time batches are produced.
random_number_genrator = np.random.default_rng(seed=42)

In [36]:
def online_learning_streaming(n_batchs = 50,
                             batch_sizes = 2048,
                             n_features = 25, 
                             drift = 10,
                             rng = None):
    """Yield shuffled two-class Gaussian batches with one abrupt distribution change.

    Every batch holds ``batch_sizes // 2`` samples of class 0 and the remaining
    samples of class 1, each drawn from a multivariate normal with identity
    covariance. Batches with index ``b < drift`` use class means ``0`` and
    ``0.25`` (per feature); later batches use ``-0.25`` and ``1``.

    Parameters
    ----------
    n_batchs : int
        Number of batches to generate.
    batch_sizes : int
        Number of samples in each batch.
    n_features : int
        Dimensionality of every sample.
    drift : int
        Index of the first batch drawn from the post-drift distributions.
    rng : numpy.random.Generator, optional
        Source of randomness. When omitted, the notebook-level
        ``random_number_genrator`` is used, which matches the previous
        behaviour. Passing a dedicated generator makes a stream reproducible
        independently of how far the shared generator has already advanced.

    Yields
    ------
    X : numpy.ndarray of shape (batch_sizes, n_features)
        Shuffled samples.
    y : numpy.ndarray of shape (batch_sizes,)
        Integer class labels (0 or 1) aligned with the rows of ``X``.
    b : int
        Zero-based batch index.
    """
    if rng is None:
        # Backward-compatible default: share the notebook-level generator.
        rng = random_number_genrator

    # Class means before ("a") and after ("b") the drift point.
    mean0_a, mean1_a = np.zeros(n_features), np.full(n_features, 0.25)
    mean0_b, mean1_b = np.full(n_features, -0.25), np.ones(n_features)
    cov = np.eye(n_features)

    # Class sizes and the unshuffled label vector are the same for every batch.
    number0 = batch_sizes // 2
    number1 = batch_sizes - number0
    y = np.hstack([np.zeros(number0, dtype = int), np.ones(number1, dtype = int)])

    for b in range(n_batchs):
        if b < drift:
            mean0, mean1 = mean0_a, mean1_a
        else:
            mean0, mean1 = mean0_b, mean1_b

        # Draw order (class 0, class 1, permutation) is kept fixed so that a
        # given generator state always produces the same stream.
        data0 = rng.multivariate_normal(mean0, cov, size = number0)
        data1 = rng.multivariate_normal(mean1, cov, size = number1)
        X = np.vstack([data0, data1])

        # Shuffle so the two classes are interleaved within the batch;
        # fancy indexing returns fresh arrays on every iteration.
        index = rng.permutation(len(y))

        yield X[index], y[index], b

In [47]:
def online_learning_streaming_classification(n_batchs = 50,
                                            batch_size = 2048, 
                                            n_features = 25):
    """Incrementally fit a scaler and a logistic SGD classifier on the synthetic stream.

    For each batch produced by ``online_learning_streaming`` the scaler's
    running statistics are updated with the batch, the batch is standardised,
    the classifier takes one ``partial_fit`` step on it, and accuracy and
    log-loss are printed.

    NOTE: the metrics are computed on the same batch the model was just
    updated with, so they describe the fit after the update, not held-out
    performance.

    The ``drift`` argument of the stream is not forwarded, so the generator's
    own default applies.

    Parameters
    ----------
    n_batchs : int
        Number of batches to consume from the stream.
    batch_size : int
        Number of samples per batch (passed to the stream as ``batch_sizes``).
    n_features : int
        Number of features per sample.

    Returns
    -------
    tuple
        ``(scaler, classifier)``: the fitted ``StandardScaler`` and
        ``SGDClassifier`` after the last batch.
    """
    label_set = np.array([0, 1])
    scaler = StandardScaler()
    sgd_model = SGDClassifier(random_state = 42,
                              alpha = 1e-4,
                              learning_rate = "optimal",
                              loss = "log_loss")

    stream = online_learning_streaming(n_batchs = n_batchs,
                                       batch_sizes = batch_size,
                                       n_features = n_features)

    for features, targets, b in stream:
        # Update the running mean/variance first, then standardise this batch.
        scaled = scaler.partial_fit(features).transform(features)

        # The full label set is supplied only on the very first incremental fit.
        fit_kwargs = {"classes": label_set} if b == 0 else {}
        sgd_model.partial_fit(scaled, targets, **fit_kwargs)

        proba = sgd_model.predict_proba(scaled)
        # Column index of the largest probability serves as the predicted label.
        accuracy = accuracy_score(targets, np.argmax(proba, axis = 1))
        loss = log_loss(targets, proba, labels = label_set)

        print(f"Model Current Batch: {b:03d}---- Accuracy: {accuracy:.3f} ---- Loss: {loss:.3f}")

    return scaler, sgd_model

In [48]:
if __name__ == "__main__":
    # Run the streaming demo: 50 batches of 2048 samples with 25 features.
    scaling_method, classification_model = online_learning_streaming_classification(
        n_batchs = 50,
        batch_size = 2048,
        n_features = 25,
    )

Model Current Batch: 000---- Accuracy: 0.631 ---- Loss: 9.677
Model Current Batch: 001---- Accuracy: 0.606 ---- Loss: 7.470
Model Current Batch: 002---- Accuracy: 0.656 ---- Loss: 5.522
Model Current Batch: 003---- Accuracy: 0.594 ---- Loss: 5.820
Model Current Batch: 004---- Accuracy: 0.646 ---- Loss: 3.742
Model Current Batch: 005---- Accuracy: 0.682 ---- Loss: 2.898
Model Current Batch: 006---- Accuracy: 0.683 ---- Loss: 2.423
Model Current Batch: 007---- Accuracy: 0.640 ---- Loss: 2.393
Model Current Batch: 008---- Accuracy: 0.652 ---- Loss: 1.983
Model Current Batch: 009---- Accuracy: 0.657 ---- Loss: 2.132
Model Current Batch: 010---- Accuracy: 1.000 ---- Loss: 0.003
Model Current Batch: 011---- Accuracy: 0.999 ---- Loss: 0.004
Model Current Batch: 012---- Accuracy: 0.999 ---- Loss: 0.002
Model Current Batch: 013---- Accuracy: 1.000 ---- Loss: 0.001
Model Current Batch: 014---- Accuracy: 1.000 ---- Loss: 0.001
Model Current Batch: 015---- Accuracy: 1.000 ---- Loss: 0.001
Model Cu