In [1]:
import numpy as np
from sklearn.linear_model import Perceptron
from sklearn.datasets import make_classification
import time

# Initialize the Perceptron model
perceptron = Perceptron(max_iter=1, warm_start=True)

# Function to simulate online data stream
def online_data_stream(n_samples=1000, n_features=2):
    for _ in range(n_samples):
        X, y = make_classification(n_samples=1, n_features=n_features, n_informative=n_features, n_redundant=0, random_state=None)
        yield X[0], y[0]

# Generate and process the online data stream
n_samples = 1000
test_accuracy_list = []
for i, (X_i, y_i) in enumerate(online_data_stream(n_samples=n_samples)):
    X_i = X_i.reshape(1, -1)
    y_i = np.array([y_i])

    # Update the model with the current data point
    perceptron.partial_fit(X_i, y_i, classes=np.array([0, 1]))

    # Every 100 samples, evaluate the model on a new batch of data
    if (i + 1) % 100 == 0:
        X_test, y_test = make_classification(n_samples=200, n_features=2, n_informative=2, n_redundant=0, random_state=42)
        test_score = perceptron.score(X_test, y_test)
        test_accuracy_list.append(test_score)
        print(f"Processed {i + 1} samples, Test accuracy: {test_score:.4f}")
        time.sleep(1)  # Simulate delay in data arrival

# Final evaluation after all samples are processed
print("Final Test Accuracies:", test_accuracy_list)


Processed 100 samples, Test accuracy: 0.7100
Processed 200 samples, Test accuracy: 0.5000
Processed 300 samples, Test accuracy: 0.5000
Processed 400 samples, Test accuracy: 0.5000
Processed 500 samples, Test accuracy: 0.6850
Processed 600 samples, Test accuracy: 0.6700
Processed 700 samples, Test accuracy: 0.5000
Processed 800 samples, Test accuracy: 0.5000
Processed 900 samples, Test accuracy: 0.5000
Processed 1000 samples, Test accuracy: 0.5000
Final Test Accuracies: [0.71, 0.5, 0.5, 0.5, 0.685, 0.67, 0.5, 0.5, 0.5, 0.5]
