In [None]:
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score
import matplotlib.pyplot as plt
from collections import deque
from joblib import Parallel, delayed
import time

# Seed NumPy's global generator so every run draws the same synthetic stream.
np.random.seed(40)

# Interval from which the features x1, x2 and x3 are sampled uniformly.
x_range = (0, 10)

# Labelling threshold θ for each consecutive segment of the stream; the
# threshold changes from one segment to the next.
theta_values = [7, 8, 12, 13, 14, 9, 8, 7, 13, 12, 13, 11, 15, 16, 14]

# Length of the stream and the number of consecutive samples sharing one θ.
total_samples = 75000
samples_per_theta = total_samples // len(theta_values)

# Draw the feature columns one after another (x1, then x2, then x3) so the
# random numbers are consumed in a fixed order.
x1, x2, x3 = (
    np.random.uniform(x_range[0], x_range[1], total_samples) for _ in range(3)
)

# Per-sample threshold: each θ repeated for the whole of its segment.
theta_repeated = np.repeat(theta_values, samples_per_theta)

# A sample is labelled '1' when x1 + x2 exceeds its θ and '0' otherwise;
# x3 takes no part in the labelling rule.
labels = np.where(theta_repeated < x1 + x2, '1', '0')

# One row per sample: x1, x2, x3, label. The label column holds strings, and
# consumers convert the feature columns back with astype(float).
data = np.column_stack([x1, x2, x3, labels])

def create_datastream(data, batch_size):
    """Split ``data`` into consecutive batches of at most ``batch_size`` items.

    Batches are the slices ``data[start:start + batch_size]`` taken in order,
    so the last batch is shorter when ``len(data)`` is not a multiple of
    ``batch_size``. Returns the batches as a list.
    """
    batches = []
    for start in range(0, len(data), batch_size):
        stop = start + batch_size
        batches.append(data[start:stop])
    return batches

def train_random_forest_classifier(X, y, n_estimators=50):
    """Fit a random forest on features ``X`` and labels ``y`` and return it.

    The forest is built with ``n_estimators`` trees and ``n_jobs=-1``.
    """
    forest = RandomForestClassifier(n_jobs=-1, n_estimators=n_estimators)
    # fit() hands back the fitted estimator itself.
    return forest.fit(X, y)

def adaptive_ensemble_size(C, sample, unique_label, anpha, min_num=3):
    """Collect, per label, the probabilities given by a prefix of the pool.

    For every label in ``unique_label`` the classifiers in ``C`` are visited
    in iteration order. The first ``min_num`` probabilities are always kept;
    after that a classifier's probability is kept only while it differs from
    the previously kept one by less than ``anpha``. The first classifier that
    violates this ends the walk for that label.

    Args:
        C: Iterable of fitted classifiers exposing ``predict_proba`` and
            ``classes_``.
        sample: Single sample in the 2-D shape accepted by ``predict_proba``.
        unique_label: Iterable of class labels to evaluate.
        anpha: Maximum allowed jump between consecutive kept probabilities.
        min_num: Number of classifiers accepted unconditionally.

    Returns:
        A list with one list of probabilities per label, in the order of
        ``unique_label``.
    """
    classifiers = list(C)
    # predict_proba does not depend on the label, so each classifier is
    # queried at most once; the lookup stays lazy so classifiers that no
    # label ever reaches are never evaluated.
    cached_rows = {}

    def class_probabilities(position):
        if position not in cached_rows:
            cached_rows[position] = classifiers[position].predict_proba(sample)[0]
        return cached_rows[position]

    probability_list = []
    for label in unique_label:
        li = []
        for position, clf in enumerate(classifiers):
            probabilities = class_probabilities(position)
            # Columns of predict_proba follow clf.classes_. A classifier
            # trained on a block that lacked this label has no such column;
            # treat that as probability 0 instead of raising IndexError.
            columns = np.flatnonzero(np.asarray(clf.classes_) == label)
            current_proba = probabilities[columns[0]] if columns.size else 0.0
            # `not li` keeps min_num <= 0 from indexing an empty list.
            if len(li) < min_num or not li or abs(current_proba - li[-1]) < anpha:
                li.append(current_proba)
            else:
                break
        probability_list.append(li)
    return probability_list

def linear_regression(x, y):
    """Least-squares fit of ``y = slope * x + intercept``.

    Returns the array ``[slope, intercept]`` computed by ``np.linalg.lstsq``.
    """
    # Design matrix with one row per observation: [x_i, 1].
    design = np.vstack((x, np.ones(len(x)))).T
    coefficients, _residuals, _rank, _singular = np.linalg.lstsq(design, y, rcond=None)
    return coefficients

def tendency_prediction(probability_list, Y, epsilon=0.01):
    """Pick the label whose extrapolated, weighted probability is highest.

    For each probability sequence a straight line is fitted over positions
    ``1..n`` and evaluated to obtain one extrapolated value. That value is
    placed in front of the sequence, and the extended sequence is averaged
    with weights ``1 - position * epsilon`` (position starting at 1).

    Args:
        probability_list: One sequence of probabilities per label, aligned
            with ``Y``. The sequences are left unmodified.
        Y: Labels, indexable by position.
        epsilon: Per-position weight decay.

    Returns:
        The element of ``Y`` with the largest weighted score.
    """
    predicted_probabilities = []
    for li in probability_list:
        count = len(li)
        positions = np.arange(1, count + 1)
        slope, intercept = linear_regression(positions, np.array(li))
        # NOTE(review): positions run 1..count, so the immediate next point
        # would be count + 1; the original extrapolates at count + 2, which is
        # kept here - confirm whether that offset is intended.
        next_value = slope * (count + 2) + intercept
        # Build a new list rather than inserting into the caller's sequence.
        extended = [next_value, *li]
        weighted_prob = sum(
            value * (1 - rank * epsilon)
            for rank, value in enumerate(extended, start=1)
        ) / len(extended)
        predicted_probabilities.append(weighted_prob)
    return Y[np.argmax(predicted_probabilities)]

def predict_sample(C, row, unique_labels, len_Bi_1):
    """Predict the label of one stream row using the classifier pool ``C``.

    ``row`` holds the features followed by the label in its last position;
    only the features are used. ``len_Bi_1`` is the size of the block the row
    belongs to and scales the tolerance passed to the ensemble selection.
    """
    # Drop the trailing label and shape the features as a single-row matrix.
    features = row[:-1].astype(float).reshape(1, -1)
    # Tolerance shrinks as blocks grow: it equals 0.2 for a 1500-row block.
    tolerance = (1500 / len_Bi_1) * 0.2
    per_label_probabilities = adaptive_ensemble_size(C, features, unique_labels, tolerance)
    return tendency_prediction(per_label_probabilities, unique_labels)

def _plot_block_series(values, color, title, ylabel):
    """Show one per-block series as a line plot with point markers."""
    plt.figure(figsize=(10, 6))
    plt.plot(values, marker='o', linestyle='-', color=color)
    plt.title(title)
    plt.xlabel('Block Index')
    plt.ylabel(ylabel)
    plt.grid(True)
    plt.show()


def process_data_stream(S, m, k, unique_labels):
    """Train on each block of the stream and evaluate on the block after it.

    A classifier is trained on every block except the last and appended to a
    pool that keeps at most ``m`` classifiers. Once the pool holds at least
    ``k`` classifiers, every row of the following block is predicted with
    ``predict_sample``. Overall precision, recall, F1 and accuracy are
    printed, and per-block accuracy and prediction time are plotted.

    Args:
        S: Sequence of blocks; each block is a 2-D array whose last column is
            the label and whose other columns are convertible to float.
        m: Maximum number of classifiers kept in the pool.
        k: Minimum pool size before predictions start.
        unique_labels: Class labels passed on to ``predict_sample``.

    Returns:
        None.
    """
    C = deque(maxlen=m)
    true_labels = []
    predicted_labels = []
    block_accuracies = []
    prediction_times = []

    # Pair every block with its successor: train on Bi, evaluate on Bi_1.
    for Bi, Bi_1 in zip(S, S[1:]):
        X = Bi[:, :-1].astype(float)
        y = Bi[:, -1]
        C.append(train_random_forest_classifier(X, y))
        if len(C) < k:
            continue

        block_size = len(Bi_1)
        # perf_counter is a monotonic clock meant for measuring intervals.
        start_time = time.perf_counter()
        block_predictions = Parallel(n_jobs=-1)(
            delayed(predict_sample)(C, row, unique_labels, block_size)
            for row in Bi_1
        )
        prediction_times.append(time.perf_counter() - start_time)

        block_truth = Bi_1[:, -1]
        true_labels.extend(block_truth)
        predicted_labels.extend(block_predictions)
        # Score the block from its own labels rather than re-slicing the
        # accumulated lists.
        block_accuracies.append(accuracy_score(block_truth, block_predictions))

    if not true_labels:
        # The pool never reached k classifiers with a block left to predict,
        # so there is nothing to score or plot.
        print("No blocks were evaluated; nothing to report.")
        return

    precision = precision_score(true_labels, predicted_labels, average='weighted')
    recall = recall_score(true_labels, predicted_labels, average='weighted')
    f1 = f1_score(true_labels, predicted_labels, average='weighted')
    accuracy = accuracy_score(true_labels, predicted_labels)

    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1-Score: {f1}")
    print(f"Accuracy: {accuracy}")

    _plot_block_series(block_accuracies, 'b', 'Accuracy of Each Block', 'Accuracy')
    _plot_block_series(
        prediction_times, 'r', 'Prediction Time of Each Block', 'Time (seconds)'
    )

# Pool settings: keep at most m classifiers, and start predicting once the
# pool holds at least k of them.
m = 15
k = 3

# Cut the synthetic dataset into consecutive blocks of batch_size rows.
batch_size = 500
S = create_datastream(data, batch_size=batch_size)

# Distinct class labels found in the last column of the dataset.
unique_labels = np.unique(data[:, -1])

# Run the stream experiment; it prints the metrics and shows the plots.
process_data_stream(S=S, m=m, k=k, unique_labels=unique_labels)

