In [1]:
import csv
import pandas as pd
import statistics
import random
from scipy import stats
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import accuracy_score

In [2]:
def Check_Significant_Energy_Distance(df_1, df_2, threshold=0.1):
    """Report the features whose energy distance between two frames is large.

    For every column of ``df_1`` the same-named column of ``df_2`` is looked
    up and ``scipy.stats.energy_distance`` is computed between the two
    columns. Each feature whose distance is strictly greater than
    ``threshold`` is printed and collected.

    Parameters
    ----------
    df_1 : pandas.DataFrame
        First sample; its columns define which features are compared.
    df_2 : pandas.DataFrame
        Second sample; must contain every column of ``df_1``.
    threshold : float, optional
        Distance above which a feature is reported. Defaults to 0.1, the
        value that used to be hard-coded.

    Returns
    -------
    tuple of (bool, list)
        ``True`` if at least one feature exceeded the threshold, together
        with the list of those feature labels in column order.

    Raises
    ------
    KeyError
        If ``df_2`` lacks one of the columns of ``df_1``.
    """
    feature_detected = []

    for feature in df_1.columns:
        distance = stats.energy_distance(df_1[feature], df_2[feature])

        if distance > threshold:
            # str(feature): column labels are not guaranteed to be strings
            # (pandas defaults to integer labels), and "str + int" raises.
            print("Feature Name: " + str(feature) + " Distance: " + str(distance))
            feature_detected.append(feature)

    # The flag is exactly "something was detected".
    return bool(feature_detected), feature_detected

# Adult Evaluation

In [None]:
# Drift-detection benchmark on the Adult stream: a random forest trained on the
# first batch is scored on every later batch, once frozen and once retrained
# whenever the per-feature energy-distance check flags drift.
Evaluation_Iteration = 50   # independent passes over the stream
Batch_Size = 2500           # rows per batch
Drift_Batch = 9             # only batch at which a detection counts as a true positive
Label_Column = '65'         # positional column (as str) used as the classifier target
# Subtracted from the delay once drift is flagged; presumably the row offset of
# the injected drift inside Drift_Batch -- TODO confirm against the dataset.
Drift_Offset = 1921
# Dataset_Path = 'Datasets/Synthetic_Feature_Adult_Abrupt.csv'
Dataset_Path = 'Datasets/Synthetic_Feature_Adult_Gradual.csv'

TPR_Total = []
FPR_Total = []
Delay_Total = []

for i in range(Evaluation_Iteration):
    print("Currently Working on Iteration: " + str(i+1))

    batch_index = 0
    batch = []

    reference_window = []
    current_window = []

    drifts_detected = []
    eval_accuracy_no_change = []
    eval_accuracy_distance = []

    TP = 0
    TN = 0
    FP = 0
    FN = 0
    Delay = 0
    Delay_Stop_Count_Flag = False

    # newline='' is what the csv module asks for when it is given a file object.
    with open(Dataset_Path, newline='') as csv_file:
        csv_reader = csv.reader(csv_file)
        for row in csv_reader:
            batch.append(row)
            # Rows are only processed once a full batch has been collected;
            # a trailing partial batch is never evaluated.
            if len(batch) != Batch_Size:
                continue

            if batch_index == 0:
                # First batch: seed the reference window and train both models.
                reference_window.extend(batch)
                reference_window_df = pd.DataFrame(reference_window)
                reference_window_df.columns = reference_window_df.columns.astype(str)
                reference_window_x = reference_window_df.drop(Label_Column, axis=1)
                reference_window_y = reference_window_df[Label_Column]

                random_forest_no_change = RandomForestClassifier(n_estimators=20, random_state=42)
                random_forest_no_change.fit(reference_window_x, reference_window_y)

                random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
                random_forest_distance.fit(reference_window_x, reference_window_y)

                batch = []
                batch_index = batch_index + 1
                continue

            current_window.extend(batch)

            # Compare the new batch with an equally sized random draw from
            # the reference window, which may hold several batches by now.
            sample_reference_window = random.sample(reference_window, len(current_window))

            reference_window_df = pd.DataFrame(sample_reference_window)
            reference_window_df.columns = reference_window_df.columns.astype(str)
            reference_window_x = reference_window_df.drop(Label_Column, axis=1)

            current_window_df = pd.DataFrame(current_window)
            current_window_df.columns = current_window_df.columns.astype(str)
            current_window_x = current_window_df.drop(Label_Column, axis=1)
            current_window_y = current_window_df[Label_Column]

            # Score both models on the batch before any retraining happens.
            pred_no_change = random_forest_no_change.predict(current_window_x)
            accuracy_no_change = accuracy_score(current_window_y, pred_no_change)
            eval_accuracy_no_change.append(accuracy_no_change)

            pred_distance = random_forest_distance.predict(current_window_x)
            accuracy_distance = accuracy_score(current_window_y, pred_distance)
            eval_accuracy_distance.append(accuracy_distance)

            drift_flag, drift_feature = Check_Significant_Energy_Distance(reference_window_x, current_window_x)

            # Detection delay: every batch from Drift_Batch up to and including
            # the first one that flags drift adds Batch_Size; the flagging batch
            # also subtracts Drift_Offset and stops the count. Previously the
            # "no drift yet" case after Drift_Batch evaluated
            # `Delay + Batch_Size` without assigning it, so late detections
            # were under-reported.
            if batch_index >= Drift_Batch and not Delay_Stop_Count_Flag:
                Delay = Delay + Batch_Size
                if drift_flag:
                    Delay = Delay - Drift_Offset
                    Delay_Stop_Count_Flag = True

            if drift_flag:
                print('Drift Detected at Batch ' + str(batch_index))
                drifts_detected.append(batch_index)
                # Retrain the adaptive model on this batch alone (no
                # random_state here, so the refit differs between runs) and
                # restart the reference window from it.
                random_forest_distance = RandomForestClassifier(n_estimators=20)
                random_forest_distance.fit(current_window_x, current_window_y)
                reference_window = list(current_window)

                if batch_index == Drift_Batch:
                    TP = TP + 1
                else:
                    FP = FP + 1
            else:
                # No drift: the reference window keeps growing.
                reference_window.extend(current_window)
                if batch_index == Drift_Batch:
                    FN = FN + 1
                else:
                    TN = TN + 1

            current_window = []
            batch = []
            batch_index = batch_index + 1

    # Only Drift_Batch contributes to TP/FN, so TPR is 0 or 1 per iteration.
    # A stream that ends before Drift_Batch leaves TP + FN == 0 and raises
    # ZeroDivisionError here.
    TPR = TP / (TP + FN)
    FPR = FP / (FP + TN)
    TPR_Total.append(TPR)
    FPR_Total.append(FPR)
    Delay_Total.append(Delay)

# Aggregate over all iterations (stdev needs at least two of them).
mean_TPR = statistics.mean(TPR_Total)
stdev_TPR = statistics.stdev(TPR_Total)
mean_FPR = statistics.mean(FPR_Total)
stdev_FPR = statistics.stdev(FPR_Total)
mean_Delay = statistics.mean(Delay_Total)
stdev_Delay = statistics.stdev(Delay_Total)

print("TPR Total: " + str(TPR_Total))
print("FPR Total: " + str(FPR_Total))
print("Delay Total: " + str(Delay_Total))
print("TPR Mean: " + str(mean_TPR))
print("TPR Std: " + str(stdev_TPR))
print("FPR Mean: " + str(mean_FPR))
print("FPR Std: " + str(stdev_FPR))
print("Delay Mean: " + str(mean_Delay))
print("Delay Std: " + str(stdev_Delay))

# Bank Evaluation

In [None]:
# Drift-detection benchmark on the Bank stream: a random forest trained on the
# first batch is scored on every later batch, once frozen and once retrained
# whenever the per-feature energy-distance check flags drift.
Evaluation_Iteration = 50   # independent passes over the stream
Batch_Size = 2500           # rows per batch
Drift_Batch = 9             # only batch at which a detection counts as a true positive
Label_Column = '48'         # positional column (as str) used as the classifier target
# Subtracted from the delay once drift is flagged; presumably the row offset of
# the injected drift inside Drift_Batch -- TODO confirm against the dataset.
Drift_Offset = 106
# Dataset_Path = 'Datasets/Synthetic_Feature_Bank_Abrupt.csv'
Dataset_Path = 'Datasets/Synthetic_Feature_Bank_Gradual.csv'

TPR_Total = []
FPR_Total = []
Delay_Total = []

for i in range(Evaluation_Iteration):
    print("Currently Working on Iteration: " + str(i+1))

    batch_index = 0
    batch = []

    reference_window = []
    current_window = []

    drifts_detected = []
    eval_accuracy_no_change = []
    eval_accuracy_distance = []

    TP = 0
    TN = 0
    FP = 0
    FN = 0
    Delay = 0
    Delay_Stop_Count_Flag = False

    # newline='' is what the csv module asks for when it is given a file object.
    with open(Dataset_Path, newline='') as csv_file:
        csv_reader = csv.reader(csv_file)
        for row in csv_reader:
            batch.append(row)
            # Rows are only processed once a full batch has been collected;
            # a trailing partial batch is never evaluated.
            if len(batch) != Batch_Size:
                continue

            if batch_index == 0:
                # First batch: seed the reference window and train both models.
                reference_window.extend(batch)
                reference_window_df = pd.DataFrame(reference_window)
                reference_window_df.columns = reference_window_df.columns.astype(str)
                reference_window_x = reference_window_df.drop(Label_Column, axis=1)
                reference_window_y = reference_window_df[Label_Column]

                random_forest_no_change = RandomForestClassifier(n_estimators=20, random_state=42)
                random_forest_no_change.fit(reference_window_x, reference_window_y)

                random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
                random_forest_distance.fit(reference_window_x, reference_window_y)

                batch = []
                batch_index = batch_index + 1
                continue

            current_window.extend(batch)

            # Compare the new batch with an equally sized random draw from
            # the reference window, which may hold several batches by now.
            sample_reference_window = random.sample(reference_window, len(current_window))

            reference_window_df = pd.DataFrame(sample_reference_window)
            reference_window_df.columns = reference_window_df.columns.astype(str)
            reference_window_x = reference_window_df.drop(Label_Column, axis=1)

            current_window_df = pd.DataFrame(current_window)
            current_window_df.columns = current_window_df.columns.astype(str)
            current_window_x = current_window_df.drop(Label_Column, axis=1)
            current_window_y = current_window_df[Label_Column]

            # Score both models on the batch before any retraining happens.
            pred_no_change = random_forest_no_change.predict(current_window_x)
            accuracy_no_change = accuracy_score(current_window_y, pred_no_change)
            eval_accuracy_no_change.append(accuracy_no_change)

            pred_distance = random_forest_distance.predict(current_window_x)
            accuracy_distance = accuracy_score(current_window_y, pred_distance)
            eval_accuracy_distance.append(accuracy_distance)

            drift_flag, drift_feature = Check_Significant_Energy_Distance(reference_window_x, current_window_x)

            # Detection delay: every batch from Drift_Batch up to and including
            # the first one that flags drift adds Batch_Size; the flagging batch
            # also subtracts Drift_Offset and stops the count. Previously the
            # "no drift yet" case after Drift_Batch evaluated
            # `Delay + Batch_Size` without assigning it, so late detections
            # were under-reported.
            if batch_index >= Drift_Batch and not Delay_Stop_Count_Flag:
                Delay = Delay + Batch_Size
                if drift_flag:
                    Delay = Delay - Drift_Offset
                    Delay_Stop_Count_Flag = True

            if drift_flag:
                print('Drift Detected at Batch ' + str(batch_index))
                drifts_detected.append(batch_index)
                # Retrain the adaptive model on this batch alone (no
                # random_state here, so the refit differs between runs) and
                # restart the reference window from it.
                random_forest_distance = RandomForestClassifier(n_estimators=20)
                random_forest_distance.fit(current_window_x, current_window_y)
                reference_window = list(current_window)

                if batch_index == Drift_Batch:
                    TP = TP + 1
                else:
                    FP = FP + 1
            else:
                # No drift: the reference window keeps growing.
                reference_window.extend(current_window)
                if batch_index == Drift_Batch:
                    FN = FN + 1
                else:
                    TN = TN + 1

            current_window = []
            batch = []
            batch_index = batch_index + 1

    # Only Drift_Batch contributes to TP/FN, so TPR is 0 or 1 per iteration.
    # A stream that ends before Drift_Batch leaves TP + FN == 0 and raises
    # ZeroDivisionError here.
    TPR = TP / (TP + FN)
    FPR = FP / (FP + TN)
    TPR_Total.append(TPR)
    FPR_Total.append(FPR)
    Delay_Total.append(Delay)

# Aggregate over all iterations (stdev needs at least two of them).
mean_TPR = statistics.mean(TPR_Total)
stdev_TPR = statistics.stdev(TPR_Total)
mean_FPR = statistics.mean(FPR_Total)
stdev_FPR = statistics.stdev(FPR_Total)
mean_Delay = statistics.mean(Delay_Total)
stdev_Delay = statistics.stdev(Delay_Total)

print("TPR Total: " + str(TPR_Total))
print("FPR Total: " + str(FPR_Total))
print("Delay Total: " + str(Delay_Total))
print("TPR Mean: " + str(mean_TPR))
print("TPR Std: " + str(stdev_TPR))
print("FPR Mean: " + str(mean_FPR))
print("FPR Std: " + str(stdev_FPR))
print("Delay Mean: " + str(mean_Delay))
print("Delay Std: " + str(stdev_Delay))

# Credit Evaluation

In [None]:
# Drift-detection benchmark on the Credit stream: a random forest trained on
# the first batch is scored on every later batch, once frozen and once
# retrained whenever the per-feature energy-distance check flags drift.
Evaluation_Iteration = 50   # independent passes over the stream
Batch_Size = 1500           # rows per batch
Drift_Batch = 10            # only batch at which a detection counts as a true positive
Label_Column = '26'         # positional column (as str) used as the classifier target
# Subtracted from the delay once drift is flagged; presumably the row offset of
# the injected drift inside Drift_Batch -- TODO confirm against the dataset.
Drift_Offset = 1
# Dataset_Path = 'Datasets/Synthetic_Feature_Credit_Abrupt.csv'
Dataset_Path = 'Datasets/Synthetic_Feature_Credit_Gradual.csv'

TPR_Total = []
FPR_Total = []
Delay_Total = []

for i in range(Evaluation_Iteration):
    print("Currently Working on Iteration: " + str(i+1))

    batch_index = 0
    batch = []

    reference_window = []
    current_window = []

    drifts_detected = []
    eval_accuracy_no_change = []
    eval_accuracy_distance = []

    TP = 0
    TN = 0
    FP = 0
    FN = 0
    Delay = 0
    Delay_Stop_Count_Flag = False

    # newline='' is what the csv module asks for when it is given a file object.
    with open(Dataset_Path, newline='') as csv_file:
        csv_reader = csv.reader(csv_file)
        for row in csv_reader:
            batch.append(row)
            # Rows are only processed once a full batch has been collected;
            # a trailing partial batch is never evaluated.
            if len(batch) != Batch_Size:
                continue

            if batch_index == 0:
                # First batch: seed the reference window and train both models.
                reference_window.extend(batch)
                reference_window_df = pd.DataFrame(reference_window)
                reference_window_df.columns = reference_window_df.columns.astype(str)
                reference_window_x = reference_window_df.drop(Label_Column, axis=1)
                reference_window_y = reference_window_df[Label_Column]

                random_forest_no_change = RandomForestClassifier(n_estimators=20, random_state=42)
                random_forest_no_change.fit(reference_window_x, reference_window_y)

                random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
                random_forest_distance.fit(reference_window_x, reference_window_y)

                batch = []
                batch_index = batch_index + 1
                continue

            current_window.extend(batch)

            # Compare the new batch with an equally sized random draw from
            # the reference window, which may hold several batches by now.
            sample_reference_window = random.sample(reference_window, len(current_window))

            reference_window_df = pd.DataFrame(sample_reference_window)
            reference_window_df.columns = reference_window_df.columns.astype(str)
            reference_window_x = reference_window_df.drop(Label_Column, axis=1)

            current_window_df = pd.DataFrame(current_window)
            current_window_df.columns = current_window_df.columns.astype(str)
            current_window_x = current_window_df.drop(Label_Column, axis=1)
            current_window_y = current_window_df[Label_Column]

            # Score both models on the batch before any retraining happens.
            pred_no_change = random_forest_no_change.predict(current_window_x)
            accuracy_no_change = accuracy_score(current_window_y, pred_no_change)
            eval_accuracy_no_change.append(accuracy_no_change)

            pred_distance = random_forest_distance.predict(current_window_x)
            accuracy_distance = accuracy_score(current_window_y, pred_distance)
            eval_accuracy_distance.append(accuracy_distance)

            drift_flag, drift_feature = Check_Significant_Energy_Distance(reference_window_x, current_window_x)

            # Detection delay: every batch from Drift_Batch up to and including
            # the first one that flags drift adds Batch_Size; the flagging batch
            # also subtracts Drift_Offset and stops the count. Previously the
            # "no drift yet" case after Drift_Batch evaluated
            # `Delay + Batch_Size` without assigning it, so late detections
            # were under-reported.
            if batch_index >= Drift_Batch and not Delay_Stop_Count_Flag:
                Delay = Delay + Batch_Size
                if drift_flag:
                    Delay = Delay - Drift_Offset
                    Delay_Stop_Count_Flag = True

            if drift_flag:
                print('Drift Detected at Batch ' + str(batch_index))
                drifts_detected.append(batch_index)
                # Retrain the adaptive model on this batch alone (no
                # random_state here, so the refit differs between runs) and
                # restart the reference window from it.
                random_forest_distance = RandomForestClassifier(n_estimators=20)
                random_forest_distance.fit(current_window_x, current_window_y)
                reference_window = list(current_window)

                if batch_index == Drift_Batch:
                    TP = TP + 1
                else:
                    FP = FP + 1
            else:
                # No drift: the reference window keeps growing.
                reference_window.extend(current_window)
                if batch_index == Drift_Batch:
                    FN = FN + 1
                else:
                    TN = TN + 1

            current_window = []
            batch = []
            batch_index = batch_index + 1

    # Only Drift_Batch contributes to TP/FN, so TPR is 0 or 1 per iteration.
    # A stream that ends before Drift_Batch leaves TP + FN == 0 and raises
    # ZeroDivisionError here.
    TPR = TP / (TP + FN)
    FPR = FP / (FP + TN)
    TPR_Total.append(TPR)
    FPR_Total.append(FPR)
    Delay_Total.append(Delay)

# Aggregate over all iterations (stdev needs at least two of them).
mean_TPR = statistics.mean(TPR_Total)
stdev_TPR = statistics.stdev(TPR_Total)
mean_FPR = statistics.mean(FPR_Total)
stdev_FPR = statistics.stdev(FPR_Total)
mean_Delay = statistics.mean(Delay_Total)
stdev_Delay = statistics.stdev(Delay_Total)

print("TPR Total: " + str(TPR_Total))
print("FPR Total: " + str(FPR_Total))
print("Delay Total: " + str(Delay_Total))
print("TPR Mean: " + str(mean_TPR))
print("TPR Std: " + str(stdev_TPR))
print("FPR Mean: " + str(mean_FPR))
print("FPR Std: " + str(stdev_FPR))
print("Delay Mean: " + str(mean_Delay))
print("Delay Std: " + str(stdev_Delay))

# Gamma Evaluation

In [None]:
# Drift-detection benchmark on the Gamma stream: a random forest trained on
# the first batch is scored on every later batch, once frozen and once
# retrained whenever the per-feature energy-distance check flags drift.
Evaluation_Iteration = 50   # independent passes over the stream
Batch_Size = 1000           # rows per batch
Drift_Batch = 9             # only batch at which a detection counts as a true positive
Label_Column = '10'         # positional column (as str) used as the classifier target
# Subtracted from the delay once drift is flagged; presumably the row offset of
# the injected drift inside Drift_Batch -- TODO confirm against the dataset.
Drift_Offset = 510
# Dataset_Path = 'Datasets/Synthetic_Feature_Gamma_Abrupt.csv'
Dataset_Path = 'Datasets/Synthetic_Feature_Gamma_Gradual.csv'

TPR_Total = []
FPR_Total = []
Delay_Total = []

for i in range(Evaluation_Iteration):
    print("Currently Working on Iteration: " + str(i+1))

    batch_index = 0
    batch = []

    reference_window = []
    current_window = []

    drifts_detected = []
    eval_accuracy_no_change = []
    eval_accuracy_distance = []

    TP = 0
    TN = 0
    FP = 0
    FN = 0
    Delay = 0
    Delay_Stop_Count_Flag = False

    # newline='' is what the csv module asks for when it is given a file object.
    with open(Dataset_Path, newline='') as csv_file:
        csv_reader = csv.reader(csv_file)
        for row in csv_reader:
            batch.append(row)
            # Rows are only processed once a full batch has been collected;
            # a trailing partial batch is never evaluated.
            if len(batch) != Batch_Size:
                continue

            if batch_index == 0:
                # First batch: seed the reference window and train both models.
                reference_window.extend(batch)
                reference_window_df = pd.DataFrame(reference_window)
                reference_window_df.columns = reference_window_df.columns.astype(str)
                reference_window_x = reference_window_df.drop(Label_Column, axis=1)
                reference_window_y = reference_window_df[Label_Column]

                random_forest_no_change = RandomForestClassifier(n_estimators=20, random_state=42)
                random_forest_no_change.fit(reference_window_x, reference_window_y)

                random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
                random_forest_distance.fit(reference_window_x, reference_window_y)

                batch = []
                batch_index = batch_index + 1
                continue

            current_window.extend(batch)

            # Compare the new batch with an equally sized random draw from
            # the reference window, which may hold several batches by now.
            sample_reference_window = random.sample(reference_window, len(current_window))

            reference_window_df = pd.DataFrame(sample_reference_window)
            reference_window_df.columns = reference_window_df.columns.astype(str)
            reference_window_x = reference_window_df.drop(Label_Column, axis=1)

            current_window_df = pd.DataFrame(current_window)
            current_window_df.columns = current_window_df.columns.astype(str)
            current_window_x = current_window_df.drop(Label_Column, axis=1)
            current_window_y = current_window_df[Label_Column]

            # Score both models on the batch before any retraining happens.
            pred_no_change = random_forest_no_change.predict(current_window_x)
            accuracy_no_change = accuracy_score(current_window_y, pred_no_change)
            eval_accuracy_no_change.append(accuracy_no_change)

            pred_distance = random_forest_distance.predict(current_window_x)
            accuracy_distance = accuracy_score(current_window_y, pred_distance)
            eval_accuracy_distance.append(accuracy_distance)

            drift_flag, drift_feature = Check_Significant_Energy_Distance(reference_window_x, current_window_x)

            # Detection delay: every batch from Drift_Batch up to and including
            # the first one that flags drift adds Batch_Size; the flagging batch
            # also subtracts Drift_Offset and stops the count. Previously the
            # "no drift yet" case after Drift_Batch evaluated
            # `Delay + Batch_Size` without assigning it, so late detections
            # were under-reported.
            if batch_index >= Drift_Batch and not Delay_Stop_Count_Flag:
                Delay = Delay + Batch_Size
                if drift_flag:
                    Delay = Delay - Drift_Offset
                    Delay_Stop_Count_Flag = True

            if drift_flag:
                print('Drift Detected at Batch ' + str(batch_index))
                drifts_detected.append(batch_index)
                # Retrain the adaptive model on this batch alone (no
                # random_state here, so the refit differs between runs) and
                # restart the reference window from it.
                random_forest_distance = RandomForestClassifier(n_estimators=20)
                random_forest_distance.fit(current_window_x, current_window_y)
                reference_window = list(current_window)

                if batch_index == Drift_Batch:
                    TP = TP + 1
                else:
                    FP = FP + 1
            else:
                # No drift: the reference window keeps growing.
                reference_window.extend(current_window)
                if batch_index == Drift_Batch:
                    FN = FN + 1
                else:
                    TN = TN + 1

            current_window = []
            batch = []
            batch_index = batch_index + 1

    # Only Drift_Batch contributes to TP/FN, so TPR is 0 or 1 per iteration.
    # A stream that ends before Drift_Batch leaves TP + FN == 0 and raises
    # ZeroDivisionError here.
    TPR = TP / (TP + FN)
    FPR = FP / (FP + TN)
    TPR_Total.append(TPR)
    FPR_Total.append(FPR)
    Delay_Total.append(Delay)

# Aggregate over all iterations (stdev needs at least two of them).
mean_TPR = statistics.mean(TPR_Total)
stdev_TPR = statistics.stdev(TPR_Total)
mean_FPR = statistics.mean(FPR_Total)
stdev_FPR = statistics.stdev(FPR_Total)
mean_Delay = statistics.mean(Delay_Total)
stdev_Delay = statistics.stdev(Delay_Total)

print("TPR Total: " + str(TPR_Total))
print("FPR Total: " + str(FPR_Total))
print("Delay Total: " + str(Delay_Total))
print("TPR Mean: " + str(mean_TPR))
print("TPR Std: " + str(stdev_TPR))
print("FPR Mean: " + str(mean_FPR))
print("FPR Std: " + str(stdev_FPR))
print("Delay Mean: " + str(mean_Delay))
print("Delay Std: " + str(stdev_Delay))

# SEA Evaluation

In [None]:
# Drift-detection benchmark on the SEA stream. A random forest trained on the
# first batch is scored on every later batch, once frozen and once retrained
# whenever Check_Significant_Energy_Distance flags drift. This cell tracks
# TPR/FPR only; no detection delay is computed here.
Evaluation_Iteration = 2   # independent passes over the stream
Batch_Size = 5000          # rows per batch
Drift_Batch = 10           # only batch at which a detection counts as a true positive

TPR_Total = []
FPR_Total = []

for i in range(Evaluation_Iteration):
    print("Currently Working on Iteration: " + str(i+1))

    batch_index = 0
    batch = []

    reference_window = []
    current_window = []

    drifts_detected = []
    eval_accuracy_no_change = []
    eval_accuracy_distance = []

    TP = 0
    TN = 0
    FP = 0
    FN = 0

    # Exactly one of the dataset variants below is active at a time.
#     with open('Datasets/Synthetic_MOA_SEA_Abrupt_1.csv') as csv_file:
#     with open('Datasets/Synthetic_MOA_SEA_Abrupt_2.csv') as csv_file:
#     with open('Datasets/Synthetic_MOA_SEA_Gradual_1.csv') as csv_file:
    with open('Datasets/Synthetic_MOA_SEA_Gradual_2.csv') as csv_file:
        drifts_detected = []
        csv_reader = csv.reader(csv_file)
        for row in csv_reader:
            batch.append(row)
            # Rows are only processed once a full batch has been collected;
            # a trailing partial batch is never evaluated.
            if len(batch) == Batch_Size:
                if batch_index == 0:
                    # First batch: seed the reference window and train both
                    # models; column '3' is used as the classifier target.
                    reference_window.extend(batch)
                    reference_window_df = pd.DataFrame(reference_window)
                    reference_window_df.columns = reference_window_df.columns.astype(str)
                    reference_window_x = reference_window_df.drop('3', axis=1)
                    reference_window_y = reference_window_df['3']

                    random_forest_no_change = RandomForestClassifier(n_estimators=20,random_state=42)
                    random_forest_no_change.fit(reference_window_x, reference_window_y)

                    random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
                    random_forest_distance.fit(reference_window_x, reference_window_y)

                    batch = []
                    batch_index = batch_index + 1
                else:
                    # The whole reference window is compared here; it is not
                    # subsampled in this cell.
                    reference_window_df = pd.DataFrame(reference_window)
                    reference_window_df.columns = reference_window_df.columns.astype(str)
                    reference_window_x = reference_window_df.drop('3', axis=1)

                    current_window.extend(batch)
                    current_window_df = pd.DataFrame(current_window)
                    current_window_df.columns = current_window_df.columns.astype(str)
                    current_window_x = current_window_df.drop('3', axis=1)
                    current_window_y = current_window_df['3']

                    # Score both models on the batch before any retraining.
                    pred_no_change = random_forest_no_change.predict(current_window_x)
                    accuracy_no_change = accuracy_score(current_window_y, pred_no_change)
                    eval_accuracy_no_change.append(accuracy_no_change)

                    pred_distance = random_forest_distance.predict(current_window_x)
                    accuracy_distance = accuracy_score(current_window_y, pred_distance)
                    eval_accuracy_distance.append(accuracy_distance)

                    drift_flag, drift_feature = Check_Significant_Energy_Distance(reference_window_x, current_window_x)

                    if drift_flag:
                        print('Drift Detected at Batch ' + str(batch_index))
                        drifts_detected.append(batch_index)
                        # Retrain the adaptive model on this batch alone (no
                        # random_state is passed for the refit) and replace
                        # the reference window with it.
                        random_forest_distance = RandomForestClassifier(n_estimators=20)
                        random_forest_distance.fit(current_window_x, current_window_y)
                        reference_window = []
                        reference_window.extend(current_window)

                        if batch_index == Drift_Batch:
                            TP = TP + 1
                        else:
                            FP = FP + 1
                    else:
                        # No drift: the reference window is left unchanged
                        # (it is not extended with the current batch).
                        if batch_index == Drift_Batch:
                            FN = FN + 1
                        else:
                            TN = TN + 1

                    current_window = []
                    batch = []
                    batch_index = batch_index + 1

        # Only Drift_Batch contributes to TP/FN, so TPR is 0 or 1 per
        # iteration. A stream that ends before Drift_Batch leaves
        # TP + FN == 0 and raises ZeroDivisionError here.
        TPR = TP / (TP + FN)
        FPR = FP / (FP + TN)
        TPR_Total.append(TPR)
        FPR_Total.append(FPR)

# Aggregate over all iterations (stdev needs at least two of them).
mean_TPR = statistics.mean(TPR_Total)
stdev_TPR = statistics.stdev(TPR_Total)
mean_FPR = statistics.mean(FPR_Total)
stdev_FPR = statistics.stdev(FPR_Total)

# Output order: per-iteration TPR, per-iteration FPR, TPR mean, TPR stdev,
# FPR mean, FPR stdev.
print(TPR_Total)
print(FPR_Total)
print(mean_TPR)
print(stdev_TPR)
print(mean_FPR)
print(stdev_FPR)

# RBF Evaluation

In [None]:
Evaluation_Iteration = 50
Batch_Size = 2500
Drift_Batch = 20

TPR_Total = []
FPR_Total = []
Delay_Total = []

for i in range(Evaluation_Iteration):
    print("Currently Working on Iteration: " + str(i+1))
    
    batch_index = 0
    batch = []

    reference_window = []
    current_window = []

    drifts_detected = []
    eval_accuracy_no_change = []
    eval_accuracy_distance = []
    
    TP = 0
    TN = 0
    FP = 0
    FN = 0
    Delay = 0
    Delay_Stop_Count_Flag = False
    
#     with open('Datasets/Synthetic_MOA_RBF_Abrupt.csv') as csv_file:
    with open('Datasets/Synthetic_MOA_RBF_Gradual.csv') as csv_file:
        drifts_detected = []
        csv_reader = csv.reader(csv_file)
        for row in csv_reader:
            batch.append(row)
            if len(batch) == Batch_Size:
                if batch_index == 0:
                    reference_window.extend(batch)
                    reference_window_df = pd.DataFrame(reference_window)
                    reference_window_df.columns = reference_window_df.columns.astype(str)
                    reference_window_x = reference_window_df.drop('10', axis=1)
                    reference_window_y = reference_window_df['10']
                
                    random_forest_no_change = RandomForestClassifier(n_estimators=20,random_state=42)
                    random_forest_no_change.fit(reference_window_x, reference_window_y)
                    
                    random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
                    random_forest_distance.fit(reference_window_x, reference_window_y)
                    
                    batch = []
                    batch_index = batch_index + 1
                else:
                    current_window.extend(batch)
                    
                    sample_reference_window = random.sample(reference_window, len(current_window))
                    
#                     print(len(reference_window))
#                     print(len(sample_reference_window))
                    
                    reference_window_df = pd.DataFrame(sample_reference_window)
                    reference_window_df.columns = reference_window_df.columns.astype(str)
                    reference_window_x = reference_window_df.drop('10', axis=1)
                    
                    current_window_df = pd.DataFrame(current_window)
                    current_window_df.columns = current_window_df.columns.astype(str)
                    current_window_x = current_window_df.drop('10', axis=1)
                    current_window_y = current_window_df['10']
                    
                    pred_no_change = random_forest_no_change.predict(current_window_x)
                    accuracy_no_change = accuracy_score(current_window_y, pred_no_change)
                    eval_accuracy_no_change.append(accuracy_no_change)
                    
                    pred_distance = random_forest_distance.predict(current_window_x)
                    accuracy_distance = accuracy_score(current_window_y, pred_distance)
                    eval_accuracy_distance.append(accuracy_distance)
                    
                    drift_flag, drift_feature = Check_Significant_Energy_Distance(reference_window_x, current_window_x)
                    
                    # Detection delay, in instances, counted from the start of the
                    # drift batch: a batch in which drift is detected contributes
                    # Batch_Size - 1, every earlier missed batch contributes Batch_Size.
                    # Delay and Delay_Stop_Count_Flag are presumably initialised at the
                    # top of each evaluation iteration (not visible here) -- confirm.
                    if batch_index == Drift_Batch:
                        if drift_flag:
                            Delay = Batch_Size - 1
                            Delay_Stop_Count_Flag = True
                        else:
                            Delay = Batch_Size
                    elif batch_index > Drift_Batch and not Delay_Stop_Count_Flag:
                        if drift_flag:
                            # First detection after the drift batch: stop counting.
                            Delay = Delay + Batch_Size - 1
                            Delay_Stop_Count_Flag = True
                        else:
                            # Another missed batch. This used to be the bare expression
                            # `Delay + Batch_Size`, which discarded the sum, so the
                            # delay never grew past the drift batch.
                            Delay = Delay + Batch_Size
                    
                    if drift_flag:
                        print('Drift Detected at Batch ' + str(batch_index))
                        drifts_detected.append(batch_index)
                        random_forest_distance = RandomForestClassifier(n_estimators=20)
                        random_forest_distance.fit(current_window_x, current_window_y)
                        reference_window = []
                        reference_window.extend(current_window)
                        
                        if batch_index == Drift_Batch:
                            TP = TP + 1
                        else:
                            FP = FP + 1
                    else:
                        reference_window.extend(current_window)
                        if batch_index == Drift_Batch:
                            FN = FN + 1
                        else:
                            TN = TN + 1
#                     reference_window = []
#                     reference_window.extend(current_window)
                    current_window = []
                    batch = []
                    batch_index = batch_index + 1
                    
        TPR = TP / (TP + FN)
        FPR = FP / (FP + TN)
        TPR_Total.append(TPR)
        FPR_Total.append(FPR)
        Delay_Total.append(Delay)

# Collapse the per-iteration metrics into a mean and a sample standard
# deviation each (statistics.stdev needs at least two iterations).
mean_TPR, stdev_TPR = statistics.mean(TPR_Total), statistics.stdev(TPR_Total)
mean_FPR, stdev_FPR = statistics.mean(FPR_Total), statistics.stdev(FPR_Total)
mean_Delay, stdev_Delay = statistics.mean(Delay_Total), statistics.stdev(Delay_Total)

# Raw per-iteration values first ...
print(f"TPR Total: {TPR_Total}")
print(f"FPR Total: {FPR_Total}")
print(f"Delay Total: {Delay_Total}")
# ... followed by the aggregates, metric by metric.
print(f"TPR Mean: {mean_TPR}")
print(f"TPR Std: {stdev_TPR}")
print(f"FPR Mean: {mean_FPR}")
print(f"FPR Std: {stdev_FPR}")
print(f"Delay Mean: {mean_Delay}")
print(f"Delay Std: {stdev_Delay}")

In [None]:
# Single pass over the RBF abrupt-drift stream.
#
# The CSV is consumed in batches of Batch_Size rows. Batch 0 becomes the
# reference window and trains two identical random forests. On every later
# batch both forests are scored on the new data, the feature-distance check
# compares the batch against a random sample of the reference window, and on
# drift the "distance" forest is retrained and the reference window replaced.
# The "no change" forest is never retrained and serves as the baseline.
Batch_Size = 2500
# The label is the column at position 10; positions are cast to str below,
# hence the string name.
RBF_LABEL_COLUMN = '10'
batch_index = 0
batch = []
batches = []

reference_window = []
current_window = []

drifts_detected = []
eval_accuracy_no_change = []
eval_accuracy_distance = []

# newline='' is what the csv module asks for when handing it a file object,
# so that line endings inside quoted fields are interpreted by the reader.
with open('Datasets/Synthetic_MOA_RBF_Abrupt.csv', newline='') as csv_file:
    csv_reader = csv.reader(csv_file)
    for row in csv_reader:
        batch.append(row)
        # Rows left over after the last full batch are never evaluated.
        if len(batch) == Batch_Size:
            batches.append(batch_index)
            print("Currently Working on Batch " + str(batch_index))
            if batch_index == 0:
                # First batch: reference window and training set in one.
                reference_window.extend(batch)
                reference_window_df = pd.DataFrame(reference_window)
                reference_window_df.columns = reference_window_df.columns.astype(str)
                reference_window_x = reference_window_df.drop(RBF_LABEL_COLUMN, axis=1)
                reference_window_y = reference_window_df[RBF_LABEL_COLUMN]

                random_forest_no_change = RandomForestClassifier(n_estimators=20, random_state=42)
                random_forest_no_change.fit(reference_window_x, reference_window_y)

                random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
                random_forest_distance.fit(reference_window_x, reference_window_y)

                # Scored on the training batch itself, so that the accuracy
                # lists have one entry per element of `batches`.
                pred_no_change = random_forest_no_change.predict(reference_window_x)
                accuracy_no_change = accuracy_score(reference_window_y, pred_no_change)
                eval_accuracy_no_change.append(accuracy_no_change)

                pred_distance = random_forest_distance.predict(reference_window_x)
                accuracy_distance = accuracy_score(reference_window_y, pred_distance)
                eval_accuracy_distance.append(accuracy_distance)
            else:
                current_window.extend(batch)
                # Compare equally sized samples.
                # NOTE(review): uses the unseeded global `random` state, so the
                # sample (and possibly the drift decision) differs between runs.
                sample_reference_window = random.sample(reference_window, len(current_window))

                reference_window_df = pd.DataFrame(sample_reference_window)
                reference_window_df.columns = reference_window_df.columns.astype(str)
                reference_window_x = reference_window_df.drop(RBF_LABEL_COLUMN, axis=1)

                current_window_df = pd.DataFrame(current_window)
                current_window_df.columns = current_window_df.columns.astype(str)
                current_window_x = current_window_df.drop(RBF_LABEL_COLUMN, axis=1)
                current_window_y = current_window_df[RBF_LABEL_COLUMN]

                # Score both models before any retraining (test-then-train).
                pred_no_change = random_forest_no_change.predict(current_window_x)
                accuracy_no_change = accuracy_score(current_window_y, pred_no_change)
                eval_accuracy_no_change.append(accuracy_no_change)

                pred_distance = random_forest_distance.predict(current_window_x)
                accuracy_distance = accuracy_score(current_window_y, pred_distance)
                eval_accuracy_distance.append(accuracy_distance)

                # Check_Significant_Wasserstein_Distance is not defined in this
                # cell; it must already exist in the kernel when the cell runs.
                drift_flag, drift_feature = Check_Significant_Wasserstein_Distance(reference_window_x, current_window_x)

                if drift_flag:
                    print('Drift Detected at Batch ' + str(batch_index))
                    drifts_detected.append(batch_index)
                    # NOTE(review): the retrained forest has no random_state,
                    # unlike the initial one, so its accuracy is not reproducible.
                    random_forest_distance = RandomForestClassifier(n_estimators=20)
                    random_forest_distance.fit(current_window_x, current_window_y)
                    # The drifted batch becomes the new reference window.
                    reference_window = []
                    reference_window.extend(current_window)

                current_window = []

            # Start collecting the next batch.
            batch = []
            batch_index = batch_index + 1

print(batches)
print(eval_accuracy_no_change)
print(eval_accuracy_distance)
print(drifts_detected)

# Agrawal Evaluation

In [None]:
# Repeated drift-detection evaluation on the Agrawal gradual-drift stream.
#
# Each iteration replays the CSV in batches of Batch_Size rows. Batch 0 trains
# the models and seeds the reference window; every later batch is compared
# against the reference window with the energy-distance check. A detection at
# Drift_Batch counts as a true positive, a detection anywhere else as a false
# positive, and likewise for missed / correctly quiet batches.
#
# NOTE(review): nothing that feeds the drift check is random in this cell (no
# sampling of the reference window), so TPR and FPR look identical across
# iterations and their standard deviation will be 0 -- confirm this is intended.
Evaluation_Iteration = 2
Batch_Size = 2500
Drift_Batch = 20
# The label is the column at position 9; positions are cast to str below,
# hence the string name.
AGRAWAL_LABEL_COLUMN = '9'

TPR_Total = []
FPR_Total = []

for i in range(Evaluation_Iteration):
    print("Currently Working on Iteration: " + str(i+1))

    batch_index = 0
    batch = []

    reference_window = []
    current_window = []

    drifts_detected = []
    eval_accuracy_no_change = []
    eval_accuracy_distance = []

    TP = 0
    TN = 0
    FP = 0
    FN = 0

    # Swap in 'Datasets/Synthetic_MOA_Agrawal_Abrupt.csv' for the abrupt-drift variant.
    # newline='' is what the csv module asks for when handing it a file object.
    with open('Datasets/Synthetic_MOA_Agrawal_Gradual.csv', newline='') as csv_file:
        csv_reader = csv.reader(csv_file)
        for row in csv_reader:
            batch.append(row)
            # Rows left over after the last full batch are never evaluated.
            if len(batch) == Batch_Size:
                if batch_index == 0:
                    # First batch: reference window and training set in one.
                    reference_window.extend(batch)
                    reference_window_df = pd.DataFrame(reference_window)
                    reference_window_df.columns = reference_window_df.columns.astype(str)
                    reference_window_x = reference_window_df.drop(AGRAWAL_LABEL_COLUMN, axis=1)
                    reference_window_y = reference_window_df[AGRAWAL_LABEL_COLUMN]

                    random_forest_no_change = RandomForestClassifier(n_estimators=20, random_state=42)
                    random_forest_no_change.fit(reference_window_x, reference_window_y)

                    random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
                    random_forest_distance.fit(reference_window_x, reference_window_y)
                else:
                    # The reference window only changes when drift is detected;
                    # it is compared in full (no sub-sampling) in this cell.
                    reference_window_df = pd.DataFrame(reference_window)
                    reference_window_df.columns = reference_window_df.columns.astype(str)
                    reference_window_x = reference_window_df.drop(AGRAWAL_LABEL_COLUMN, axis=1)

                    current_window.extend(batch)
                    current_window_df = pd.DataFrame(current_window)
                    current_window_df.columns = current_window_df.columns.astype(str)
                    current_window_x = current_window_df.drop(AGRAWAL_LABEL_COLUMN, axis=1)
                    current_window_y = current_window_df[AGRAWAL_LABEL_COLUMN]

                    # Score both models before any retraining (test-then-train).
                    pred_no_change = random_forest_no_change.predict(current_window_x)
                    accuracy_no_change = accuracy_score(current_window_y, pred_no_change)
                    eval_accuracy_no_change.append(accuracy_no_change)

                    pred_distance = random_forest_distance.predict(current_window_x)
                    accuracy_distance = accuracy_score(current_window_y, pred_distance)
                    eval_accuracy_distance.append(accuracy_distance)

                    drift_flag, drift_feature = Check_Significant_Energy_Distance(reference_window_x, current_window_x)

                    if drift_flag:
                        print('Drift Detected at Batch ' + str(batch_index))
                        drifts_detected.append(batch_index)
                        # NOTE(review): the retrained forest has no random_state,
                        # so eval_accuracy_distance is not reproducible.
                        random_forest_distance = RandomForestClassifier(n_estimators=20)
                        random_forest_distance.fit(current_window_x, current_window_y)
                        # The drifted batch becomes the new reference window.
                        reference_window = []
                        reference_window.extend(current_window)

                        if batch_index == Drift_Batch:
                            TP = TP + 1
                        else:
                            FP = FP + 1
                    else:
                        if batch_index == Drift_Batch:
                            FN = FN + 1
                        else:
                            TN = TN + 1

                    current_window = []

                # Start collecting the next batch.
                batch = []
                batch_index = batch_index + 1

    # A stream too short to reach Drift_Batch (or to yield any evaluated
    # batch) leaves a denominator at zero; report 0.0 instead of crashing
    # with ZeroDivisionError after the whole pass has been computed.
    TPR = TP / (TP + FN) if (TP + FN) else 0.0
    FPR = FP / (FP + TN) if (FP + TN) else 0.0
    TPR_Total.append(TPR)
    FPR_Total.append(FPR)

mean_TPR = statistics.mean(TPR_Total)
# statistics.stdev needs at least two data points; a single-iteration run
# has no spread to report.
stdev_TPR = statistics.stdev(TPR_Total) if len(TPR_Total) > 1 else 0.0
mean_FPR = statistics.mean(FPR_Total)
stdev_FPR = statistics.stdev(FPR_Total) if len(FPR_Total) > 1 else 0.0

print(TPR_Total)
print(FPR_Total)
print(mean_TPR)
print(stdev_TPR)
print(mean_FPR)
print(stdev_FPR)