In [2]:
import csv
import pandas as pd
import statistics
import random
import shap
import lime
import eli5
import math
from scipy import stats
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import cohen_kappa_score
from sklearn.metrics import accuracy_score

In [None]:
def Check_Significant_Wasserstein_Distance(df_1, df_2, threshold=0.04):
    """Report the features whose distribution differs between two windows.

    For every column of ``df_1`` the 1-D Wasserstein distance between that
    column in ``df_1`` and the same-named column in ``df_2`` is computed. A
    column is reported when its distance is strictly greater than
    ``threshold``; each reported column is also printed with its distance.

    Parameters
    ----------
    df_1, df_2 : pandas.DataFrame
        Windows to compare. ``df_2`` must contain every column of ``df_1``.
    threshold : float, optional
        Distance above which a feature counts as drifted. Defaults to 0.04,
        the value that used to be hard-coded here.

    Returns
    -------
    tuple(bool, list)
        ``(significant_flag, feature_detected)`` where ``significant_flag`` is
        True when at least one feature exceeded the threshold and
        ``feature_detected`` lists those column labels in column order.
    """
    feature_detected = []

    for feature in df_1.columns:
        distance = stats.wasserstein_distance(df_1[feature], df_2[feature])

        if distance > threshold:
            # str() keeps the message working for non-string column labels.
            print("Feature Name: " + str(feature) + " Distance: " + str(distance))
            feature_detected.append(feature)

    return bool(feature_detected), feature_detected

# Interpretation

In [None]:
def Check_Significant_Shap(model, reference_x, current_x):
    """Compare per-feature SHAP values of two windows with a t-test.

    SHAP values are computed with ``shap.TreeExplainer`` for both windows and
    element ``[1]`` of each result is used (presumably the contributions
    towards class 1 -- TODO confirm against the installed shap version).
    For every feature, the mean, sample standard deviation and count of its
    SHAP values in each window are passed to ``Two_Sample_T_test``.

    Returns
    -------
    tuple(bool, list)
        ``(update_flag, feature_detected)``: whether any feature was
        significant, and the positional indices of the significant features.
    """
    print("Checking Significant by Shap values")
    ref_shap_values = shap.TreeExplainer(model).shap_values(reference_x)[1]
    cur_shap_values = shap.TreeExplainer(model).shap_values(current_x)[1]

    # Both windows are read over the reference window's row range.
    row_range = range(len(ref_shap_values))
    column_count = len(ref_shap_values[0])

    feature_detected = []
    for column in range(column_count):
        # Gather this feature's SHAP values across all instances.
        ref_column = [ref_shap_values[row][column] for row in row_range]
        cur_column = [cur_shap_values[row][column] for row in row_range]

        ref_mean = statistics.mean(ref_column)
        ref_stdev = statistics.stdev(ref_column)
        cur_mean = statistics.mean(cur_column)
        cur_stdev = statistics.stdev(cur_column)

        significant, _p_value = Two_Sample_T_test(
            ref_mean, cur_mean, ref_stdev, cur_stdev, len(ref_column), len(cur_column)
        )
        if significant:
            feature_detected.append(column)

    return len(feature_detected) > 0, feature_detected

In [None]:
def Check_Significant_ELI5_Local(model, reference_window, current_window, label_column=None):
    """Compare ELI5 local-explanation weights of two windows with a t-test.

    Only rows whose label equals 1.0 are explained. For every feature that
    appears in the explanations of both windows, the (mean, stdev, count)
    summaries produced by ``Get_ELI5_Explaination`` are passed to
    ``Two_Sample_T_test``; significant features are printed and collected.

    Parameters
    ----------
    model : fitted estimator understood by ``eli5.explain_prediction``.
    reference_window, current_window : pandas.DataFrame
        Windows that contain the feature columns plus the label column.
    label_column : column label, optional
        Name of the label column. Defaults to the last column of
        ``reference_window``. The name used to be hard-coded as ``'10'``,
        which silently treated a feature as the label for every frame whose
        label is stored elsewhere.

    Returns
    -------
    tuple(bool, list)
        ``(update_flag, feature_detected)``. Detected features are the ELI5
        feature names with their first character removed; purely numeric
        names are converted to ``int`` so they can be compared with the
        integer indices returned by the other detectors in this notebook.
    """
    print("Checking Significant by ELI5 Local Explaination")
    if label_column is None:
        label_column = reference_window.columns[-1]

    def _positive_feature_rows(window):
        # Keep rows labelled 1.0 and drop the label so only features remain.
        positives = window.loc[window[label_column].astype(float) == 1.0]
        return positives.drop(label_column, axis=1)

    ref_eli5_distribution = Get_ELI5_Explaination(model, _positive_feature_rows(reference_window))
    cur_eli5_distribution = Get_ELI5_Explaination(model, _positive_feature_rows(current_window))

    update_flag = False
    feature_detected = []
    for feature, (reference_mean, reference_stdev, reference_size) in ref_eli5_distribution.items():
        if feature not in cur_eli5_distribution:
            continue
        current_mean, current_stdev, current_size = cur_eli5_distribution[feature]

        eli5_significant, p = Two_Sample_T_test(reference_mean, current_mean,
                                                reference_stdev, current_stdev,
                                                reference_size, current_size)
        if eli5_significant:
            print("Feature Name: " + feature)
            print("P-value: " + str(p))
            # Strip the one-character prefix of the ELI5 feature name
            # (presumably the 'x' of 'x<index>' -- TODO confirm).
            f = feature[1:]
            feature_detected.append(int(f) if f.isdigit() else f)
            update_flag = True
    return update_flag, feature_detected

In [None]:
def Get_ELI5_Explaination(model, df):
    """Summarise the positive ELI5 feature weights over all rows of ``df``.

    Every row (cast to float) is explained with ``eli5.explain_prediction``;
    the ``'pos'`` feature weights of the first target are grouped by feature
    name.

    Returns
    -------
    dict
        Maps feature name to ``[mean, stdev, count]`` of its collected
        weights. ``stdev`` is 0 when only one weight was collected.
    """
    weights_by_feature = {}
    for row in df.astype(float).values:
        explanation = eli5.format_as_dict(eli5.explain_prediction(model, row))
        for entry in explanation['targets'][0]['feature_weights']['pos']:
            weights_by_feature.setdefault(entry['feature'], []).append(entry['weight'])

    feature_distribution = {}
    for name, collected in weights_by_feature.items():
        average = statistics.mean(collected)
        # The sample standard deviation needs at least two observations.
        spread = statistics.stdev(collected) if len(collected) > 1 else 0
        feature_distribution[name] = [average, spread, len(collected)]
    return feature_distribution

In [None]:
def Check_Significant_Permutation_Importance(model, reference_window, current_window, label_column=None):
    """Compare permutation importances of two windows with a t-test.

    ``eli5.sklearn.PermutationImportance`` (``random_state=42``) is fitted on
    each window, and the per-feature importance mean and standard deviation
    of both windows are passed to ``Two_Sample_T_test`` together with the
    window sizes. Significant features are printed and collected.

    Parameters
    ----------
    model : fitted scikit-learn estimator.
    reference_window, current_window : pandas.DataFrame
        Windows that contain the feature columns plus the label column.
    label_column : column label, optional
        Name of the label column. Defaults to the last column of
        ``reference_window``. The name used to be hard-coded as ``'10'``;
        for frames whose label lives elsewhere that scored the model against
        a feature column, left the real label inside X, and shifted every
        reported index at or above 10 by one.

    Returns
    -------
    tuple(bool, list)
        ``(update_flag, feature_detected)`` where ``feature_detected`` holds
        the positional indices (within the feature columns) of the features
        whose importance changed significantly.
    """
    print("Checking Significant by Permutation Importance")
    if label_column is None:
        label_column = reference_window.columns[-1]

    reference_window_x = reference_window.drop(label_column, axis=1)
    reference_window_y = reference_window[label_column]
    ref_pi = eli5.sklearn.PermutationImportance(model, random_state=42).fit(reference_window_x, reference_window_y)
    ref_means = ref_pi.feature_importances_
    ref_stdevs = ref_pi.feature_importances_std_
    ref_size = len(reference_window_y)

    current_window_x = current_window.drop(label_column, axis=1)
    current_window_y = current_window[label_column]
    cur_pi = eli5.sklearn.PermutationImportance(model, random_state=42).fit(current_window_x, current_window_y)
    cur_means = cur_pi.feature_importances_
    cur_stdevs = cur_pi.feature_importances_std_
    cur_size = len(current_window_y)

    update_flag = False
    feature_detected = []
    # NOTE(review): the window row counts are used as the t-test sample sizes;
    # confirm this matches how feature_importances_std_ is estimated by eli5.
    for i in range(len(ref_means)):
        pi_significant, p = Two_Sample_T_test(ref_means[i], cur_means[i],
                                              ref_stdevs[i], cur_stdevs[i],
                                              ref_size, cur_size)
        if pi_significant:
            print("Feature Name: " + str(i))
            print("P-value: " + str(p))
            update_flag = True
            feature_detected.append(i)
    return update_flag, feature_detected

In [None]:
def Two_Sample_T_test(expected_mean, current_mean, expected_sd, current_sd, expected_size, current_size,
                      alpha=0.0001):
    """Two-sided two-sample t-test computed from summary statistics.

    The statistic is ``(expected_mean - current_mean) / sqrt(sd1^2/n1 + sd2^2/n2)``
    and is evaluated against a Student t distribution with ``n1 + n2 - 2``
    degrees of freedom.

    Parameters
    ----------
    expected_mean, current_mean : float
        Sample means of the reference and the current sample.
    expected_sd, current_sd : float
        Sample standard deviations of the two samples.
    expected_size, current_size : int
        Number of observations in each sample (must be non-zero).
    alpha : float, optional
        Significance level; the result is significant when ``p < alpha``.
        Defaults to 0.0001, the value that used to be hard-coded.

    Returns
    -------
    tuple(bool, float)
        ``(significant, p)`` with ``p`` the two-sided p-value.
    """
    mean_diff = expected_mean - current_mean
    size_sum = math.pow(expected_sd, 2) / expected_size + math.pow(current_sd, 2) / current_size
    if size_sum == 0:
        # Both samples are constant: avoid dividing by zero while still letting
        # any difference between the means produce a very large statistic.
        size_sum = 0.0000000001
    t = mean_diff / math.sqrt(size_sum)
    df = expected_size + current_size - 2

    # Two-sided p-value. The previous ``(1 - cdf(t)) * 2`` was only valid for
    # t >= 0: a negative t produced p > 1, so a shift in that direction could
    # never be reported as significant.
    p = 2 * stats.t.sf(abs(t), df=df)

    return bool(p < alpha), p

# Adult Evaluation

In [None]:
# Adult dataset: repeat the batch-wise drift-detection experiment and report
# detection TPR / FPR / delay plus the quality of the drift interpretation.
Evaluation_Iteration = 10
Batch_Size = 2500
Drift_Batch = 9  # batch index that is scored as the true drift batch
# Ground-truth feature indices used to score the interpretation step.
Feature_Shift = [0,1,2,3,4,5,14,30,32,41,42,52,53,54,62,63]
Feature_Not_Shift = [6,7,8,9,10,11,12,13,15,16,17,18,19,20,21,22,
                     23,24,25,26,27,28,29,31,33,34,35,36,37,38,39,40,43,44,45,46,47,48,49,50,51,55,56,57,58,59,60,61,64]
print(len(Feature_Shift))
print(len(Feature_Not_Shift))


TPR_Total = []
FPR_Total = []
Delay_Total = []
Batch_9_Interpretation_TPR_Total = []
Batch_9_Interpretation_FPR_Total = []
Batch_10_Interpretation_TPR_Total = []
Batch_10_Interpretation_FPR_Total = []

for i in range(Evaluation_Iteration):
    print("Currently Working on Iteration: " + str(i+1))
    
    batch_index = 0
    batch = []

    reference_window = []
    current_window = []

    drifts_detected = []
    eval_accuracy_no_change = []
    eval_accuracy_distance = []
    
    TP = 0
    TN = 0
    FP = 0
    FN = 0
    Delay = 0
    Delay_Stop_Count_Flag = False
    
    with open('Datasets/Synthetic_Feature_Adult_Abrupt.csv') as csv_file:
#     with open('Datasets/Synthetic_Feature_Adult_Gradual.csv') as csv_file:
        drifts_detected = []
        csv_reader = csv.reader(csv_file)
        for row in csv_reader:
            batch.append(row)
            if len(batch) == Batch_Size:
                if batch_index == 0:
                    # First batch: it becomes the reference window and trains both models.
                    reference_window.extend(batch)
                    reference_window_df = pd.DataFrame(reference_window)
                    reference_window_df.columns = reference_window_df.columns.astype(str)
                    reference_window_x = reference_window_df.drop('65', axis=1)
                    reference_window_y = reference_window_df['65']
                
                    random_forest_no_change = RandomForestClassifier(n_estimators=20,random_state=42)
                    random_forest_no_change.fit(reference_window_x, reference_window_y)
                    
                    random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
                    random_forest_distance.fit(reference_window_x, reference_window_y)
                    
                    batch = []
                    batch_index = batch_index + 1
                else:
                    current_window.extend(batch)
                    
                    # Compare against a reference sample of the same size as the current window.
                    sample_reference_window = random.sample(reference_window, len(current_window))
                    
                    reference_window_df = pd.DataFrame(sample_reference_window)
                    reference_window_df.columns = reference_window_df.columns.astype(str)
                    reference_window_x = reference_window_df.drop('65', axis=1)
                    
                    current_window_df = pd.DataFrame(current_window)
                    current_window_df.columns = current_window_df.columns.astype(str)
                    current_window_x = current_window_df.drop('65', axis=1)
                    current_window_y = current_window_df['65']
                    
                    pred_no_change = random_forest_no_change.predict(current_window_x)
                    accuracy_no_change = accuracy_score(current_window_y, pred_no_change)
                    eval_accuracy_no_change.append(accuracy_no_change)
                    
                    pred_distance = random_forest_distance.predict(current_window_x)
                    accuracy_distance = accuracy_score(current_window_y, pred_distance)
                    eval_accuracy_distance.append(accuracy_distance)
                    
                    drift_flag, drift_feature = Check_Significant_Wasserstein_Distance(reference_window_x, current_window_x)
                    
                    # Detection delay bookkeeping. 1921 is subtracted once, when the
                    # drift is finally detected (presumably the row offset of the drift
                    # onset inside the drift batch -- TODO confirm).
                    if batch_index == Drift_Batch:
                        if drift_flag:
                            Delay = Batch_Size - 1921
                            Delay_Stop_Count_Flag = True
                        else:
                            Delay = Batch_Size
                    elif batch_index > Drift_Batch:
                        if not(Delay_Stop_Count_Flag):
                            if drift_flag:
                                Delay = Delay + Batch_Size - 1921
                                Delay_Stop_Count_Flag = True
                            else:
                                # Drift still undetected: this whole batch adds to the delay.
                                # (Was the no-op expression `Delay + Batch_Size`.)
                                Delay = Delay + Batch_Size
                    
                    if drift_flag:
                        print('Drift Detected at Batch ' + str(batch_index))
                        
                        # Exactly one interpretation method is active; the others are kept as toggles.
#                         shap_drift_flag, feature_interpretation_shift_detected = Check_Significant_Shap(random_forest_distance, reference_window_x, current_window_x)
#                         eli5_flag, feature_interpretation_shift_detected = Check_Significant_ELI5_Local(random_forest_distance, reference_window_df, current_window_df)
                        PI_flag, feature_interpretation_shift_detected = Check_Significant_Permutation_Importance(random_forest_distance, reference_window_df, current_window_df)
                        print(feature_interpretation_shift_detected)
                        # Score the reported features against the ground-truth lists.
                        Interpretation_TP = 0
                        Interpretation_FP = 0
                        Interpretation_TN = 0
                        Interpretation_FN = 0
                        for feature in feature_interpretation_shift_detected:
                            if feature in Feature_Shift:
                                Interpretation_TP = Interpretation_TP + 1
                            else:
                                Interpretation_FP = Interpretation_FP + 1
                        for feature in Feature_Shift:
                            if feature not in feature_interpretation_shift_detected:
                                Interpretation_FN = Interpretation_FN + 1
                        for featurea in Feature_Not_Shift:
                            if featurea not in feature_interpretation_shift_detected:
                                Interpretation_TN = Interpretation_TN + 1
                        Interpretation_TPR = Interpretation_TP / (Interpretation_TP + Interpretation_FN)
                        Interpretation_FPR = Interpretation_FP / (Interpretation_FP + Interpretation_TN)
                        if batch_index == 9:
                            Batch_9_Interpretation_TPR_Total.append(Interpretation_TPR)
                            Batch_9_Interpretation_FPR_Total.append(Interpretation_FPR)
                        if batch_index == 10:
                            Batch_10_Interpretation_TPR_Total.append(Interpretation_TPR)
                            Batch_10_Interpretation_FPR_Total.append(Interpretation_FPR)
                        
                        
                        # Adapt: retrain on the current window and make it the new reference.
                        drifts_detected.append(batch_index)
                        random_forest_distance = RandomForestClassifier(n_estimators=20)
                        random_forest_distance.fit(current_window_x, current_window_y)
                        reference_window = []
                        reference_window.extend(current_window)
                        
                        if batch_index == Drift_Batch:
                            TP = TP + 1
                        else:
                            FP = FP + 1
                    else:
                        # No drift: the reference window keeps growing.
                        reference_window.extend(current_window)
                        if batch_index == Drift_Batch:
                            FN = FN + 1
                        else:
                            TN = TN + 1
                    current_window = []
                    batch = []
                    batch_index = batch_index + 1
                    
        TPR = TP / (TP + FN)
        FPR = FP / (FP + TN)
        TPR_Total.append(TPR)
        FPR_Total.append(FPR)
        Delay_Total.append(Delay)

mean_TPR = statistics.mean(TPR_Total)
stdev_TPR = statistics.stdev(TPR_Total)
mean_FPR = statistics.mean(FPR_Total)
stdev_FPR = statistics.stdev(FPR_Total)
mean_Delay = statistics.mean(Delay_Total)
stdev_Delay = statistics.stdev(Delay_Total)

print("Feature Drift Detection")
print("TPR Total: " + str(TPR_Total))
print("FPR Total: " + str(FPR_Total))
print("Delay Total: " + str(Delay_Total))
print("TPR Mean: " + str(mean_TPR))
print("TPR Std: " + str(stdev_TPR))
print("FPR Mean: " + str(mean_FPR))
print("FPR Std: " + str(stdev_FPR))
print("Delay Mean: " + str(mean_Delay))
print("Delay Std: " + str(stdev_Delay))

print("Feature Drift Interpretation")
mean_Batch_9_Interpretation_TPR = statistics.mean(Batch_9_Interpretation_TPR_Total)
stdev_Batch_9_Interpretation_TPR = statistics.stdev(Batch_9_Interpretation_TPR_Total)
mean_Batch_9_Interpretation_FPR = statistics.mean(Batch_9_Interpretation_FPR_Total)
stdev_Batch_9_Interpretation_FPR = statistics.stdev(Batch_9_Interpretation_FPR_Total)
print("TPR Batch 9 Total: " + str(Batch_9_Interpretation_TPR_Total))
print("FPR Batch 9 Total: " + str(Batch_9_Interpretation_FPR_Total))
print("TPR Batch 9 Interpretation Mean: " + str(mean_Batch_9_Interpretation_TPR))
print("TPR Batch 9 Interpretation Std: " + str(stdev_Batch_9_Interpretation_TPR))
print("FPR Batch 9 Interpretation Mean: " + str(mean_Batch_9_Interpretation_FPR))
print("FPR Batch 9 Interpretation Std: " + str(stdev_Batch_9_Interpretation_FPR))

mean_Batch_10_Interpretation_TPR = statistics.mean(Batch_10_Interpretation_TPR_Total)
stdev_Batch_10_Interpretation_TPR = statistics.stdev(Batch_10_Interpretation_TPR_Total)
mean_Batch_10_Interpretation_FPR = statistics.mean(Batch_10_Interpretation_FPR_Total)
stdev_Batch_10_Interpretation_FPR = statistics.stdev(Batch_10_Interpretation_FPR_Total)
print("TPR Batch 10 Total: " + str(Batch_10_Interpretation_TPR_Total))
print("FPR Batch 10 Total: " + str(Batch_10_Interpretation_FPR_Total))
print("TPR Batch 10 Interpretation Mean: " + str(mean_Batch_10_Interpretation_TPR))
print("TPR Batch 10 Interpretation Std: " + str(stdev_Batch_10_Interpretation_TPR))
print("FPR Batch 10 Interpretation Mean: " + str(mean_Batch_10_Interpretation_FPR))
print("FPR Batch 10 Interpretation Std: " + str(stdev_Batch_10_Interpretation_FPR))

# Bank Evaluation

In [None]:
# Bank dataset: repeat the batch-wise drift-detection experiment and report
# detection TPR / FPR / delay. The interpretation step is currently disabled
# (kept below as commented-out toggles).
Evaluation_Iteration = 2
Batch_Size = 2500
Drift_Batch = 9  # batch index that is scored as the true drift batch
# Ground-truth feature indices used to score the interpretation step.
Feature_Shift = [0,2,3,6,8,9,29,31,34,36,44,47]
Feature_Not_Shift = [1,4,5,7,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,32,33,35,37,38,39,40,41,42,43,45,46]
# print(len(Feature_Shift))
# print(len(Feature_Not_Shift))

TPR_Total = []
FPR_Total = []
Delay_Total = []
Batch_9_Interpretation_TPR_Total = []
Batch_9_Interpretation_FPR_Total = []


for i in range(Evaluation_Iteration):
    print("Currently Working on Iteration: " + str(i+1))
    
    batch_index = 0
    batch = []

    reference_window = []
    current_window = []

    drifts_detected = []
    eval_accuracy_no_change = []
    eval_accuracy_distance = []
    
    TP = 0
    TN = 0
    FP = 0
    FN = 0
    Delay = 0
    Delay_Stop_Count_Flag = False
    
    with open('Datasets/Synthetic_Feature_Bank_Abrupt.csv') as csv_file:
#     with open('Datasets/Synthetic_Feature_Bank_Gradual.csv') as csv_file:
        drifts_detected = []
        csv_reader = csv.reader(csv_file)
        for row in csv_reader:
            batch.append(row)
            if len(batch) == Batch_Size:
                print("Currently Working on Batch " + str(batch_index))
                if batch_index == 0:
                    # First batch: it becomes the reference window and trains both models.
                    reference_window.extend(batch)
                    reference_window_df = pd.DataFrame(reference_window)
                    reference_window_df.columns = reference_window_df.columns.astype(str)
                    reference_window_x = reference_window_df.drop('48', axis=1)
                    reference_window_y = reference_window_df['48']
                
                    random_forest_no_change = RandomForestClassifier(n_estimators=20,random_state=42)
                    random_forest_no_change.fit(reference_window_x, reference_window_y)
                    
                    random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
                    random_forest_distance.fit(reference_window_x, reference_window_y)
                    
                    batch = []
                    batch_index = batch_index + 1
                else:
                    current_window.extend(batch)
                    
                    # Compare against a reference sample of the same size as the current window.
                    sample_reference_window = random.sample(reference_window, len(current_window))
                    
                    reference_window_df = pd.DataFrame(sample_reference_window)
                    reference_window_df.columns = reference_window_df.columns.astype(str)
                    reference_window_x = reference_window_df.drop('48', axis=1)
                    
                    current_window_df = pd.DataFrame(current_window)
                    current_window_df.columns = current_window_df.columns.astype(str)
                    current_window_x = current_window_df.drop('48', axis=1)
                    current_window_y = current_window_df['48']
                    
                    pred_no_change = random_forest_no_change.predict(current_window_x)
                    accuracy_no_change = accuracy_score(current_window_y, pred_no_change)
                    eval_accuracy_no_change.append(accuracy_no_change)
                    
                    pred_distance = random_forest_distance.predict(current_window_x)
                    accuracy_distance = accuracy_score(current_window_y, pred_distance)
                    eval_accuracy_distance.append(accuracy_distance)
                    
                    drift_flag, drift_feature = Check_Significant_Wasserstein_Distance(reference_window_x, current_window_x)
                    
                    # Detection delay bookkeeping. 106 is subtracted once, when the
                    # drift is finally detected (presumably the row offset of the drift
                    # onset inside the drift batch -- TODO confirm).
                    if batch_index == Drift_Batch:
                        if drift_flag:
                            Delay = Batch_Size - 106
                            Delay_Stop_Count_Flag = True
                        else:
                            Delay = Batch_Size
                    elif batch_index > Drift_Batch:
                        if not(Delay_Stop_Count_Flag):
                            if drift_flag:
                                Delay = Delay + Batch_Size - 106
                                Delay_Stop_Count_Flag = True
                            else:
                                # Drift still undetected: this whole batch adds to the delay.
                                # (Was the no-op expression `Delay + Batch_Size`.)
                                Delay = Delay + Batch_Size
                    
                    if drift_flag:
                        print('Drift Detected at Batch ' + str(batch_index))
                        
#                         shap_drift_flag, feature_interpretation_shift_detected = Check_Significant_Shap(random_forest_distance, reference_window_x, current_window_x)
#                         eli5_flag, feature_interpretation_shift_detected = Check_Significant_ELI5_Local(random_forest_distance, reference_window_df, current_window_df)
#                         PI_flag, feature_interpretation_shift_detected = Check_Significant_Permutation_Importance(random_forest_distance, reference_window_df, current_window_df)
#                         print(feature_interpretation_shift_detected)
#                         Interpretation_TP = 0
#                         Interpretation_FP = 0
#                         Interpretation_TN = 0
#                         Interpretation_FN = 0
#                         for feature in feature_interpretation_shift_detected:
#                             if feature in Feature_Shift:
#                                 Interpretation_TP = Interpretation_TP + 1
#                             else:
#                                 Interpretation_FP = Interpretation_FP + 1
#                         for feature in Feature_Shift:
#                             if feature not in feature_interpretation_shift_detected:
#                                 Interpretation_FN = Interpretation_FN + 1
#                         for featurea in Feature_Not_Shift:
#                             if featurea not in feature_interpretation_shift_detected:
#                                 Interpretation_TN = Interpretation_TN + 1
#                         print(Interpretation_TP)
#                         print(Interpretation_FN)
#                         print(Interpretation_TN)
#                         print(Interpretation_FP)
#                         Interpretation_TPR = Interpretation_TP / (Interpretation_TP + Interpretation_FN)
#                         Interpretation_FPR = Interpretation_FP / (Interpretation_FP + Interpretation_TN)
#                         print(Interpretation_TPR)
#                         print(Interpretation_FPR)
#                         if batch_index == 9:
#                             Batch_9_Interpretation_TPR_Total.append(Interpretation_TPR)
#                             Batch_9_Interpretation_FPR_Total.append(Interpretation_FPR)
                        
                        # Adapt: retrain on the current window and make it the new reference.
                        drifts_detected.append(batch_index)
                        random_forest_distance = RandomForestClassifier(n_estimators=20)
                        random_forest_distance.fit(current_window_x, current_window_y)
                        reference_window = []
                        reference_window.extend(current_window)
                        
                        if batch_index == Drift_Batch:
                            TP = TP + 1
                        else:
                            FP = FP + 1
                    else:
                        # No drift: the reference window keeps growing.
                        reference_window.extend(current_window)
                        if batch_index == Drift_Batch:
                            FN = FN + 1
                        else:
                            TN = TN + 1
                    current_window = []
                    batch = []
                    batch_index = batch_index + 1
                    
        TPR = TP / (TP + FN)
        FPR = FP / (FP + TN)
        TPR_Total.append(TPR)
        FPR_Total.append(FPR)
        Delay_Total.append(Delay)

mean_TPR = statistics.mean(TPR_Total)
stdev_TPR = statistics.stdev(TPR_Total)
mean_FPR = statistics.mean(FPR_Total)
stdev_FPR = statistics.stdev(FPR_Total)
mean_Delay = statistics.mean(Delay_Total)
stdev_Delay = statistics.stdev(Delay_Total)

print("TPR Total: " + str(TPR_Total))
print("FPR Total: " + str(FPR_Total))
print("Delay Total: " + str(Delay_Total))
print("TPR Mean: " + str(mean_TPR))
print("TPR Std: " + str(stdev_TPR))
print("FPR Mean: " + str(mean_FPR))
print("FPR Std: " + str(stdev_FPR))
print("Delay Mean: " + str(mean_Delay))
print("Delay Std: " + str(stdev_Delay))

# print("Feature Drift Interpretation")
# mean_Batch_9_Interpretation_TPR = statistics.mean(Batch_9_Interpretation_TPR_Total)
# stdev_Batch_9_Interpretation_TPR = statistics.stdev(Batch_9_Interpretation_TPR_Total)
# mean_Batch_9_Interpretation_FPR = statistics.mean(Batch_9_Interpretation_FPR_Total)
# stdev_Batch_9_Interpretation_FPR = statistics.stdev(Batch_9_Interpretation_FPR_Total)
# print("TPR Batch 9 Total: " + str(Batch_9_Interpretation_TPR_Total))
# print("FPR Batch 9 Total: " + str(Batch_9_Interpretation_FPR_Total))
# print("TPR Batch 9 Interpretation Mean: " + str(mean_Batch_9_Interpretation_TPR))
# print("TPR Batch 9 Interpretation Std: " + str(stdev_Batch_9_Interpretation_TPR))
# print("FPR Batch 9 Interpretation Mean: " + str(mean_Batch_9_Interpretation_FPR))
# print("FPR Batch 9 Interpretation Std: " + str(stdev_Batch_9_Interpretation_FPR))

# Credit Evaluation

In [None]:
Evaluation_Iteration = 10
Batch_Size = 1500
Drift_Batch = 10
Feature_Shift = [4,5,6,7,8,9,16]
Feature_Not_Shift = [0,1,2,3,10,11,12,13,14,15,17,18,19,20,21,22,23,24,25]
print(len(Feature_Shift))
print(len(Feature_Not_Shift))

TPR_Total = []
FPR_Total = []
Delay_Total = []
Batch_10_Interpretation_TPR_Total = []
Batch_10_Interpretation_FPR_Total = []


for i in range(Evaluation_Iteration):
    print("Currently Working on Iteration: " + str(i+1))
    
    batch_index = 0
    batch = []

    reference_window = []
    current_window = []

    drifts_detected = []
    eval_accuracy_no_change = []
    eval_accuracy_distance = []
    
    TP = 0
    TN = 0
    FP = 0
    FN = 0
    Delay = 0
    Delay_Stop_Count_Flag = False
    
    with open('Datasets/Synthetic_Feature_Credit_Abrupt.csv') as csv_file:
#     with open('Datasets/Synthetic_Feature_Credit_Gradual.csv') as csv_file:
        drifts_detected = []
        csv_reader = csv.reader(csv_file)
        for row in csv_reader:
            batch.append(row)
            if len(batch) == Batch_Size:
                if batch_index == 0:
                    reference_window.extend(batch)
                    reference_window_df = pd.DataFrame(reference_window)
                    reference_window_df.columns = reference_window_df.columns.astype(str)
                    reference_window_x = reference_window_df.drop('26', axis=1)
                    reference_window_y = reference_window_df['26']
                
                    random_forest_no_change = RandomForestClassifier(n_estimators=20,random_state=42)
                    random_forest_no_change.fit(reference_window_x, reference_window_y)
                    
                    random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
                    random_forest_distance.fit(reference_window_x, reference_window_y)
                    
                    batch = []
                    batch_index = batch_index + 1
                else:
                    current_window.extend(batch)
                    
                    sample_reference_window = random.sample(reference_window, len(current_window))
                    
#                     print(len(reference_window))
#                     print(len(sample_reference_window))
                    
                    reference_window_df = pd.DataFrame(sample_reference_window)
                    reference_window_df.columns = reference_window_df.columns.astype(str)
                    reference_window_x = reference_window_df.drop('26', axis=1)
                    
                    current_window_df = pd.DataFrame(current_window)
                    current_window_df.columns = current_window_df.columns.astype(str)
                    current_window_x = current_window_df.drop('26', axis=1)
                    current_window_y = current_window_df['26']
                    
                    pred_no_change = random_forest_no_change.predict(current_window_x)
                    accuracy_no_change = accuracy_score(current_window_y, pred_no_change)
                    eval_accuracy_no_change.append(accuracy_no_change)
                    
                    pred_distance = random_forest_distance.predict(current_window_x)
                    accuracy_distance = accuracy_score(current_window_y, pred_distance)
                    eval_accuracy_distance.append(accuracy_distance)
                    
                    drift_flag, drift_feature = Check_Significant_Wasserstein_Distance(reference_window_x, current_window_x)
                    
                    if batch_index == Drift_Batch:
                        if drift_flag:
                            Delay = Batch_Size - 1
                            Delay_Stop_Count_Flag = True
                        else:
                            Delay = Batch_Size
                    elif batch_index > Drift_Batch:
                        if not(Delay_Stop_Count_Flag):
                            if drift_flag:
                                Delay = Delay + Batch_Size - 1
                                Delay_Stop_Count_Flag = True
                            else:
                                Delay + Batch_Size
                    
                    if drift_flag:
                        print('Drift Detected at Batch ' + str(batch_index))
                        
#                         shap_drift_flag, feature_interpretation_shift_detected = Check_Significant_Shap(random_forest_distance, reference_window_x, current_window_x)
#                         eli5_flag, feature_interpretation_shift_detected = Check_Significant_ELI5_Local(random_forest_distance, reference_window_df, current_window_df)
                        PI_flag, feature_interpretation_shift_detected = Check_Significant_Permutation_Importance(random_forest_distance, reference_window_df, current_window_df)
                        print(feature_interpretation_shift_detected)
                        Interpretation_TP = 0
                        Interpretation_FP = 0
                        Interpretation_TN = 0
                        Interpretation_FN = 0
                        for feature in feature_interpretation_shift_detected:
                            if feature in Feature_Shift:
                                Interpretation_TP = Interpretation_TP + 1
                            else:
                                Interpretation_FP = Interpretation_FP + 1
                        for feature in Feature_Shift:
                            if feature not in feature_interpretation_shift_detected:
                                Interpretation_FN = Interpretation_FN + 1
                        for featurea in Feature_Not_Shift:
                            if featurea not in feature_interpretation_shift_detected:
                                Interpretation_TN = Interpretation_TN + 1
#                         print(Interpretation_TP)
#                         print(Interpretation_FN)
#                         print(Interpretation_TN)
#                         print(Interpretation_FP)
                        Interpretation_TPR = Interpretation_TP / (Interpretation_TP + Interpretation_FN)
                        Interpretation_FPR = Interpretation_FP / (Interpretation_FP + Interpretation_TN)
#                         print(Interpretation_TPR)
#                         print(Interpretation_FPR)

                        if batch_index == 10:
                            Batch_10_Interpretation_TPR_Total.append(Interpretation_TPR)
                            Batch_10_Interpretation_FPR_Total.append(Interpretation_FPR)
                        
                        drifts_detected.append(batch_index)
                        random_forest_distance = RandomForestClassifier(n_estimators=20)
                        random_forest_distance.fit(current_window_x, current_window_y)
                        reference_window = []
                        reference_window.extend(current_window)
                        
                        if batch_index == Drift_Batch:
                            TP = TP + 1
                        else:
                            FP = FP + 1
                    else:
                        reference_window.extend(current_window)
                        if batch_index == Drift_Batch:
                            FN = FN + 1
                        else:
                            TN = TN + 1
#                     reference_window = []
#                     reference_window.extend(current_window)
                    current_window = []
                    batch = []
                    batch_index = batch_index + 1
                    
        TPR = TP / (TP + FN)
        FPR = FP / (FP + TN)
        TPR_Total.append(TPR)
        FPR_Total.append(FPR)
        Delay_Total.append(Delay)

mean_TPR = statistics.mean(TPR_Total)
stdev_TPR = statistics.stdev(TPR_Total)
mean_FPR = statistics.mean(FPR_Total)
stdev_FPR = statistics.stdev(FPR_Total)
mean_Delay = statistics.mean(Delay_Total)
stdev_Delay = statistics.stdev(Delay_Total)

print("TPR Total: " + str(TPR_Total))
print("FPR Total: " + str(FPR_Total))
print("Delay Total: " + str(Delay_Total))
print("TPR Mean: " + str(mean_TPR))
print("TPR Std: " + str(stdev_TPR))
print("FPR Mean: " + str(mean_FPR))
print("FPR Std: " + str(stdev_FPR))
print("Delay Mean: " + str(mean_Delay))
print("Delay Std: " + str(stdev_Delay))

mean_Batch_10_Interpretation_TPR = statistics.mean(Batch_10_Interpretation_TPR_Total)
stdev_Batch_10_Interpretation_TPR = statistics.stdev(Batch_10_Interpretation_TPR_Total)
mean_Batch_10_Interpretation_FPR = statistics.mean(Batch_10_Interpretation_FPR_Total)
stdev_Batch_10_Interpretation_FPR = statistics.stdev(Batch_10_Interpretation_FPR_Total)
print("TPR Batch 10 Total: " + str(Batch_10_Interpretation_TPR_Total))
print("FPR Batch 10 Total: " + str(Batch_10_Interpretation_FPR_Total))
print("TPR Batch 10 Interpretation Mean: " + str(mean_Batch_10_Interpretation_TPR))
print("TPR Batch 10 Interpretation Std: " + str(stdev_Batch_10_Interpretation_TPR))
print("FPR Batch 10 Interpretation Mean: " + str(mean_Batch_10_Interpretation_FPR))
print("FPR Batch 10 Interpretation Std: " + str(stdev_Batch_10_Interpretation_FPR))

# Gamma Evaluation

In [None]:
# Gamma evaluation.
# Replays the synthetic Gamma stream Evaluation_Iteration times. In every replay the
# stream is cut into batches of Batch_Size rows; each batch after the first is compared
# against a random sample of the reference window with
# Check_Significant_Wasserstein_Distance. A flag raised on Drift_Batch counts as a true
# positive, a flag on any other batch as a false positive. When drift is flagged, the
# features blamed by Check_Significant_Permutation_Importance are scored against
# Feature_Shift / Feature_Not_Shift.
Evaluation_Iteration = 10
Batch_Size = 1000
Drift_Batch = 9  # batch index at which a detection is counted as a true positive
Feature_Shift = [0,1,8]  # features scored as "shifted" in the interpretation metrics
Feature_Not_Shift = [2,3,4,5,6,7,9]  # features scored as "not shifted"
print(len(Feature_Shift))
print(len(Feature_Not_Shift))

# One entry per iteration.
TPR_Total = []
FPR_Total = []
Delay_Total = []
# One entry per iteration in which drift was flagged at batch 9 / batch 10.
Batch_9_Interpretation_TPR_Total = []
Batch_9_Interpretation_FPR_Total = []
Batch_10_Interpretation_TPR_Total = []
Batch_10_Interpretation_FPR_Total = []

for i in range(Evaluation_Iteration):
    print("Currently Working on Iteration: " + str(i+1))

    batch_index = 0
    batch = []

    reference_window = []
    current_window = []

    drifts_detected = []
    eval_accuracy_no_change = []
    eval_accuracy_distance = []

    # Batch-level confusion counts of the drift detector for this iteration.
    TP = 0
    TN = 0
    FP = 0
    FN = 0
    # Delay grows from Drift_Batch onwards until the first flagged batch;
    # Delay_Stop_Count_Flag freezes it once that detection has happened.
    Delay = 0
    Delay_Stop_Count_Flag = False

    with open('Datasets/Synthetic_Feature_Gamma_Abrupt.csv') as csv_file:
    # with open('Datasets/Synthetic_Feature_Gamma_Gradual.csv') as csv_file:
        csv_reader = csv.reader(csv_file)
        for row in csv_reader:
            batch.append(row)
            if len(batch) == Batch_Size:
                if batch_index == 0:
                    # First batch: seeds the reference window and trains both models.
                    # Column '10' is used as the label; every other column is a feature.
                    reference_window.extend(batch)
                    reference_window_df = pd.DataFrame(reference_window)
                    reference_window_df.columns = reference_window_df.columns.astype(str)
                    reference_window_x = reference_window_df.drop('10', axis=1)
                    reference_window_y = reference_window_df['10']

                    # Baseline model: fitted once here and never retrained.
                    random_forest_no_change = RandomForestClassifier(n_estimators=20,random_state=42)
                    random_forest_no_change.fit(reference_window_x, reference_window_y)

                    # Adaptive model: refitted below whenever drift is flagged.
                    random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
                    random_forest_distance.fit(reference_window_x, reference_window_y)

                    batch = []
                    batch_index = batch_index + 1
                else:
                    current_window.extend(batch)

                    # Compare equally sized samples: draw as many reference rows
                    # (without replacement) as the current window holds.
                    sample_reference_window = random.sample(reference_window, len(current_window))

                    reference_window_df = pd.DataFrame(sample_reference_window)
                    reference_window_df.columns = reference_window_df.columns.astype(str)
                    reference_window_x = reference_window_df.drop('10', axis=1)

                    current_window_df = pd.DataFrame(current_window)
                    current_window_df.columns = current_window_df.columns.astype(str)
                    current_window_x = current_window_df.drop('10', axis=1)
                    current_window_y = current_window_df['10']

                    # Score both models on the current batch before any retraining.
                    pred_no_change = random_forest_no_change.predict(current_window_x)
                    accuracy_no_change = accuracy_score(current_window_y, pred_no_change)
                    eval_accuracy_no_change.append(accuracy_no_change)

                    pred_distance = random_forest_distance.predict(current_window_x)
                    accuracy_distance = accuracy_score(current_window_y, pred_distance)
                    eval_accuracy_distance.append(accuracy_distance)

                    drift_flag, drift_feature = Check_Significant_Wasserstein_Distance(reference_window_x, current_window_x)

                    # Detection delay bookkeeping.
                    # NOTE(review): 510 is presumably the row offset inside the drift
                    # batch at which the drift starts - confirm against the dataset.
                    if batch_index == Drift_Batch:
                        if drift_flag:
                            Delay = Batch_Size - 510
                            Delay_Stop_Count_Flag = True
                        else:
                            Delay = Batch_Size
                    elif batch_index > Drift_Batch:
                        if not(Delay_Stop_Count_Flag):
                            if drift_flag:
                                Delay = Delay + Batch_Size - 510
                                Delay_Stop_Count_Flag = True
                            else:
                                # Still undetected: the whole batch adds to the delay.
                                # (Previously a bare `Delay + Batch_Size` expression,
                                # which discarded the result and under-reported delay.)
                                Delay = Delay + Batch_Size

                    if drift_flag:
                        print('Drift Detected at Batch ' + str(batch_index))

                        # Alternative explainers, kept as experiment switches:
                        # shap_drift_flag, feature_interpretation_shift_detected = Check_Significant_Shap(random_forest_distance, reference_window_x, current_window_x)
                        # eli5_flag, feature_interpretation_shift_detected = Check_Significant_ELI5_Local(random_forest_distance, reference_window_df, current_window_df)
                        PI_flag, feature_interpretation_shift_detected = Check_Significant_Permutation_Importance(random_forest_distance, reference_window_df, current_window_df)
                        print(feature_interpretation_shift_detected)

                        # Feature-level confusion counts of the interpretation step.
                        # NOTE(review): Feature_Shift / Feature_Not_Shift hold ints while
                        # the DataFrame column labels are strings; verify that the
                        # explainer returns ints, otherwise no membership test matches.
                        Interpretation_TP = 0
                        Interpretation_FP = 0
                        Interpretation_TN = 0
                        Interpretation_FN = 0
                        for feature in feature_interpretation_shift_detected:
                            if feature in Feature_Shift:
                                Interpretation_TP = Interpretation_TP + 1
                            else:
                                Interpretation_FP = Interpretation_FP + 1
                        for feature in Feature_Shift:
                            if feature not in feature_interpretation_shift_detected:
                                Interpretation_FN = Interpretation_FN + 1
                        for featurea in Feature_Not_Shift:
                            if featurea not in feature_interpretation_shift_detected:
                                Interpretation_TN = Interpretation_TN + 1

                        Interpretation_TPR = Interpretation_TP / (Interpretation_TP + Interpretation_FN)
                        Interpretation_FPR = Interpretation_FP / (Interpretation_FP + Interpretation_TN)

                        if batch_index == 9:
                            Batch_9_Interpretation_TPR_Total.append(Interpretation_TPR)
                            Batch_9_Interpretation_FPR_Total.append(Interpretation_FPR)
                        if batch_index == 10:
                            Batch_10_Interpretation_TPR_Total.append(Interpretation_TPR)
                            Batch_10_Interpretation_FPR_Total.append(Interpretation_FPR)

                        # Adapt: retrain on the current batch (unseeded forest) and
                        # restart the reference window from it.
                        drifts_detected.append(batch_index)
                        random_forest_distance = RandomForestClassifier(n_estimators=20)
                        random_forest_distance.fit(current_window_x, current_window_y)
                        reference_window = []
                        reference_window.extend(current_window)

                        if batch_index == Drift_Batch:
                            TP = TP + 1
                        else:
                            FP = FP + 1
                    else:
                        # No drift: the batch is absorbed into the reference window.
                        reference_window.extend(current_window)
                        if batch_index == Drift_Batch:
                            FN = FN + 1
                        else:
                            TN = TN + 1

                    current_window = []
                    batch = []
                    batch_index = batch_index + 1

        # Per-iteration detection rates over all evaluated batches.
        TPR = TP / (TP + FN)
        FPR = FP / (FP + TN)
        TPR_Total.append(TPR)
        FPR_Total.append(FPR)
        Delay_Total.append(Delay)

mean_TPR = statistics.mean(TPR_Total)
stdev_TPR = statistics.stdev(TPR_Total)
mean_FPR = statistics.mean(FPR_Total)
stdev_FPR = statistics.stdev(FPR_Total)
mean_Delay = statistics.mean(Delay_Total)
stdev_Delay = statistics.stdev(Delay_Total)

print("TPR Total: " + str(TPR_Total))
print("FPR Total: " + str(FPR_Total))
print("Delay Total: " + str(Delay_Total))
print("TPR Mean: " + str(mean_TPR))
print("TPR Std: " + str(stdev_TPR))
print("FPR Mean: " + str(mean_FPR))
print("FPR Std: " + str(stdev_FPR))
print("Delay Mean: " + str(mean_Delay))
print("Delay Std: " + str(stdev_Delay))

# The per-batch interpretation lists only receive a value in iterations where drift was
# flagged at that batch, so they can hold fewer than two values (or none).
# statistics.mean needs at least one value and statistics.stdev at least two; report nan
# in those cases instead of raising StatisticsError after the whole run has finished.
print("Feature Drift Interpretation")
mean_Batch_9_Interpretation_TPR = statistics.mean(Batch_9_Interpretation_TPR_Total) if len(Batch_9_Interpretation_TPR_Total) >= 1 else float('nan')
stdev_Batch_9_Interpretation_TPR = statistics.stdev(Batch_9_Interpretation_TPR_Total) if len(Batch_9_Interpretation_TPR_Total) >= 2 else float('nan')
mean_Batch_9_Interpretation_FPR = statistics.mean(Batch_9_Interpretation_FPR_Total) if len(Batch_9_Interpretation_FPR_Total) >= 1 else float('nan')
stdev_Batch_9_Interpretation_FPR = statistics.stdev(Batch_9_Interpretation_FPR_Total) if len(Batch_9_Interpretation_FPR_Total) >= 2 else float('nan')
print("TPR Batch 9 Total: " + str(Batch_9_Interpretation_TPR_Total))
print("FPR Batch 9 Total: " + str(Batch_9_Interpretation_FPR_Total))
print("TPR Batch 9 Interpretation Mean: " + str(mean_Batch_9_Interpretation_TPR))
print("TPR Batch 9 Interpretation Std: " + str(stdev_Batch_9_Interpretation_TPR))
print("FPR Batch 9 Interpretation Mean: " + str(mean_Batch_9_Interpretation_FPR))
print("FPR Batch 9 Interpretation Std: " + str(stdev_Batch_9_Interpretation_FPR))

mean_Batch_10_Interpretation_TPR = statistics.mean(Batch_10_Interpretation_TPR_Total) if len(Batch_10_Interpretation_TPR_Total) >= 1 else float('nan')
stdev_Batch_10_Interpretation_TPR = statistics.stdev(Batch_10_Interpretation_TPR_Total) if len(Batch_10_Interpretation_TPR_Total) >= 2 else float('nan')
mean_Batch_10_Interpretation_FPR = statistics.mean(Batch_10_Interpretation_FPR_Total) if len(Batch_10_Interpretation_FPR_Total) >= 1 else float('nan')
stdev_Batch_10_Interpretation_FPR = statistics.stdev(Batch_10_Interpretation_FPR_Total) if len(Batch_10_Interpretation_FPR_Total) >= 2 else float('nan')
print("TPR Batch 10 Total: " + str(Batch_10_Interpretation_TPR_Total))
print("FPR Batch 10 Total: " + str(Batch_10_Interpretation_FPR_Total))
print("TPR Batch 10 Interpretation Mean: " + str(mean_Batch_10_Interpretation_TPR))
print("TPR Batch 10 Interpretation Std: " + str(stdev_Batch_10_Interpretation_TPR))
print("FPR Batch 10 Interpretation Mean: " + str(mean_Batch_10_Interpretation_FPR))
print("FPR Batch 10 Interpretation Std: " + str(stdev_Batch_10_Interpretation_FPR))

# SEA Evaluation

In [None]:
# SEA evaluation.
# Replays the synthetic SEA stream Evaluation_Iteration times. In every replay the stream
# is cut into batches of Batch_Size rows; each batch after the first is compared against
# a random sample of the reference window with Check_Significant_Wasserstein_Distance.
# A flag raised on Drift_Batch counts as a true positive, a flag on any other batch as a
# false positive.
Evaluation_Iteration = 50
Batch_Size = 2500
Drift_Batch = 20  # batch index at which a detection is counted as a true positive

# One entry per iteration.
TPR_Total = []
FPR_Total = []
Delay_Total = []

for i in range(Evaluation_Iteration):
    print("Currently Working on Iteration: " + str(i+1))

    batch_index = 0
    batch = []

    reference_window = []
    current_window = []

    drifts_detected = []
    eval_accuracy_no_change = []
    eval_accuracy_distance = []

    # Batch-level confusion counts of the drift detector for this iteration.
    TP = 0
    TN = 0
    FP = 0
    FN = 0
    # Delay grows from Drift_Batch onwards until the first flagged batch;
    # Delay_Stop_Count_Flag freezes it once that detection has happened.
    Delay = 0
    Delay_Stop_Count_Flag = False

    with open('Datasets/Synthetic_MOA_SEA_Abrupt.csv') as csv_file:
    # NOTE(review): the alternative below points at the RBF dataset, whose label column
    # differs from the '3' used in this cell - presumably a copy-paste leftover.
    # with open('Datasets/Synthetic_MOA_RBF_Gradual.csv') as csv_file:
        csv_reader = csv.reader(csv_file)
        for row in csv_reader:
            batch.append(row)
            if len(batch) == Batch_Size:
                if batch_index == 0:
                    # First batch: seeds the reference window and trains both models.
                    # Column '3' is used as the label; every other column is a feature.
                    reference_window.extend(batch)
                    reference_window_df = pd.DataFrame(reference_window)
                    reference_window_df.columns = reference_window_df.columns.astype(str)
                    reference_window_x = reference_window_df.drop('3', axis=1)
                    reference_window_y = reference_window_df['3']

                    # Baseline model: fitted once here and never retrained.
                    random_forest_no_change = RandomForestClassifier(n_estimators=20,random_state=42)
                    random_forest_no_change.fit(reference_window_x, reference_window_y)

                    # Adaptive model: refitted below whenever drift is flagged.
                    random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
                    random_forest_distance.fit(reference_window_x, reference_window_y)

                    batch = []
                    batch_index = batch_index + 1
                else:
                    current_window.extend(batch)

                    # Compare equally sized samples: draw as many reference rows
                    # (without replacement) as the current window holds.
                    sample_reference_window = random.sample(reference_window, len(current_window))

                    reference_window_df = pd.DataFrame(sample_reference_window)
                    reference_window_df.columns = reference_window_df.columns.astype(str)
                    reference_window_x = reference_window_df.drop('3', axis=1)

                    current_window_df = pd.DataFrame(current_window)
                    current_window_df.columns = current_window_df.columns.astype(str)
                    current_window_x = current_window_df.drop('3', axis=1)
                    current_window_y = current_window_df['3']

                    # Score both models on the current batch before any retraining.
                    pred_no_change = random_forest_no_change.predict(current_window_x)
                    accuracy_no_change = accuracy_score(current_window_y, pred_no_change)
                    eval_accuracy_no_change.append(accuracy_no_change)

                    pred_distance = random_forest_distance.predict(current_window_x)
                    accuracy_distance = accuracy_score(current_window_y, pred_distance)
                    eval_accuracy_distance.append(accuracy_distance)

                    drift_flag, drift_feature = Check_Significant_Wasserstein_Distance(reference_window_x, current_window_x)

                    # Detection delay bookkeeping.
                    # NOTE(review): the "- 1" presumably accounts for the row offset at
                    # which the drift starts inside the drift batch - confirm.
                    if batch_index == Drift_Batch:
                        if drift_flag:
                            Delay = Batch_Size - 1
                            Delay_Stop_Count_Flag = True
                        else:
                            Delay = Batch_Size
                    elif batch_index > Drift_Batch:
                        if not(Delay_Stop_Count_Flag):
                            if drift_flag:
                                Delay = Delay + Batch_Size - 1
                                Delay_Stop_Count_Flag = True
                            else:
                                # Still undetected: the whole batch adds to the delay.
                                # (Previously a bare `Delay + Batch_Size` expression,
                                # which discarded the result and under-reported delay.)
                                Delay = Delay + Batch_Size

                    if drift_flag:
                        print('Drift Detected at Batch ' + str(batch_index))
                        # Adapt: retrain on the current batch (unseeded forest) and
                        # restart the reference window from it.
                        drifts_detected.append(batch_index)
                        random_forest_distance = RandomForestClassifier(n_estimators=20)
                        random_forest_distance.fit(current_window_x, current_window_y)
                        reference_window = []
                        reference_window.extend(current_window)

                        if batch_index == Drift_Batch:
                            TP = TP + 1
                        else:
                            FP = FP + 1
                    else:
                        # No drift: the batch is absorbed into the reference window.
                        reference_window.extend(current_window)
                        if batch_index == Drift_Batch:
                            FN = FN + 1
                        else:
                            TN = TN + 1

                    current_window = []
                    batch = []
                    batch_index = batch_index + 1

        # Per-iteration detection rates over all evaluated batches.
        TPR = TP / (TP + FN)
        FPR = FP / (FP + TN)
        TPR_Total.append(TPR)
        FPR_Total.append(FPR)
        Delay_Total.append(Delay)

# Aggregate across iterations.
mean_TPR = statistics.mean(TPR_Total)
stdev_TPR = statistics.stdev(TPR_Total)
mean_FPR = statistics.mean(FPR_Total)
stdev_FPR = statistics.stdev(FPR_Total)
mean_Delay = statistics.mean(Delay_Total)
stdev_Delay = statistics.stdev(Delay_Total)

print("TPR Total: " + str(TPR_Total))
print("FPR Total: " + str(FPR_Total))
print("Delay Total: " + str(Delay_Total))
print("TPR Mean: " + str(mean_TPR))
print("TPR Std: " + str(stdev_TPR))
print("FPR Mean: " + str(mean_FPR))
print("FPR Std: " + str(stdev_FPR))
print("Delay Mean: " + str(mean_Delay))
print("Delay Std: " + str(stdev_Delay))

# RBF Evaluation

In [None]:
# RBF evaluation.
# Replays the synthetic RBF stream Evaluation_Iteration times. In every replay the stream
# is cut into batches of Batch_Size rows; each batch after the first is compared against
# a random sample of the reference window with Check_Significant_Wasserstein_Distance.
# A flag raised on Drift_Batch counts as a true positive, a flag on any other batch as a
# false positive.
Evaluation_Iteration = 50
Batch_Size = 2500
Drift_Batch = 20  # batch index at which a detection is counted as a true positive

# One entry per iteration.
TPR_Total = []
FPR_Total = []
Delay_Total = []

for i in range(Evaluation_Iteration):
    print("Currently Working on Iteration: " + str(i+1))

    batch_index = 0
    batch = []

    reference_window = []
    current_window = []

    drifts_detected = []
    eval_accuracy_no_change = []
    eval_accuracy_distance = []

    # Batch-level confusion counts of the drift detector for this iteration.
    TP = 0
    TN = 0
    FP = 0
    FN = 0
    # Delay grows from Drift_Batch onwards until the first flagged batch;
    # Delay_Stop_Count_Flag freezes it once that detection has happened.
    Delay = 0
    Delay_Stop_Count_Flag = False

    # with open('Datasets/Synthetic_MOA_RBF_Abrupt.csv') as csv_file:
    with open('Datasets/Synthetic_MOA_RBF_Gradual.csv') as csv_file:
        csv_reader = csv.reader(csv_file)
        for row in csv_reader:
            batch.append(row)
            if len(batch) == Batch_Size:
                if batch_index == 0:
                    # First batch: seeds the reference window and trains both models.
                    # Column '10' is used as the label; every other column is a feature.
                    reference_window.extend(batch)
                    reference_window_df = pd.DataFrame(reference_window)
                    reference_window_df.columns = reference_window_df.columns.astype(str)
                    reference_window_x = reference_window_df.drop('10', axis=1)
                    reference_window_y = reference_window_df['10']

                    # Baseline model: fitted once here and never retrained.
                    random_forest_no_change = RandomForestClassifier(n_estimators=20,random_state=42)
                    random_forest_no_change.fit(reference_window_x, reference_window_y)

                    # Adaptive model: refitted below whenever drift is flagged.
                    random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
                    random_forest_distance.fit(reference_window_x, reference_window_y)

                    batch = []
                    batch_index = batch_index + 1
                else:
                    current_window.extend(batch)

                    # Compare equally sized samples: draw as many reference rows
                    # (without replacement) as the current window holds.
                    sample_reference_window = random.sample(reference_window, len(current_window))

                    reference_window_df = pd.DataFrame(sample_reference_window)
                    reference_window_df.columns = reference_window_df.columns.astype(str)
                    reference_window_x = reference_window_df.drop('10', axis=1)

                    current_window_df = pd.DataFrame(current_window)
                    current_window_df.columns = current_window_df.columns.astype(str)
                    current_window_x = current_window_df.drop('10', axis=1)
                    current_window_y = current_window_df['10']

                    # Score both models on the current batch before any retraining.
                    pred_no_change = random_forest_no_change.predict(current_window_x)
                    accuracy_no_change = accuracy_score(current_window_y, pred_no_change)
                    eval_accuracy_no_change.append(accuracy_no_change)

                    pred_distance = random_forest_distance.predict(current_window_x)
                    accuracy_distance = accuracy_score(current_window_y, pred_distance)
                    eval_accuracy_distance.append(accuracy_distance)

                    drift_flag, drift_feature = Check_Significant_Wasserstein_Distance(reference_window_x, current_window_x)

                    # Detection delay bookkeeping.
                    # NOTE(review): the "- 1" presumably accounts for the row offset at
                    # which the drift starts inside the drift batch - confirm.
                    if batch_index == Drift_Batch:
                        if drift_flag:
                            Delay = Batch_Size - 1
                            Delay_Stop_Count_Flag = True
                        else:
                            Delay = Batch_Size
                    elif batch_index > Drift_Batch:
                        if not(Delay_Stop_Count_Flag):
                            if drift_flag:
                                Delay = Delay + Batch_Size - 1
                                Delay_Stop_Count_Flag = True
                            else:
                                # Still undetected: the whole batch adds to the delay.
                                # (Previously a bare `Delay + Batch_Size` expression,
                                # which discarded the result and under-reported delay.)
                                Delay = Delay + Batch_Size

                    if drift_flag:
                        print('Drift Detected at Batch ' + str(batch_index))
                        # Adapt: retrain on the current batch (unseeded forest) and
                        # restart the reference window from it.
                        drifts_detected.append(batch_index)
                        random_forest_distance = RandomForestClassifier(n_estimators=20)
                        random_forest_distance.fit(current_window_x, current_window_y)
                        reference_window = []
                        reference_window.extend(current_window)

                        if batch_index == Drift_Batch:
                            TP = TP + 1
                        else:
                            FP = FP + 1
                    else:
                        # No drift: the batch is absorbed into the reference window.
                        reference_window.extend(current_window)
                        if batch_index == Drift_Batch:
                            FN = FN + 1
                        else:
                            TN = TN + 1

                    current_window = []
                    batch = []
                    batch_index = batch_index + 1

        # Per-iteration detection rates over all evaluated batches.
        TPR = TP / (TP + FN)
        FPR = FP / (FP + TN)
        TPR_Total.append(TPR)
        FPR_Total.append(FPR)
        Delay_Total.append(Delay)

# Aggregate across iterations.
mean_TPR = statistics.mean(TPR_Total)
stdev_TPR = statistics.stdev(TPR_Total)
mean_FPR = statistics.mean(FPR_Total)
stdev_FPR = statistics.stdev(FPR_Total)
mean_Delay = statistics.mean(Delay_Total)
stdev_Delay = statistics.stdev(Delay_Total)

print("TPR Total: " + str(TPR_Total))
print("FPR Total: " + str(FPR_Total))
print("Delay Total: " + str(Delay_Total))
print("TPR Mean: " + str(mean_TPR))
print("TPR Std: " + str(stdev_TPR))
print("FPR Mean: " + str(mean_FPR))
print("FPR Std: " + str(stdev_FPR))
print("Delay Mean: " + str(mean_Delay))
print("Delay Std: " + str(stdev_Delay))

In [None]:
# Single pass over the abrupt RBF stream: records, per batch, the accuracy of a model
# that is never retrained and of one that is refitted whenever
# Check_Significant_Wasserstein_Distance flags drift, plus the batches where drift fired.
Batch_Size = 2500
batch_index = 0
batch, batches = [], []

reference_window, current_window = [], []

drifts_detected = []
eval_accuracy_no_change, eval_accuracy_distance = [], []

with open('Datasets/Synthetic_MOA_RBF_Abrupt.csv') as csv_file:
    csv_reader = csv.reader(csv_file)
    for row in csv_reader:
        batch.append(row)
        # Keep buffering rows until a full batch is available.
        if len(batch) != Batch_Size:
            continue

        batches.append(batch_index)
        print("Currently Working on Batch " + str(batch_index))

        if batch_index == 0:
            # The first batch becomes the reference window and the training set;
            # column '10' is the label, all other columns are features.
            reference_window.extend(batch)
            reference_window_df = pd.DataFrame(reference_window)
            reference_window_df.columns = reference_window_df.columns.astype(str)
            reference_window_y = reference_window_df['10']
            reference_window_x = reference_window_df.drop(columns='10')

            # Both forests start out identical (same seed, same data).
            random_forest_no_change = RandomForestClassifier(n_estimators=20,random_state=42)
            random_forest_no_change.fit(reference_window_x, reference_window_y)
            random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
            random_forest_distance.fit(reference_window_x, reference_window_y)

            # Batch 0 is scored on the very data it was trained on.
            pred_no_change = random_forest_no_change.predict(reference_window_x)
            accuracy_no_change = accuracy_score(reference_window_y, pred_no_change)
            eval_accuracy_no_change.append(accuracy_no_change)

            pred_distance = random_forest_distance.predict(reference_window_x)
            accuracy_distance = accuracy_score(reference_window_y, pred_distance)
            eval_accuracy_distance.append(accuracy_distance)
        else:
            current_window.extend(batch)
            # Sample as many reference rows as the current window contains.
            sample_reference_window = random.sample(reference_window, len(current_window))

            reference_window_df = pd.DataFrame(sample_reference_window)
            reference_window_df.columns = reference_window_df.columns.astype(str)
            reference_window_x = reference_window_df.drop(columns='10')

            current_window_df = pd.DataFrame(current_window)
            current_window_df.columns = current_window_df.columns.astype(str)
            current_window_y = current_window_df['10']
            current_window_x = current_window_df.drop(columns='10')

            # Score both models on the incoming batch before any retraining.
            pred_no_change = random_forest_no_change.predict(current_window_x)
            accuracy_no_change = accuracy_score(current_window_y, pred_no_change)
            eval_accuracy_no_change.append(accuracy_no_change)

            pred_distance = random_forest_distance.predict(current_window_x)
            accuracy_distance = accuracy_score(current_window_y, pred_distance)
            eval_accuracy_distance.append(accuracy_distance)

            drift_flag, drift_feature = Check_Significant_Wasserstein_Distance(reference_window_x, current_window_x)

            if drift_flag:
                print('Drift Detected at Batch ' + str(batch_index))
                drifts_detected.append(batch_index)
                # Refit the adaptive model on the drifted batch and make that batch
                # the new reference window.
                random_forest_distance = RandomForestClassifier(n_estimators=20)
                random_forest_distance.fit(current_window_x, current_window_y)
                reference_window = list(current_window)
            # Without drift the reference window is left untouched in this cell.

            current_window = []

        # Shared tail of both branches: start a fresh batch.
        batch = []
        batch_index += 1

print(batches)
print(eval_accuracy_no_change)
print(eval_accuracy_distance)
print(drifts_detected)

# Agrawal Evaluation

In [None]:
# Agrawal drift-detection benchmark.
# Every iteration replays the CSV stream in batches of Batch_Size rows. Batch 0 trains two
# identical forests: `random_forest_no_change` is never refit, `random_forest_distance` is
# refit on the current batch whenever the Wasserstein check flags a drift. Detections are
# scored against Drift_Batch and the per-iteration TPR / FPR / delay are summarised at the end.
Evaluation_Iteration = 50
Batch_Size = 2500
Drift_Batch = 20  # a detection at this batch index is counted as a true positive

TPR_Total = []
FPR_Total = []
Delay_Total = []

for i in range(Evaluation_Iteration):
    print("Currently Working on Iteration: " + str(i+1))

    batch_index = 0
    batch = []

    reference_window = []
    current_window = []

    drifts_detected = []
    eval_accuracy_no_change = []
    eval_accuracy_distance = []

    TP = 0
    TN = 0
    FP = 0
    FN = 0
    Delay = 0
    # Becomes True at the first detection at/after Drift_Batch; stops the delay accumulation.
    Delay_Stop_Count_Flag = False

    # Alternative stream: 'Datasets/Synthetic_MOA_RBF_Gradual.csv'
    with open('Datasets/Synthetic_MOA_Agrawal_Abrupt.csv') as csv_file:
        csv_reader = csv.reader(csv_file)
        for row in csv_reader:
            batch.append(row)
            # Rows left over after the last full batch are never evaluated.
            if len(batch) == Batch_Size:
                if batch_index == 0:
                    # Batch 0 only seeds the reference window and trains both models.
                    reference_window.extend(batch)
                    reference_window_df = pd.DataFrame(reference_window)
                    reference_window_df.columns = reference_window_df.columns.astype(str)
                    # Column '9' is used as the label; every other column is a feature.
                    reference_window_x = reference_window_df.drop('9', axis=1)
                    reference_window_y = reference_window_df['9']

                    random_forest_no_change = RandomForestClassifier(n_estimators=20, random_state=42)
                    random_forest_no_change.fit(reference_window_x, reference_window_y)

                    random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
                    random_forest_distance.fit(reference_window_x, reference_window_y)

                    batch = []
                    batch_index = batch_index + 1
                else:
                    current_window.extend(batch)

                    # Compare equally sized samples: draw as many reference rows as the
                    # current window holds.
                    sample_reference_window = random.sample(reference_window, len(current_window))

                    reference_window_df = pd.DataFrame(sample_reference_window)
                    reference_window_df.columns = reference_window_df.columns.astype(str)
                    reference_window_x = reference_window_df.drop('9', axis=1)

                    current_window_df = pd.DataFrame(current_window)
                    current_window_df.columns = current_window_df.columns.astype(str)
                    current_window_x = current_window_df.drop('9', axis=1)
                    current_window_y = current_window_df['9']

                    pred_no_change = random_forest_no_change.predict(current_window_x)
                    accuracy_no_change = accuracy_score(current_window_y, pred_no_change)
                    eval_accuracy_no_change.append(accuracy_no_change)

                    pred_distance = random_forest_distance.predict(current_window_x)
                    accuracy_distance = accuracy_score(current_window_y, pred_distance)
                    eval_accuracy_distance.append(accuracy_distance)

                    drift_flag, drift_feature = Check_Significant_Wasserstein_Distance(reference_window_x, current_window_x)

                    # Detection delay: accumulated from Drift_Batch until the first flagged batch.
                    # NOTE(review): the "- 1" presumably reflects where the drift sits inside
                    # the batch - confirm against the dataset generator.
                    if batch_index == Drift_Batch:
                        if drift_flag:
                            Delay = Batch_Size - 1
                            Delay_Stop_Count_Flag = True
                        else:
                            Delay = Batch_Size
                    elif batch_index > Drift_Batch and not Delay_Stop_Count_Flag:
                        if drift_flag:
                            Delay = Delay + Batch_Size - 1
                            Delay_Stop_Count_Flag = True
                        else:
                            # Fix: this used to be the bare expression `Delay + Batch_Size`,
                            # whose result was discarded, so missed batches never increased
                            # the reported delay.
                            Delay = Delay + Batch_Size

                    if drift_flag:
                        print('Drift Detected at Batch ' + str(batch_index))
                        drifts_detected.append(batch_index)
                        # Refit the adaptive model on the drifted batch and restart the
                        # reference window from it.
                        random_forest_distance = RandomForestClassifier(n_estimators=20)
                        random_forest_distance.fit(current_window_x, current_window_y)
                        reference_window = []
                        reference_window.extend(current_window)

                        if batch_index == Drift_Batch:
                            TP = TP + 1
                        else:
                            FP = FP + 1
                    else:
                        # No drift: the reference window keeps growing.
                        reference_window.extend(current_window)
                        if batch_index == Drift_Batch:
                            FN = FN + 1
                        else:
                            TN = TN + 1

                    current_window = []
                    batch = []
                    batch_index = batch_index + 1

        # Per-iteration rates. These divide by zero if the stream never reaches Drift_Batch
        # (TP + FN == 0) or has no other evaluated batch (FP + TN == 0).
        TPR = TP / (TP + FN)
        FPR = FP / (FP + TN)
        TPR_Total.append(TPR)
        FPR_Total.append(FPR)
        Delay_Total.append(Delay)

mean_TPR = statistics.mean(TPR_Total)
stdev_TPR = statistics.stdev(TPR_Total)
mean_FPR = statistics.mean(FPR_Total)
stdev_FPR = statistics.stdev(FPR_Total)
mean_Delay = statistics.mean(Delay_Total)
stdev_Delay = statistics.stdev(Delay_Total)

print("TPR Total: " + str(TPR_Total))
print("FPR Total: " + str(FPR_Total))
print("Delay Total: " + str(Delay_Total))
print("TPR Mean: " + str(mean_TPR))
print("TPR Std: " + str(stdev_TPR))
print("FPR Mean: " + str(mean_FPR))
print("FPR Std: " + str(stdev_FPR))
print("Delay Mean: " + str(mean_Delay))
print("Delay Std: " + str(stdev_Delay))

In [None]:
# ElecNorm stream: batch-wise drift detection with a static baseline forest and an
# adaptive forest that is refit whenever the Wasserstein check flags a drift.
batch_size = 1440

batch_index = 0
batch = []

reference_window = []
current_window = []

drifts_detected = []
eval_accuracy_no_change = []
eval_accuracy_distance = []

with open('Datasets/Real_ElecNorm.csv') as csv_file:
    csv_reader = csv.reader(csv_file)
    for row in csv_reader:
        batch.append(row)
        # Rows left over after the last full batch are never evaluated.
        if len(batch) == batch_size:
            print("Currently working on batch " + str(batch_index))
            if batch_index == 0:
                # Batch 0 seeds the reference window and trains both models.
                reference_window.extend(batch)
                reference_window_df = pd.DataFrame(reference_window)
                reference_window_df.columns = reference_window_df.columns.astype(str)
                # Column '8' is used as the label; column '0' is excluded from the features.
                reference_window_x = reference_window_df.drop('8', axis=1)
                reference_window_x = reference_window_x.drop('0', axis=1)
                reference_window_y = reference_window_df['8']

                random_forest_no_change = RandomForestClassifier(n_estimators=20, random_state=42)
                random_forest_no_change.fit(reference_window_x, reference_window_y)

                random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
                random_forest_distance.fit(reference_window_x, reference_window_y)

                # The first accuracy entries are measured on the training batch itself.
                pred_no_change = random_forest_no_change.predict(reference_window_x)
                accuracy_no_change = accuracy_score(reference_window_y, pred_no_change)
                eval_accuracy_no_change.append(accuracy_no_change)

                pred_distance = random_forest_distance.predict(reference_window_x)
                accuracy_distance = accuracy_score(reference_window_y, pred_distance)
                eval_accuracy_distance.append(accuracy_distance)

                batch = []
                batch_index = batch_index + 1
            else:
                print(len(reference_window))

                current_window.extend(batch)

                # Fix: rebuild the reference frame from the *current* reference window on
                # every batch. Previously the frame created for batch 0 was reused forever
                # (the rebuild was commented out and the sample below was unused), so the
                # drift check ignored both window growth and post-drift resets.
                sample_reference_window = random.sample(reference_window, len(current_window))
                reference_window_df = pd.DataFrame(sample_reference_window)
                reference_window_df.columns = reference_window_df.columns.astype(str)
                reference_window_x = reference_window_df.drop('8', axis=1)
                reference_window_x = reference_window_x.drop('0', axis=1)

                current_window_df = pd.DataFrame(current_window)
                current_window_df.columns = current_window_df.columns.astype(str)
                current_window_x = current_window_df.drop('8', axis=1)
                current_window_x = current_window_x.drop('0', axis=1)
                current_window_y = current_window_df['8']

                pred_no_change = random_forest_no_change.predict(current_window_x)
                accuracy_no_change = accuracy_score(current_window_y, pred_no_change)
                eval_accuracy_no_change.append(accuracy_no_change)

                pred_distance = random_forest_distance.predict(current_window_x)
                accuracy_distance = accuracy_score(current_window_y, pred_distance)
                eval_accuracy_distance.append(accuracy_distance)

                drift_flag, drift_feature = Check_Significant_Wasserstein_Distance(reference_window_x, current_window_x)

                if drift_flag:
                    drifts_detected.append(batch_index)
                    print('Drift Detected at batch ' + str(batch_index))
                    # Restart the reference window from the drifted batch and refit on it.
                    reference_window = []
                    reference_window.extend(current_window)
                    random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
                    random_forest_distance.fit(current_window_x, current_window_y)
                else:
                    # No drift: the reference window keeps growing.
                    reference_window.extend(current_window)

                current_window = []
                batch = []
                batch_index = batch_index + 1

# Phishing Evaluation

In [None]:
# Phishing stream: batch-wise drift detection where the reference window is always the
# previous batch (it is replaced after every evaluated batch, drift or not).
batch_size = 1000

batch_index = 0
batch = []

reference_window = []
current_window = []

drifts_detected = []
eval_accuracy_no_change = []
eval_accuracy_distance = []

with open('Datasets/Real_Phishing.csv') as csv_file:
    csv_reader = csv.reader(csv_file)
    for row in csv_reader:
        batch.append(row)
        # Rows left over after the last full batch are never evaluated.
        if len(batch) == batch_size:
            print("Currently working on batch " + str(batch_index))
            if batch_index == 0:
                # Batch 0 seeds the reference window and trains both models.
                reference_window.extend(batch)
                reference_window_df = pd.DataFrame(reference_window)
                reference_window_df.columns = reference_window_df.columns.astype(str)
                # Column '46' is used as the label; every other column is a feature.
                reference_window_x = reference_window_df.drop('46', axis=1)
                reference_window_y = reference_window_df['46']

                random_forest_no_change = RandomForestClassifier(n_estimators=20, random_state=42)
                random_forest_no_change.fit(reference_window_x, reference_window_y)

                random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
                random_forest_distance.fit(reference_window_x, reference_window_y)

                # The first accuracy entries are measured on the training batch itself.
                pred_no_change = random_forest_no_change.predict(reference_window_x)
                accuracy_no_change = accuracy_score(reference_window_y, pred_no_change)
                eval_accuracy_no_change.append(accuracy_no_change)

                pred_distance = random_forest_distance.predict(reference_window_x)
                accuracy_distance = accuracy_score(reference_window_y, pred_distance)
                eval_accuracy_distance.append(accuracy_distance)

                batch = []
                batch_index = batch_index + 1
            else:
                current_window.extend(batch)

                # The sample is only used for the size printout below; the drift check runs
                # on the full reference window. (A second, identical and unused
                # random.sample call was removed here.)
                sample_reference_window = random.sample(reference_window, len(current_window))
                print(len(reference_window))
                print(len(sample_reference_window))

                reference_window_df = pd.DataFrame(reference_window)
                reference_window_df.columns = reference_window_df.columns.astype(str)
                reference_window_x = reference_window_df.drop('46', axis=1)

                current_window_df = pd.DataFrame(current_window)
                current_window_df.columns = current_window_df.columns.astype(str)
                current_window_x = current_window_df.drop('46', axis=1)
                current_window_y = current_window_df['46']

                pred_no_change = random_forest_no_change.predict(current_window_x)
                accuracy_no_change = accuracy_score(current_window_y, pred_no_change)
                eval_accuracy_no_change.append(accuracy_no_change)

                pred_distance = random_forest_distance.predict(current_window_x)
                accuracy_distance = accuracy_score(current_window_y, pred_distance)
                eval_accuracy_distance.append(accuracy_distance)

                drift_flag, drift_feature = Check_Significant_Wasserstein_Distance(reference_window_x, current_window_x)

                if drift_flag:
                    drifts_detected.append(batch_index)
                    print('Drift Detected at batch ' + str(batch_index))
                    # Refit the adaptive model on the drifted batch.
                    random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
                    random_forest_distance.fit(current_window_x, current_window_y)

                # Sliding reference: the batch just evaluated becomes the next reference.
                reference_window = []
                reference_window.extend(current_window)
                current_window = []
                batch = []
                batch_index = batch_index + 1

# Example

In [12]:
def Check_Significant_Wasserstein_Distance(df_1, df_2, threshold=0.05,
                                           tracked_features=('29', '30', '31', '32', '33', '34')):
    """Flag features whose distribution moved between two windows.

    For every column of ``df_1`` the 1-D Wasserstein distance between that column in
    ``df_1`` and the same column in ``df_2`` is computed with
    ``scipy.stats.wasserstein_distance``.

    NOTE: this definition replaces the earlier function of the same name in this
    notebook, which returns only two values. Cells that unpack two values from the call
    will fail once this cell has been executed.

    Parameters
    ----------
    df_1, df_2 : pandas.DataFrame
        Reference and current windows. ``df_2`` must contain every column of ``df_1``;
        column labels must be strings (they are concatenated into the log line).
    threshold : float, optional
        A feature is reported as drifted when its distance is strictly greater than this
        value. Defaults to 0.05.
    tracked_features : collection of str, optional
        Columns whose distance is always logged and collected, drifted or not. Defaults
        to features '29' through '34'.

    Returns
    -------
    significant_flag : bool
        True when at least one feature exceeded ``threshold``.
    feature_detected : list of str
        Names of the features that exceeded ``threshold``, in column order.
    feature_distance_example : list of float
        Distances of the tracked features, in the order they appear in ``df_1.columns``.
    """
    significant_flag = False
    feature_detected = []
    feature_distance_example = []

    for feature in df_1.columns:
        distance = stats.wasserstein_distance(df_1[feature], df_2[feature])

        is_tracked = feature in tracked_features
        is_significant = distance > threshold

        # Log each feature once, even when it is both tracked and above the threshold
        # (the two separate prints used to emit the identical line twice).
        if is_tracked or is_significant:
            print("Feature Name: " + feature + " Distance: " + str(distance))

        if is_tracked:
            feature_distance_example.append(distance)

        if is_significant:
            significant_flag = True
            feature_detected.append(feature)

    return significant_flag, feature_detected, feature_distance_example

In [14]:
# Worked example on the synthetic feature-bank stream: a single pass that records, for
# every evaluated batch, the Wasserstein distances of the tracked features returned by
# Check_Significant_Wasserstein_Distance. The scoring code (TPR / FPR / delay and the
# per-feature interpretation check) is kept below in disabled form.
Evaluation_Iteration = 1
Batch_Size = 2500
# Drift_Batch = 9
# Feature_Shift = [0,2,3,6,8,9,29,31,34,36,44,47]
# Feature_Not_Shift = [1,4,5,7,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,30,32,33,35,37,38,39,40,41,42,43,45,46]
# print(len(Feature_Shift))
# print(len(Feature_Not_Shift))

# TPR_Total = []
# FPR_Total = []
# Delay_Total = []
# Batch_9_Interpretation_TPR_Total = []
# Batch_9_Interpretation_FPR_Total = []


for i in range(Evaluation_Iteration):
    print("Currently Working on Iteration: " + str(i+1))

    # Per-iteration stream state.
    batch_index, batch = 0, []
    reference_window, current_window = [], []
    drifts_detected = []
    eval_accuracy_no_change, eval_accuracy_distance = [], []

    # Counters used only by the disabled scoring code.
    TP = TN = FP = FN = 0
    Delay = 0
    Delay_Stop_Count_Flag = False

    with open('Datasets/Synthetic_Feature_Bank_Abrupt.csv') as csv_file:
#     with open('Datasets/Synthetic_Feature_Bank_Gradual.csv') as csv_file:
        feature_distance = []  # one list of tracked-feature distances per evaluated batch
        csv_reader = csv.reader(csv_file)
        for row in csv_reader:
            batch.append(row)
            if len(batch) != Batch_Size:
                # Keep buffering until a full batch is available.
                continue

            print("Currently Working on Batch " + str(batch_index))

            if batch_index == 0:
                # Batch 0 seeds the reference window and trains both models.
                reference_window.extend(batch)
                reference_window_df = pd.DataFrame(reference_window)
                reference_window_df.columns = reference_window_df.columns.astype(str)
                # Column '48' is used as the label; every other column is a feature.
                reference_window_x = reference_window_df.drop('48', axis=1)
                reference_window_y = reference_window_df['48']

                random_forest_no_change = RandomForestClassifier(n_estimators=20,random_state=42)
                random_forest_no_change.fit(reference_window_x, reference_window_y)

                random_forest_distance = RandomForestClassifier(n_estimators=20, random_state=42)
                random_forest_distance.fit(reference_window_x, reference_window_y)

                batch = []
                batch_index += 1
                continue

            current_window.extend(batch)

            # Draw as many reference rows as the current window holds.
            sample_reference_window = random.sample(reference_window, len(current_window))

#                     print(len(reference_window))
#                     print(len(sample_reference_window))

            reference_window_df = pd.DataFrame(sample_reference_window)
            reference_window_df.columns = reference_window_df.columns.astype(str)
            reference_window_x = reference_window_df.drop('48', axis=1)

            current_window_df = pd.DataFrame(current_window)
            current_window_df.columns = current_window_df.columns.astype(str)
            current_window_x = current_window_df.drop('48', axis=1)
            current_window_y = current_window_df['48']

            # Score both models on the incoming batch before any retraining.
            pred_no_change = random_forest_no_change.predict(current_window_x)
            accuracy_no_change = accuracy_score(current_window_y, pred_no_change)
            eval_accuracy_no_change.append(accuracy_no_change)

            pred_distance = random_forest_distance.predict(current_window_x)
            accuracy_distance = accuracy_score(current_window_y, pred_distance)
            eval_accuracy_distance.append(accuracy_distance)

            drift_flag, drift_feature, feature_distance_example = Check_Significant_Wasserstein_Distance(reference_window_x, current_window_x)
            feature_distance.append(feature_distance_example)
#                     if batch_index == Drift_Batch:
#                         if drift_flag:
#                             Delay = Batch_Size - 106
#                             Delay_Stop_Count_Flag = True
#                         else:
#                             Delay = Batch_Size
#                     elif batch_index > Drift_Batch:
#                         if not(Delay_Stop_Count_Flag):
#                             if drift_flag:
#                                 Delay = Delay + Batch_Size - 106
#                                 Delay_Stop_Count_Flag = True
#                             else:
#                                 Delay + Batch_Size

            if drift_flag:
                print('Drift Detected at Batch ' + str(batch_index))

#                         shap_drift_flag, feature_interpretation_shift_detected = Check_Significant_Shap(random_forest_distance, reference_window_x, current_window_x)
#                         eli5_flag, feature_interpretation_shift_detected = Check_Significant_ELI5_Local(random_forest_distance, reference_window_df, current_window_df)
#                         PI_flag, feature_interpretation_shift_detected = Check_Significant_Permutation_Importance(random_forest_distance, reference_window_df, current_window_df)
#                         print(feature_interpretation_shift_detected)
#                         Interpretation_TP = 0
#                         Interpretation_FP = 0
#                         Interpretation_TN = 0
#                         Interpretation_FN = 0
#                         for feature in feature_interpretation_shift_detected:
#                             if feature in Feature_Shift:
#                                 Interpretation_TP = Interpretation_TP + 1
#                             else:
#                                 Interpretation_FP = Interpretation_FP + 1
#                         for feature in Feature_Shift:
#                             if feature not in feature_interpretation_shift_detected:
#                                 Interpretation_FN = Interpretation_FN + 1
#                         for featurea in Feature_Not_Shift:
#                             if featurea not in feature_interpretation_shift_detected:
#                                 Interpretation_TN = Interpretation_TN + 1
#                         print(Interpretation_TP)
#                         print(Interpretation_FN)
#                         print(Interpretation_TN)
#                         print(Interpretation_FP)
#                         Interpretation_TPR = Interpretation_TP / (Interpretation_TP + Interpretation_FN)
#                         Interpretation_FPR = Interpretation_FP / (Interpretation_FP + Interpretation_TN)
#                         print(Interpretation_TPR)
#                         print(Interpretation_FPR)
#                         if batch_index == 9:
#                             Batch_9_Interpretation_TPR_Total.append(Interpretation_TPR)
#                             Batch_9_Interpretation_FPR_Total.append(Interpretation_FPR)

                drifts_detected.append(batch_index)
                # Refit the adaptive model on the drifted batch and restart the reference
                # window from it. Without a drift the reference window is left untouched.
                random_forest_distance = RandomForestClassifier(n_estimators=20)
                random_forest_distance.fit(current_window_x, current_window_y)
                reference_window = list(current_window)

#                         if batch_index == Drift_Batch:
#                             TP = TP + 1
#                         else:
#                             FP = FP + 1
#                     else:
#                         reference_window.extend(current_window)
#                         if batch_index == Drift_Batch:
#                             FN = FN + 1
#                         else:
#                             TN = TN + 1
#                     reference_window = []
#                     reference_window.extend(current_window)
            current_window = []
            batch = []
            batch_index += 1
                    
#         TPR = TP / (TP + FN)
#         FPR = FP / (FP + TN)
#         TPR_Total.append(TPR)
#         FPR_Total.append(FPR)
#         Delay_Total.append(Delay)

# mean_TPR = statistics.mean(TPR_Total)
# stdev_TPR = statistics.stdev(TPR_Total)
# mean_FPR = statistics.mean(FPR_Total)
# stdev_FPR = statistics.stdev(FPR_Total)
# mean_Delay = statistics.mean(Delay_Total)
# stdev_Delay = statistics.stdev(Delay_Total)

# print("TPR Total: " + str(TPR_Total))
# print("FPR Total: " + str(FPR_Total))
# print("Delay Total: " + str(Delay_Total))
# print("TPR Mean: " + str(mean_TPR))
# print("TPR Std: " + str(stdev_TPR))
# print("FPR Mean: " + str(mean_FPR))
# print("FPR Std: " + str(stdev_FPR))
# print("Delay Mean: " + str(mean_Delay))
# print("Delay Std: " + str(stdev_Delay))

# print("Feature Drift Interpretation")
# mean_Batch_9_Interpretation_TPR = statistics.mean(Batch_9_Interpretation_TPR_Total)
# stdev_Batch_9_Interpretation_TPR = statistics.stdev(Batch_9_Interpretation_TPR_Total)
# mean_Batch_9_Interpretation_FPR = statistics.mean(Batch_9_Interpretation_FPR_Total)
# stdev_Batch_9_Interpretation_FPR = statistics.stdev(Batch_9_Interpretation_FPR_Total)
# print("TPR Batch 9 Total: " + str(Batch_9_Interpretation_TPR_Total))
# print("FPR Batch 9 Total: " + str(Batch_9_Interpretation_FPR_Total))
# print("TPR Batch 9 Interpretation Mean: " + str(mean_Batch_9_Interpretation_TPR))
# print("TPR Batch 9 Interpretation Std: " + str(stdev_Batch_9_Interpretation_TPR))
# print("FPR Batch 9 Interpretation Mean: " + str(mean_Batch_9_Interpretation_FPR))
# print("FPR Batch 9 Interpretation Std: " + str(stdev_Batch_9_Interpretation_FPR))

Currently Working on Iteration: 1
Currently Working on Batch 0
Currently Working on Batch 1
Feature Name: 29 Distance: 0.009599999999999997
Feature Name: 30 Distance: 0.008399999999999963
Feature Name: 31 Distance: 0.017999999999999905
Feature Name: 32 Distance: 0.0015999999999999348
Feature Name: 33 Distance: 0.0016000000000000458
Feature Name: 34 Distance: 0.0007999999999999119
Currently Working on Batch 2
Feature Name: 29 Distance: 0.0
Feature Name: 30 Distance: 0.00759999999999994
Feature Name: 31 Distance: 0.00759999999999994
Feature Name: 32 Distance: 0.00039999999999995595
Feature Name: 33 Distance: 0.007199999999999984
Feature Name: 34 Distance: 0.005599999999999938
Currently Working on Batch 3
Feature Name: 29 Distance: 0.004400000000000015
Feature Name: 30 Distance: 0.005599999999999938
Feature Name: 31 Distance: 0.0011999999999999789
Feature Name: 32 Distance: 0.0011999999999999789
Feature Name: 33 Distance: 0.0015999999999999348
Feature Name: 34 Distance: 0.0027999999999999

In [19]:
# Regroup the per-batch rows collected in `feature_distance` into one series per tracked
# feature. Each row holds the distances of features 29..34 in that order (positions 0..5).
feature_29_distance = [distances[0] for distances in feature_distance]
feature_30_distance = [distances[1] for distances in feature_distance]
feature_31_distance = [distances[2] for distances in feature_distance]
feature_32_distance = [distances[3] for distances in feature_distance]
feature_33_distance = [distances[4] for distances in feature_distance]
feature_34_distance = [distances[5] for distances in feature_distance]

print("Feature 29: " + str(feature_29_distance))
print("Feature 30: " + str(feature_30_distance))
# Label typo fixed: this line used to print "Featire 31".
print("Feature 31: " + str(feature_31_distance))
print("Feature 32: " + str(feature_32_distance))
print("Feature 33: " + str(feature_33_distance))
print("Feature 34: " + str(feature_34_distance))

Feature 29: [0.009599999999999997, 0.0, 0.004400000000000015, 0.0011999999999999789, 0.011199999999999988, 0.010799999999999976, 0.014400000000000024, 0.01200000000000001, 0.058825974016399996, 0.023594805190000002, 0.010109090905600008, 0.008477922075200005, 0.0140103896072, 0.004384415584, 0.011054545451199995, 0.005480519482, 0.0174077922056]
Feature 30: [0.008399999999999963, 0.00759999999999994, 0.005599999999999938, 0.005199999999999982, 0.0008000000000000229, 0.0031999999999999806, 0.01079999999999992, 0.007199999999999984, 0.00040000000000006697, 0.0028000000000000247, 0.009600000000000053, 0.014800000000000035, 0.010000000000000009, 0.0028000000000000247, 0.008399999999999963, 0.011600000000000055, 0.0048000000000000265]
Featire 31: [0.017999999999999905, 0.00759999999999994, 0.0011999999999999789, 0.006399999999999961, 0.010399999999999965, 0.013999999999999901, 0.0252, 0.019199999999999995, 0.0252, 0.02079999999999993, 0.017199999999999993, 0.025599999999999956, 0.0259999999