In [3]:
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn import metrics
from sklearn.model_selection import *
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# LOAD DATA
# Two comma-separated, header-less text files are read. The last column of the
# second file is appended to the first file's columns, rows whose appended value
# is -inf are removed, and three NumPy views of the result are derived.
#
# Raw string literals are used for the Windows-style paths: the original
# non-raw strings contained `\d`, an invalid escape sequence that newer Python
# versions warn about. The resulting path text is unchanged.
dataframe = pd.read_csv(r'ML\data\dynamic_pus_power_fixed_using_pus50000_15PUs_201912_2712_42_200_2.txt', delimiter=',', header=None)
dataframe_max = pd.read_csv(r'ML\data\dynamic_pus_power_fixed_max_power50000_15PUs_201912_2712_42_200.txt', delimiter=',', header=None)

# Re-number rows 0..n-1 so the column-wise concat below aligns by position.
dataframe = dataframe.reset_index(drop=True)
dataframe_max = dataframe_max.reset_index(drop=True)

# Append the last column of the second file (selected positionally, so it does
# not rely on the column labels being integers).
dataframe_tot = pd.concat([dataframe, dataframe_max.iloc[:, -1]], axis=1,
                          ignore_index=True)

# Drop every row whose appended (last) column equals -inf.
idx = dataframe_tot[dataframe_tot[dataframe_tot.columns[-1]] == -float('inf')].index
dataframe_tot = dataframe_tot.drop(idx)

values_tot = dataframe_tot.values
n_cols = dataframe_tot.shape[1]
# data_reg: every column except the two immediately before the last, followed by the last column.
data_reg = np.concatenate((values_tot[:, 0:n_cols-3],
                           values_tot[:, n_cols-1:n_cols]), axis=1)
# data_class: every column except the appended last one.
data_class = values_tot[:, 0:n_cols-1]
# y_class_power: the appended last column on its own.
y_class_power = values_tot[:, -1]
# Release the intermediate frames; only dataframe_tot and the arrays above are kept.
del dataframe, dataframe_max

In [None]:
# Split `data` into an integer feature matrix X (all but the last entry of each
# row) and a float target vector y (the last entry of each row).
# NOTE(review): `data` is not assigned in any visible cell -- presumably it is
# meant to be `data_class` or `data_reg` from the loading cell; confirm, since
# this cell raises NameError on a fresh kernel otherwise.
normalized = True  # read by the classifier cells to switch standardization on
m, n = len(data), len(data[0])
X = np.asarray([row[:n-1] for row in data])
y = np.asarray([float(row[-1]) for row in data])
del data
X = X.astype(int)

In [None]:
# Experiment sizes: the training-set sizes iterated over by the model cells,
# and how many rows of the held-out split the classifier cells score on.
# NOTE(review): training_samples is empty here, so the model loops do nothing
# until sizes are filled in.
training_samples, test_samples = [], 40000

In [None]:
# 1-NN classifier: accuracy, F1 and false-positive count for each training-set size.
# This cell also (re)initialises the total_* lists that later cells append to.
k = 1
total_accuracy = []
total_f1_score = []
total_fp = []
accuracy = []
f1_score = []
fp = []

# The split uses a fixed random_state, so it is the same for every training
# size; compute it once instead of once per loop iteration.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8511, random_state=4)
X_eval = X_test[:test_samples]
y_eval = y_test[:test_samples]
is_positive = y_eval == 1

for i_train, training_sample in enumerate(training_samples):
    X_fit = X_train[:training_sample]
    y_fit = y_train[:training_sample]
    X_score = X_eval
    if normalized:
        # Standardize with statistics of the training subset only.
        mu = np.mean(X_fit, axis=0)
        std = np.std(X_fit, axis=0)
        # A constant feature has std == 0; dividing by it would produce NaN/inf
        # and make the KNN fit fail, so leave such features unscaled.
        std[std == 0] = 1.0
        X_fit = (X_fit - mu) / std
        X_score = (X_eval - mu) / std
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_fit, y_fit)
    y_pred = knn.predict(X_score)
    accuracy.append(round(metrics.accuracy_score(y_eval, y_pred)*100, 2))
    f1_score.append(round(metrics.f1_score(y_eval, y_pred)*100, 2))
    # False positives: predicted label 1 where the true label is not 1.
    # Counted explicitly so the result does not depend on labels being 0/1.
    fp.append(int(np.sum((y_pred == 1) & ~is_positive)))
total_accuracy.append(accuracy)
total_f1_score.append(f1_score)
total_fp.append(fp)

In [None]:
# 4-NN classifier: accuracy, F1 and false-positive count for each training-set size.
# Results are appended to the total_* lists created by the 1-NN cell, so
# re-running this cell alone appends a duplicate series.
k = 4
accuracy = []
f1_score = []
fp = []

# The split uses a fixed random_state, so it is the same for every training
# size; compute it once instead of once per loop iteration.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8511, random_state=4)
X_eval = X_test[:test_samples]
y_eval = y_test[:test_samples]
is_positive = y_eval == 1

for i_train, training_sample in enumerate(training_samples):
    X_fit = X_train[:training_sample]
    y_fit = y_train[:training_sample]
    X_score = X_eval
    if normalized:
        # Standardize with statistics of the training subset only.
        mu = np.mean(X_fit, axis=0)
        std = np.std(X_fit, axis=0)
        # A constant feature has std == 0; dividing by it would produce NaN/inf
        # and make the KNN fit fail, so leave such features unscaled.
        std[std == 0] = 1.0
        X_fit = (X_fit - mu) / std
        X_score = (X_eval - mu) / std
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_fit, y_fit)
    y_pred = knn.predict(X_score)
    accuracy.append(round(metrics.accuracy_score(y_eval, y_pred)*100, 2))
    f1_score.append(round(metrics.f1_score(y_eval, y_pred)*100, 2))
    # False positives: predicted label 1 where the true label is not 1.
    # Counted explicitly so the result does not depend on labels being 0/1.
    fp.append(int(np.sum((y_pred == 1) & ~is_positive)))
total_accuracy.append(accuracy)
total_f1_score.append(f1_score)
total_fp.append(fp)

In [None]:
# KNN regression
# Distance-weighted 4-NN regressor: for each training-set size, record the mean
# absolute error and the mean over-prediction (y_pred above y_test) on the full
# held-out split.
k = 4
# Standardization is switched off for the regressor (the original code had the
# branch hard-coded as `if False:`); flip this flag to enable it.
normalize_regression = False
average_reg_diff_power, best_c_reg_lst, fp_mean_power = [], [], []

# The split uses a fixed random_state, so it is the same for every training
# size; compute it once instead of once per loop iteration.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8511, random_state=4)

for i_train, training_sample in enumerate(training_samples):
    X_fit = X_train[:training_sample]
    y_fit = y_train[:training_sample]
    X_score = X_test
    if normalize_regression:
        mu = np.mean(X_fit, axis=0)
        std = np.std(X_fit, axis=0)
        # Guard constant features so the division cannot produce NaN/inf.
        std[std == 0] = 1.0
        X_fit = (X_fit - mu) / std
        X_score = (X_test - mu) / std
    knn = KNeighborsRegressor(n_neighbors=k, weights='distance')
    knn.fit(X_fit, y_fit)
    y_pred = knn.predict(X_score)

    #evaluating
    average_reg_diff_power.append(round(np.mean(np.absolute(y_test - y_pred)), 3))
    # Per-sample over-prediction: (y_pred - y_test) where the prediction is
    # above the target, 0 elsewhere.
    fp_samples = np.where(y_pred > y_test, y_pred - y_test, 0.0)
    fp_mean_power.append(round(np.mean(fp_samples), 3))
    print('Number_samples: ', training_sample, ' error: ', average_reg_diff_power[-1], ', fp_error:', fp_mean_power[-1])

In [None]:
# Accuracy versus number of training samples, one curve per evaluated model.
# Only the series actually present in total_accuracy are drawn (at most four),
# so the cell does not raise IndexError when fewer than four models have been
# run, and the legend lists only the curves that exist.
# Labels follow the order in which series are appended to total_accuracy.
accuracy_series_styles = [
    ('1-NN', ()),          # default line style
    ('4-NN', ('r--',)),
    ('LR', ('g.-',)),
    ('NN', ('y->',)),
]
plt.figure(figsize=(15,8))
plotted_labels = []
for series, (label, fmt) in zip(total_accuracy, accuracy_series_styles):
    plt.plot(training_samples, series, *fmt)
    plotted_labels.append(label)
plt.xlabel('# training samples')
plt.ylabel('Percent(%)')
plt.title('Accuracy of prediction')
plt.grid(True)
# plt.text(40, 50, '# Validation = 34k')
# plt.text(400, 45, '# Test = 34k')
plt.legend(plotted_labels)
plt.savefig('ML\\results\\changing_training_test34k_4kx4k_smallVal_compare_dynamicPUS.png')

In [None]:
# False-positive count versus number of training samples, one curve per model.
# Only the series actually present in total_fp are drawn (at most four), so the
# cell does not raise IndexError when fewer than four models have been run, and
# the legend lists only the curves that exist.
# Labels follow the order in which series are appended to total_fp.
fp_series_styles = [
    ('1-NN', ()),          # default line style
    ('4-NN', ('r--',)),
    ('LR', ('g.-',)),
    ('NN', ('y->',)),
]
plt.figure(figsize=(15,8))
plotted_labels = []
for series, (label, fmt) in zip(total_fp, fp_series_styles):
    plt.plot(training_samples, series, *fmt)
    plotted_labels.append(label)
plt.xlabel('# training samples')
plt.ylabel('#')
plt.title('False Positive')
plt.grid(True)
# plt.text(175, 300, '# Validation = 34k')
# plt.text(175, 10, '# Test = 34k')
plt.legend(plotted_labels)
plt.savefig('ML\\results\\changing_training_test34k_4kx4k_smallVal_compare_dynamicPUS_FP.png')