In [5]:
from __future__ import absolute_import, print_function

# --- System ---
import os
import sys
import time
import warnings

# --- Utility ---
import pandas as pd
import numpy as np
import math
import random
import logging
import pickle
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

# --- Plot ---
import matplotlib.pyplot as plt
# %matplotlib inline
import seaborn as sns

# --- Pytorch ---
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.backends.cudnn as cudnn

from torch.utils.data import Dataset, DataLoader, TensorDataset
from tqdm import tqdm
from datetime import datetime
from torch.utils.data import random_split

# -----------------------------------------------------------
# random weight initialization
def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
root_dir = os.getcwd()

# -----------------------------------------------------------
# data loading and preprocessing
dataPath = "data/statistics-4.csv" # "data/statistics (pacing).csv"
df = pd.read_csv(dataPath)

# Dropping columns that are not required at the moment
df = df.drop(columns=[ 'Unnamed: 0', 'UUID', 'HOSTNAME', 'ALIAS', 'TIMESTAMP',
                       'THROUGHPUT (Receiver)', 'LATENCY (min.)', 'LATENCY (max.)', 
                       'CONGESTION (Receiver)', 'BYTES (Receiver)'
                     ])

# Pre-processing
pacing = df['PACING'].values
for i, p in enumerate(pacing):
    v, _ = p.split("gbit")
    pacing[i] = float(v)

df['PACING'] = pacing
df['CONGESTION (Sender)'] = (df['CONGESTION (Sender)'] == 'cubic').astype(int)

X = df[['THROUGHPUT (Sender)', 'LATENCY (mean)', 'RETRANSMITS', 'STREAMS', 'CONGESTION (Sender)']].values
y = df['PACING'].values
y = y.astype('int')

# Normalization
minmax_scale = preprocessing.MinMaxScaler().fit(df[['THROUGHPUT (Sender)', 'LATENCY (mean)', 'RETRANSMITS', 'STREAMS', 'CONGESTION (Sender)']])
df_minmax = minmax_scale.transform(df[['THROUGHPUT (Sender)', 'LATENCY (mean)', 'RETRANSMITS', 'STREAMS', 'CONGESTION (Sender)']])

final_df = pd.DataFrame(df_minmax, columns=['THROUGHPUT (Sender)', 'LATENCY (mean)', 'RETRANSMITS', 'STREAMS', 'CONGESTION (Sender)'])
X = final_df[['THROUGHPUT (Sender)', 'LATENCY (mean)', 'RETRANSMITS', 'STREAMS', 'CONGESTION (Sender)']].values

X_train, X_test, y_train, y_test = train_test_split(X, y, 
                                                    test_size=0.25,
                                                    random_state=1)

X_train = torch.tensor(X_train)
y_train = torch.tensor(y_train)
X_test  = torch.tensor(X_test)
y_test  = torch.tensor(y_test) 

# -----------------------------------------------------------

# Custom data loader for ELK stack dataset
class PacingDataset(Dataset):
    """
    TensorDataset with support of transforms.
    """
    def __init__(self, tensors, transform=None):
        assert all(tensors[0].size(0) == tensor.size(0) for tensor in tensors)
        self.tensors = tensors
        self.transform = transform

    def __getitem__(self, index):
        x = self.tensors[0][index]

        if self.transform:
            x = self.transform(x)

        y = self.tensors[1][index]

        return x, y

    def __len__(self):
        return self.tensors[0].size(0)

# -----------------------------------------------------------

# accuracy computation
def accuracy(model, ds, pct):
    # assumes model.eval()
    # percent correct within pct of true pacing rate
    n_correct = 0; n_wrong = 0

    for i in range(len(ds)):
        (X, Y) = ds[i]                # (predictors, target)
        X, Y = X.float(), Y.float()
        with torch.no_grad():
            output = model(X)         # computed price

        abs_delta = np.abs(output.item() - Y.item())
        max_allow = np.abs(pct * Y.item())
        if abs_delta < max_allow:
            n_correct +=1
        else:
            n_wrong += 1

    acc = (n_correct * 1.0) / (n_correct + n_wrong)
    return acc*100

# -----------------------------------------------------------

# model definition
class PacingOptimizer(nn.Module):
    # https://visualstudiomagazine.com/Articles/2021/02/11/pytorch-define.aspx?Page=2
    def __init__(self):
        super(PacingOptimizer, self).__init__()
        self.hid1 = torch.nn.Linear(5, 256)
        self.drop1 = torch.nn.Dropout(0.50)
        
        self.hid2 = torch.nn.Linear(256, 480)
        self.drop2 = torch.nn.Dropout(0.50)
        
        self.hid3 = torch.nn.Linear(480, 960)
        self.drop3 = torch.nn.Dropout(0.50)

        self.hid4 = torch.nn.Linear(960, 256)
        self.oupt = torch.nn.Linear(256, 1)

        torch.nn.init.xavier_uniform_(self.hid1.weight)
        torch.nn.init.zeros_(self.hid1.bias)
        torch.nn.init.xavier_uniform_(self.hid2.weight)
        torch.nn.init.zeros_(self.hid2.bias)
        torch.nn.init.xavier_uniform_(self.hid3.weight)
        torch.nn.init.zeros_(self.hid3.bias)
        torch.nn.init.xavier_uniform_(self.oupt.weight)
        torch.nn.init.zeros_(self.oupt.bias)
        self.lrelu = torch.nn.LeakyReLU()

    def forward(self, x):
        # z = self.drop1(torch.relu(self.hid1(x)))
        # z = self.drop2(torch.relu(self.hid2(z)))
        # z = self.drop3(torch.relu(self.hid3(z)))
        # z = torch.relu(self.hid4(z))
        # z = self.oupt(z)  # no activation
        z = self.drop1(self.lrelu(self.hid1(x)))
        z = self.drop2(self.lrelu(self.hid2(z)))
        z = self.drop3(self.lrelu(self.hid3(z)))
        z = self.lrelu(self.hid4(z))
        z = self.oupt(z)  # no activation
        return z

# -----------------------------------------------------------

model = PacingOptimizer()

# Hyperparameters
EPOCH = 500
BATCH = 64
LEARNING_RATE = 0.05

INTERVAL = 50
SAVE = False
BESTLOSS = 10

criterion = nn.MSELoss(reduction='mean') # 'mean', 'sum'. 'none'
optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
# optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9)

print("\nBatch Size = %3d " % BATCH)
print("Loss = " + str(criterion))
print("Pptimizer = Adam")
print("Max Epochs = %3d " % EPOCH)
print("Learning Rate = %0.3f " % LEARNING_RATE)

# Dataset w/o any tranformations
traindata   = PacingDataset(tensors=(X_train, y_train), transform=None)
trainloader = torch.utils.data.DataLoader(traindata, batch_size=BATCH)

testdata    = PacingDataset(tensors=(X_test, y_test), transform=None)
testloader = torch.utils.data.DataLoader(testdata, batch_size=BATCH)

print("\nStarting training with saved checkpoints")

model.train()
for epoch in range(0, EPOCH):
    torch.manual_seed(epoch+1) # recovery reproducibility
    epoch_loss = 0             # for one full epoch

    for (batch_idx, batch) in enumerate(trainloader):
        (xs, ys) = batch                # (predictors, targets)
        xs, ys = xs.float(), ys.float()
        optimizer.zero_grad()           # prepare gradients

        output = model(xs)              # predicted pacing rate
        loss = criterion(ys, output)    # avg per item in batch

        epoch_loss += loss.item()       # accumulate averages
        loss.backward()                 # compute gradients
        optimizer.step()                # update weights

    if epoch % INTERVAL == 0:
        print("Epoch = %4d    Loss = %0.4f" % (epoch, epoch_loss))

        # save checkpoint
        dt = time.strftime("%Y_%m_%d-%H_%M_%S")
        fn = str(dt) + str("-") + str(epoch) + "_ckpt.pt"

        info_dict = {
            'epoch' : epoch,
            'model_state' : model.state_dict(),
            'optimizer_state' : optimizer.state_dict()
        }
        if SAVE:
            torch.save(info_dict, fn)

print("\nDone")

# evaluate model accuracy
model.eval()
gap = 0.50
acc_train = accuracy(model, traindata, gap)
print(f"Accuracy (within {gap:.2f}) on train data = {acc_train:.2f}%")


# make prediction
tput, lat, loss, streams, cong = 0.149677, 0.577766, 1.00000, 0.0, 1.0
print(f"\nPredicting pacing rate for:\n\
    (norm. values)\n\
    throughput = {tput}\n\
    latency = {lat}\n\
    loss = {loss}\n\
    congestion = {cong}\n\
    streams = {streams}")

# converting the sample to tensor array
ukn = np.array([[tput, lat, loss, streams, cong]], dtype=np.float32)
sample = torch.tensor(ukn, dtype=torch.float32).to(device)

# testing the sample
with torch.no_grad():
    model.eval()
    pred = model(sample)
pred = pred.item()
print(f"\nPacing rate: {pred:.4f}\n")


Batch Size =  64 
Loss = MSELoss()
Pptimizer = Adam
Max Epochs = 500 
Learning Rate = 0.050 

Starting training with saved checkpoints
Epoch =    0    Loss = 2232.5319
Epoch =   50    Loss = 406.3627
Epoch =  100    Loss = 406.4533
Epoch =  150    Loss = 406.4577
Epoch =  200    Loss = 406.4579
Epoch =  250    Loss = 406.4579


KeyboardInterrupt: 

In [1]:
from __future__ import absolute_import, print_function

# --- System ---
import os
import sys
import warnings

# --- Utility ---
import pandas as pd
import numpy as np
import math
import random
import logging
import pickle
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import train_test_split
from sklearn import preprocessing

# --- Plot --
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# --- Pytorch ---
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.backends.cudnn as cudnn

from torch.utils.data import Dataset, DataLoader, TensorDataset
from tqdm import tqdm
from datetime import datetime
from torch.utils.data import random_split

def seed_everything(seed=42):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

seed_everything()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
root_dir = os.getcwd()

In [2]:
dataPath = "data/statistics-3.csv"
df = pd.read_csv(dataPath)
# columnList = df.columns

# Dropping columns that are not required at the moment
df = df.drop(columns=[ 'Unnamed: 0', 'UUID', 'HOSTNAME', 'ALIAS', 'TIMESTAMP',
                       'THROUGHPUT (Receiver)', 'LATENCY (min.)', 'LATENCY (max.)', 
                       'CONGESTION (Receiver)', 'BYTES (Receiver)'
                     ])

# Pre-processing
pacing = df['PACING'].values
for i, p in enumerate(pacing):
    v, _ = p.split("gbit")
    pacing[i] = int(v)

df['PACING'] = pacing
df['CONGESTION (Sender)'] = (df['CONGESTION (Sender)'] == 'cubic').astype(int) # Cubic = 1 & BBRV2 = 0

X = df[['THROUGHPUT (Sender)', 'LATENCY (mean)', 'RETRANSMITS', 'STREAMS', 'CONGESTION (Sender)']].values
y = df['PACING'].values
y = y.astype('int')

In [3]:
df.head(5)

Unnamed: 0,STREAMS,PACING,THROUGHPUT (Sender),LATENCY (mean),RETRANSMITS,CONGESTION (Sender)
0,1,1,1630381000.0,30185.5,1535,1
1,1,1,1659032000.0,30193.0,1344,1
2,1,1,988743900.0,31422.5,0,1
3,1,2,1976052000.0,31420.5,0,1
4,1,3,2962443000.0,31489.0,0,1


In [4]:
# # Standerdization
# std_scale = preprocessing.StandardScaler().fit(df[['THROUGHPUT (Sender)', 'LATENCY (mean)', 'RETRANSMITS', 'STREAMS', 'CONGESTION (Sender)']])
# df_std = std_scale.transform(df[['THROUGHPUT (Sender)', 'LATENCY (mean)', 'RETRANSMITS', 'STREAMS', 'CONGESTION (Sender)']])

# # Normalization
minmax_scale = preprocessing.MinMaxScaler().fit(df[['THROUGHPUT (Sender)', 'LATENCY (mean)', 'RETRANSMITS', 'STREAMS', 'CONGESTION (Sender)']])
df_minmax = minmax_scale.transform(df[['THROUGHPUT (Sender)', 'LATENCY (mean)', 'RETRANSMITS', 'STREAMS', 'CONGESTION (Sender)']])

final_df = pd.DataFrame(df_minmax, columns=['THROUGHPUT (Sender)', 'LATENCY (mean)', 'RETRANSMITS', 'STREAMS', 'CONGESTION (Sender)'])

X = final_df[['THROUGHPUT (Sender)', 'LATENCY (mean)', 'RETRANSMITS', 'STREAMS', 'CONGESTION (Sender)']].values

final_df.head(5)

Unnamed: 0,THROUGHPUT (Sender),LATENCY (mean),RETRANSMITS,STREAMS,CONGESTION (Sender)
0,0.149677,0.577766,1.0,0.0,1.0
1,0.152625,0.577916,0.87557,0.0,1.0
2,0.083649,0.602453,0.0,0.0,1.0
3,0.185249,0.602413,0.0,0.0,1.0
4,0.286755,0.60378,0.0,0.0,1.0


In [6]:
EPOCH = 400
BATCH = 32
LEARNING_RATE = 0.001

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

X_train = torch.tensor(X_train)
y_train = torch.tensor(y_train)
X_test  = torch.tensor(X_test)
y_test  = torch.tensor(y_test) 

class CustomTensorDataset(Dataset):
    """
    TensorDataset with support of transforms.
    """
    def __init__(self, tensors, transform=None):
        assert all(tensors[0].size(0) == tensor.size(0) for tensor in tensors)
        self.tensors = tensors
        self.transform = transform

    def __getitem__(self, index):
        x = self.tensors[0][index]

        if self.transform:
            x = self.transform(x)

        y = self.tensors[1][index]

        return x, y

    def __len__(self):
        return self.tensors[0].size(0)

# Dataset w/o any tranformations
traindata   = CustomTensorDataset(tensors=(X_train, y_train), transform=None)
trainloader = torch.utils.data.DataLoader(traindata, batch_size=BATCH)

testdata    = CustomTensorDataset(tensors=(X_test, y_test), transform=None)
testloader = torch.utils.data.DataLoader(testdata, batch_size=BATCH)

print(len(traindata), len(testdata))


128 56


In [8]:
for xs, ys in trainloader:
    print(xs, ys)
    break

tensor([[0.1857, 0.4042, 0.0000, 1.0000, 1.0000],
        [0.1815, 0.3843, 0.0000, 0.0000, 0.0000],
        [0.4904, 0.4109, 0.0000, 0.0000, 1.0000],
        [0.1820, 0.5853, 0.0000, 1.0000, 0.0000],
        [0.1851, 0.6036, 0.0000, 1.0000, 1.0000],
        [0.0107, 0.9786, 0.0143, 0.0000, 1.0000],
        [0.0820, 0.3894, 0.0000, 0.0000, 0.0000],
        [0.3890, 0.4057, 0.0000, 0.0000, 1.0000],
        [0.0019, 0.9790, 0.0137, 0.0000, 1.0000],
        [0.2507, 0.6061, 0.0000, 1.0000, 1.0000],
        [0.0837, 0.6025, 0.0000, 0.0000, 1.0000],
        [0.0288, 0.9800, 0.0202, 0.0000, 1.0000],
        [0.5922, 0.4100, 0.0000, 0.0000, 1.0000],
        [0.1856, 0.4042, 0.0000, 1.0000, 1.0000],
        [0.0214, 0.9787, 0.0169, 1.0000, 1.0000],
        [0.0104, 0.9787, 0.0143, 0.0000, 1.0000],
        [0.1818, 0.3883, 0.0000, 0.0000, 0.0000],
        [0.6961, 0.0042, 0.0000, 0.0000, 1.0000],
        [0.1805, 0.9882, 0.1036, 0.0000, 0.0000],
        [0.0234, 0.9802, 0.0169, 0.0000, 1.0000],


In [15]:
class PacingOptimizer(nn.Module):
    def __init__(self):
        super(PacingOptimizer, self).__init__()
        self.fc1 = torch.nn.Linear (5, 32)
        self.fc2 = torch.nn.Linear (32, 32)
        self.fc3 = torch.nn.Linear (32, 32)
        self.fc4 = torch.nn.Linear (32, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return x

model = PacingOptimizer()
print( f"====================\nTotal params: {len(list(model.parameters()))}\n====================" )
# print(model)

Total params: 8


In [16]:
CE = nn.CrossEntropyLoss()
MSE = nn.MSELoss(reduction='mean')
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9)
# optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE)
bestloss = 10

for epoch in range(EPOCH):  # loop over the dataset multiple times

    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        # get the inputs; data is a list of [inputs, labels]
        xs, ys = data
        xs, ys = xs.float(), ys.float()

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        output = model(xs)
        # print(output, ys)
        
        # loss = CE(output, ys)
        loss = MSE(ys, output)
        
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()

    print(f"[{epoch+1}/{EPOCH}] loss: {running_loss/len(trainloader.dataset):.3f}")
    running_loss = 0.0

print('Finished Training')

correct, acc, total = 0, 0, 0
with torch.no_grad():
    for xs, ys in testloader:
        xs, ys = xs.float(), ys.long()

        output = model(xs)
        loss = MSE(ys, output)
        running_loss += loss.item()
        # total += ys.size(0)
        # pred = torch.max(output, 1)[1]
        # correct += (pred == ys).sum().item()
    # acc = (100 * correct / total)
# print(acc)

[1/400] loss: 0.692
[2/400] loss: 0.649
[3/400] loss: 0.577
[4/400] loss: 0.488
[5/400] loss: 0.385
[6/400] loss: 0.278
[7/400] loss: 0.193
[8/400] loss: 0.172
[9/400] loss: 0.193
[10/400] loss: 0.186
[11/400] loss: 0.168
[12/400] loss: 0.166
[13/400] loss: 0.169
[14/400] loss: 0.169
[15/400] loss: 0.167
[16/400] loss: 0.165
[17/400] loss: 0.165
[18/400] loss: 0.165
[19/400] loss: 0.165
[20/400] loss: 0.164
[21/400] loss: 0.164
[22/400] loss: 0.164
[23/400] loss: 0.164
[24/400] loss: 0.164
[25/400] loss: 0.164
[26/400] loss: 0.164
[27/400] loss: 0.164
[28/400] loss: 0.164
[29/400] loss: 0.164
[30/400] loss: 0.164
[31/400] loss: 0.164
[32/400] loss: 0.164
[33/400] loss: 0.164
[34/400] loss: 0.164
[35/400] loss: 0.164
[36/400] loss: 0.164
[37/400] loss: 0.164
[38/400] loss: 0.164
[39/400] loss: 0.164
[40/400] loss: 0.164
[41/400] loss: 0.164
[42/400] loss: 0.164
[43/400] loss: 0.164
[44/400] loss: 0.164
[45/400] loss: 0.164
[46/400] loss: 0.164
[47/400] loss: 0.164
[48/400] loss: 0.164
[

In [None]:
# import os
# import sys

# import pandas as pd
# import numpy as np
# import matplotlib.pyplot as plt
# import seaborn as sns
# import warnings

# %matplotlib inline

# # --- Sklearn ---
# from sklearn.metrics import confusion_matrix, f1_score, accuracy_score
# from sklearn import decomposition, discriminant_analysis
# from sklearn.model_selection import GridSearchCV

# # --- Models ---
# from sklearn.tree import DecisionTreeClassifier
# from sklearn.ensemble import GradientBoostingClassifier
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.naive_bayes import GaussianNB
# from sklearn import svm
# from sklearn import neural_network
# from sklearn.linear_model import LogisticRegression

# from sklearn.preprocessing import LabelEncoder
# from sklearn.cross_validation import train_test_split

# # --- Utility ---
# import os
# import pickle, torch
# import numpy as np, pandas as pd
# import seaborn as sn
# import matplotlib.pyplot as plt


# dataPath = "data/statistics (pacing).csv"
# df_full = pd.read_csv(dataPath)
# print(df_full.describe())

In [None]:
columnList = df_full.columns
print(f"Total columns list: {columnList}")

# Dropping columns that are not required at the moment
df = df_full.drop(columns=[ 'Unnamed: 0', 'UUID', 'HOSTNAME', 'ALIAS', 'TIMESTAMP', 'STREAMS',
                            'THROUGHPUT (Receiver)', 'LATENCY (min.)', 'LATENCY (max.)', 
                            'CONGESTION (Receiver)', 'BYTES (Receiver)'
                          ])

print(f"New columns list: {df.columns}")

In [None]:
df.describe()

In [None]:
# df.head(5)

# Preprocessing

In [None]:
pacing = df['PACING'].values
for i, p in enumerate(pacing):
    v, _ = p.split("gbit")
    pacing[i] = int(v)

df['PACING'] = pacing
# df['CONGESTION (Sender)'] = df['CONGESTION (Sender)']=="cubic"=1
# df['CONGESTION (Sender)'] = df['CONGESTION (Sender)']=="bbr2"=0
df['CONGESTION (Sender)'] = (df['CONGESTION (Sender)'] == 'cubic').astype(int)

In [None]:

df.head(5)

In [None]:
# sns.set(style='whitegrid', context='notebook')
# cols = ['THROUGHPUT (Sender)', 'LATENCY (mean)', 'RETRANSMITS', 'CONGESTION (Sender)', 'PACING']

# sns.pairplot(df[cols], height=3)
# plt.tight_layout()
# # plt.savefig('./figures/scatter.png', dpi=300)
# plt.show()

# Dataset

In [None]:
X = df[['THROUGHPUT (Sender)', 'LATENCY (mean)', 'RETRANSMITS']].values
y = df['PACING'].values
y = y.astype('int')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

In [None]:
def train_and_tune(X, y, model, parameters, scoring='f1_macro', kfold=5, verbose=0):
    """
    X:          array-like of shape (n_samples, n_features)
    y:          array-like of shape (n_samples,)
    model:      (object) a sklearn model class
    parameters: (dict) contains the parameters you want to tune in the model
    metric:     (str) the metric used to evaluate the quality of the model
    return:     a trained model with the best parameters
    """
    cvSearchObj = GridSearchCV(model,
                               parameters,
                               scoring=scoring,
                               n_jobs=-1,
                               cv=kfold,
                               verbose=verbose)
    cvSearchObj.fit(X,y)
    return cvSearchObj.best_estimator_

def save_model(filename, model):
    """
    filename: Filename to save the model
    model:    Model weights to be saved
    """
    pickle.dump(model, open(filename, 'wb'))
    print("Model Saved")

def load_model(filename):
    """
    filename: Filename to load the model
    return:   Model weights that are reloaded
    """
    model_reloaded = pickle.load(open(filename, 'rb'))
    return model_reloaded


def MLP(train, save, test):
    filename = "./mlpBest.pkl"
    mlp = neural_network.MLPClassifier(random_state=999)
    if train:
        '''
        Train
        '''
        params = {"alpha" : [0.0001],
                "learning_rate_init" : [0.001],
                "batch_size" : [32, 64, 128],
                "activation" : ["relu"],
                "early_stopping" : [True],
                "hidden_layer_sizes" : [10, 50, 100],
                }

        mlpBest = train_and_tune(X, y,
                                 mlp,
                                 params,
                                 scoring='f1_macro',
                                 kfold=5)

        if save:
            save_model(filename, mlpBest)

    if test:
        '''
        Test
        '''
        mlpBest_reloaded = load_model(filename)
        pred = mlpBest_reloaded.predict(X)
        acc  = mlpBest_reloaded.score(X, y)
        
        # cf_matrix = confusion_matrix(y, pred)
        # df_cm = pd.DataFrame(cf_matrix/np.sum(cf_matrix) *10, index = [i for i in classes],
        #                      columns = [i for i in classes])
        # plt.figure(figsize = (12,10))
        # sn.heatmap(df_cm, annot=True)
        
        print("Accuracy: ", acc)

MLP(train=True, save=True, test=True)

In [None]:
le = LabelEncoder()
y = le.fit_transform(y)
# le.transform(['M', 'B'])

model = LogisticRegression()
model.fit(X_train, y_train)
print('Test Accuracy: %.3f' % model.score(X_test, y_test))