In [11]:
from __future__ import absolute_import, print_function

# --- System ---
import os
import sys
import warnings

# --- Utility ---
import pandas as pd
import numpy as np
import math
import random
import logging
import pickle

# --- Sklearn ---
from sklearn import neural_network
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# --- Plot --
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# --- Pytorch ---
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
# import torchvision.datasets as datasets
import torch.backends.cudnn as cudnn

from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
from datetime import datetime
from torch.utils.data import random_split

def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    seed: (int) value handed to Python's `random`, NumPy and PyTorch, and
          exported as PYTHONHASHSEED.

    cuDNN is additionally switched to deterministic mode with benchmarking
    disabled.
    """
    # Exported as a string because environment values must be text.
    os.environ['PYTHONHASHSEED'] = str(seed)

    # The three generators are independent of each other, so seed them in a loop.
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    cudnn.deterministic = True
    cudnn.benchmark = False

seed_everything()

# Keep the original preference for the second GPU, but only when it exists:
# "cuda:1" is an invalid device on a single-GPU machine even though
# torch.cuda.is_available() is True there.
if torch.cuda.device_count() > 1:
    device = torch.device("cuda:1")
elif torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Checkpoints are written relative to the directory the notebook was started in.
root_dir = os.getcwd()

In [12]:
dataPath = "data/statistics (pacing).csv"
df = pd.read_csv(dataPath)
# columnList = df.columns

# Dropping columns that are not required at the moment
df = df.drop(columns=[ 'Unnamed: 0', 'UUID', 'HOSTNAME', 'ALIAS', 'TIMESTAMP',
                       'THROUGHPUT (Receiver)', 'LATENCY (min.)', 'LATENCY (max.)',
                       'CONGESTION (Receiver)', 'BYTES (Receiver)'
                     ])

# Pre-processing
# Parse "<N>gbit" into the integer N with a vectorized string operation.
# The previous element-wise loop wrote into `df['PACING'].values`, which
# aliases the frame's own storage and is read-only under pandas Copy-on-Write.
# The pattern is strict on purpose: anything that is not a plain integer
# (optionally followed by "gbit") becomes NaN and makes `.astype(int)` raise
# instead of being silently truncated.
df['PACING'] = (
    df['PACING']
    .astype(str)
    .str.extract(r'^\s*(\d+)\s*(?:gbit.*)?$', expand=False)
    .astype(int)
)

# Binary flag: 1 for 'cubic', 0 for every other sender congestion algorithm.
df['CONGESTION (Sender)'] = (df['CONGESTION (Sender)'] == 'cubic').astype(int)

In [13]:
# Feature matrix: five sender-side columns. Target: the integer pacing value.
X = df[[
    'THROUGHPUT (Sender)',
    'LATENCY (mean)',
    'RETRANSMITS',
    'STREAMS',
    'CONGESTION (Sender)',
]].values
y = df['PACING'].values.astype('int')

# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    random_state=1,
)

In [14]:
EPOCH = 50
BATCH = 4
LEARNING_RATE = 0.001

# TensorDataset only accepts tensors (there is no `transform=` keyword), and
# torchvision's ToTensor/Normalize are image transforms, so the tabular
# features are standardized explicitly before the datasets are built.
X_train_t = torch.as_tensor(np.asarray(X_train, dtype=np.float32))
X_test_t  = torch.as_tensor(np.asarray(X_test,  dtype=np.float32))

# Per-feature statistics taken from the training split only, so that no
# information from the held-out rows leaks into the normalization.
mean = X_train_t.mean(dim=0, keepdim=True)
std  = X_train_t.std(dim=0, keepdim=True).clamp_min(1e-8)  # guard constant columns


def transform_(features):
    """Standardize a (rows, features) tensor with the training-split mean/std."""
    return (features - mean) / std


# Targets are stored as (N, 1) so they line up with the network's (N, 1) output
# instead of broadcasting against it inside the loss.
y_train_t = torch.as_tensor(np.asarray(y_train, dtype=np.float32)).unsqueeze(1)
y_test_t  = torch.as_tensor(np.asarray(y_test,  dtype=np.float32)).unsqueeze(1)

traindata   = TensorDataset(transform_(X_train_t), y_train_t)
trainloader = DataLoader(traindata,
                        batch_size = BATCH,
                        shuffle = True
                        )

testdata   = TensorDataset(transform_(X_test_t), y_test_t)
# Evaluation does not depend on sample order, so the test loader is not shuffled.
testloader = DataLoader(testdata,
                        batch_size = BATCH,
                        shuffle = False
                       )

In [15]:
# data = next(iter(trainloader))
# mean, std = data[0].mean(), data[0].std()
# print(f"{mean:.4f}, {std:.4f}")

602736768.0000, 1598374016.0000


In [None]:
class Network(nn.Module):
    """Three-layer fully connected regressor: 5 inputs -> 64 -> 64 -> 1 output.

    Both hidden layers are followed by a sigmoid; the output layer is linear.
    """

    def __init__(self):
        super(Network, self).__init__()
        self.fc1 = torch.nn.Linear (5, 64)
        self.fc2 = torch.nn.Linear (64, 64)
        self.fc3 = torch.nn.Linear (64, 1)
        # No trailing comma here: with one, `self.sig` becomes a 1-tuple and
        # the activation is never registered as a submodule.
        self.sig = torch.nn.Sigmoid()

    def forward(self, x):
        """Map a (batch, 5) float tensor to a (batch, 1) prediction."""
        x = self.sig(self.fc1(x))
        x = self.sig(self.fc2(x))
        return self.fc3(x)

# The batches are moved to `device` in the training loop, so the model has to
# live on that device as well.
model = Network().to(device)

# Count individual weights; len(list(model.parameters())) only counts the
# number of parameter tensors.
n_params = sum(p.numel() for p in model.parameters())
print( f"====================\nTotal params: {n_params}\n====================" )
# print(model)

In [None]:
# Mean squared error on the pacing value (CrossEntropyLoss was tried earlier
# and is kept here for reference).
# criterion = nn.CrossEntropyLoss()
criterion = nn.MSELoss(reduction='mean')
optimizer = optim.SGD(model.parameters(), lr=LEARNING_RATE, momentum=0.9)

# Start from +inf so the first evaluated epoch always produces a checkpoint;
# a fixed threshold such as 10 saves nothing if the loss never drops below it.
bestloss = float('inf')

# for epoch in range(EPOCH):  # loop over the dataset multiple times

#     running_loss = 0.0
#     for i, data in enumerate(trainloader, 0):
#         # get the inputs; data is a list of [inputs, labels]
#         xs, ys = data
#         xs, ys = xs.float(), ys.float()

#         # zero the parameter gradients
#         optimizer.zero_grad()

#         # forward + backward + optimize
#         outputs = net(xs)

#         loss = criterion(ys, outputs)
#         loss.backward()
#         optimizer.step()

#         # print statistics
#         running_loss += loss.item()

#     print(f"[{epoch+1}/{i+1}] loss: {running_loss/len(trainloader.dataset)}")
#     running_loss = 0.0

# print('Finished Training')

def train(epoch):
    """Run one optimization pass over `trainloader`.

    epoch:  (int) index of the current epoch; accepted for symmetry with the
            driver loop and not used inside the function.
    return: (float) sum of per-batch mean losses weighted by batch size,
            divided by len(traindata).
    """
    running_loss = 0.0

    model.train()
    # Send each batch to wherever the model's weights live so inputs and
    # parameters can never end up on different devices.
    model_device = next(model.parameters()).device

    for xs, ys in trainloader:
        xs, ys = xs.to(model_device), ys.to(model_device)
        # --- Model ---
        optimizer.zero_grad()
        output = model(xs)
        # --- Loss ---
        # Prediction first, target second. The target is reshaped to the
        # output's shape because (B,) against (B, 1) would broadcast to
        # (B, B) and average over every prediction/target pair.
        loss = criterion(output, ys.reshape(output.shape))
        loss.backward()
        optimizer.step()
        # --- Statistics ---
        running_loss += loss.item() * xs.size(0)
    epoch_loss = running_loss/len(traindata)
    return epoch_loss

def test(epoch):
    """Evaluate the model on `testloader` without updating any weights.

    epoch:  (int) index of the current epoch; accepted for symmetry with the
            driver loop and not used inside the function.
    return: (float) sum of per-batch mean losses weighted by batch size,
            divided by len(testdata).
    """
    running_loss = 0.0

    model.eval()
    # Send each batch to wherever the model's weights live so inputs and
    # parameters can never end up on different devices.
    model_device = next(model.parameters()).device

    with torch.no_grad():
        for xs, ys in testloader:
            xs, ys = xs.to(model_device), ys.to(model_device)

            # --- Model ---
            output = model(xs)
            # --- Loss ---
            # Prediction first, target second. The target is reshaped to the
            # output's shape because (B,) against (B, 1) would broadcast to
            # (B, B) and average over every prediction/target pair.
            loss = criterion(output, ys.reshape(output.shape))
            # --- Statistics ---
            running_loss += loss.item() * xs.size(0)
        epoch_loss = running_loss/len(testdata)
    return epoch_loss
    
# Create the checkpoint directory if needed. exist_ok=True replaces the
# separate isdir() check followed by mkdir().
checkpoint_dir = os.path.join(str(root_dir), 'checkpoint')
os.makedirs(checkpoint_dir, exist_ok=True)

print("Epoch", "TR-loss", "TS-loss", sep=' '*8, end="\n")

for epoch in range(EPOCH):

    trainloss = train(epoch)
    testloss = test(epoch)

    # The printed epoch index is zero-based, matching the checkpoint file names.
    print(f"{epoch:03}/{EPOCH}", f"{trainloss:.4f}", f"{testloss:.4f}", sep=' '*8, end="\n")

    # Saving the model whenever the held-out loss improves on the best so far.
    if testloss < bestloss:
        bestloss = testloss
        torch.save(model.state_dict(), os.path.join(checkpoint_dir, "pacing_"+str(epoch)+".pt"))
        print("Model Saved.")
print("="*100)


In [None]:
# import os
# import sys

# import pandas as pd
# import numpy as np
# import matplotlib.pyplot as plt
# import seaborn as sns
# import warnings

# %matplotlib inline

# # --- Sklearn ---
# from sklearn.metrics import confusion_matrix, f1_score, accuracy_score
# from sklearn import decomposition, discriminant_analysis
# from sklearn.model_selection import GridSearchCV

# # --- Models ---
# from sklearn.tree import DecisionTreeClassifier
# from sklearn.ensemble import GradientBoostingClassifier
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.naive_bayes import GaussianNB
# from sklearn import svm
# from sklearn import neural_network
# from sklearn.linear_model import LogisticRegression

# from sklearn.preprocessing import LabelEncoder
# from sklearn.cross_validation import train_test_split

# # --- Utility ---
# import os
# import pickle, torch
# import numpy as np, pandas as pd
# import seaborn as sn
# import matplotlib.pyplot as plt



# dataPath = "data/statistics (pacing).csv"
# df_full = pd.read_csv(dataPath)
# print(df_full.describe())

In [None]:
# Load the raw table here: the only other assignment of `df_full` sits in a
# commented-out cell, so this cell raised NameError on a fresh kernel.
df_full = pd.read_csv(dataPath)

columnList = df_full.columns
print(f"Total columns list: {columnList}")

# Dropping columns that are not required at the moment
# (unlike the first experiment, 'STREAMS' is dropped here as well)
df = df_full.drop(columns=[ 'Unnamed: 0', 'UUID', 'HOSTNAME', 'ALIAS', 'TIMESTAMP', 'STREAMS',
                            'THROUGHPUT (Receiver)', 'LATENCY (min.)', 'LATENCY (max.)',
                            'CONGESTION (Receiver)', 'BYTES (Receiver)'
                          ])

print(f"New columns list: {df.columns}")

In [None]:
# Summary statistics of the frame produced by the column drop.
df.describe()

In [None]:
# df.head(5)

# Preprocessing

In [None]:
# Parse "<N>gbit" into the integer N with a vectorized string operation.
# The previous element-wise loop wrote into `df['PACING'].values`, which
# aliases the frame's own storage and is read-only under pandas Copy-on-Write;
# it also failed when the cell was re-run, because the values were then
# integers without a `.split` method. Going through `astype(str)` and making
# the "gbit" suffix optional keeps the cell idempotent. Anything that is not a
# plain integer becomes NaN and makes `.astype(int)` raise.
df['PACING'] = (
    df['PACING']
    .astype(str)
    .str.extract(r'^\s*(\d+)\s*(?:gbit.*)?$', expand=False)
    .astype(int)
)

# Binary flag: 1 for 'cubic', 0 for every other sender congestion algorithm.
df['CONGESTION (Sender)'] = (df['CONGESTION (Sender)'] == 'cubic').astype(int)

In [None]:

# Preview the first five rows of the frame.
df.head(5)

In [None]:
# sns.set(style='whitegrid', context='notebook')
# cols = ['THROUGHPUT (Sender)', 'LATENCY (mean)', 'RETRANSMITS', 'CONGESTION (Sender)', 'PACING']

# sns.pairplot(df[cols], height=3)
# plt.tight_layout()
# # plt.savefig('./figures/scatter.png', dpi=300)
# plt.show()

# Dataset

In [None]:
# Feature matrix: three sender-side columns. Target: the integer pacing value.
X = df[[
    'THROUGHPUT (Sender)',
    'LATENCY (mean)',
    'RETRANSMITS',
]].values
y = df['PACING'].values.astype('int')

# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    random_state=1,
)

In [None]:
def train_and_tune(X, y, model, parameters, scoring='f1_macro', kfold=5, verbose=0):
    """
    Exhaustively search `parameters` with cross-validation and return the winner.

    X:          array-like of shape (n_samples, n_features)
    y:          array-like of shape (n_samples,)
    model:      (object) a sklearn estimator instance to tune
    parameters: (dict) parameter grid handed to GridSearchCV
    scoring:    (str) metric used to rank the parameter combinations
    kfold:      (int) value passed as GridSearchCV's `cv` argument
    verbose:    (int) verbosity level forwarded to GridSearchCV
    return:     the `best_estimator_` of the fitted search
    """
    search = GridSearchCV(
        model,
        parameters,
        scoring=scoring,
        cv=kfold,
        verbose=verbose,
        n_jobs=-1,
    )
    # fit() returns the search object itself, so the best estimator can be
    # read straight off the call.
    return search.fit(X, y).best_estimator_

def save_model(filename, model):
    """
    Pickle `model` to `filename` and print a confirmation.

    filename: Filename to save the model
    model:    Model object to be saved
    """
    # The context manager closes the file even if pickling fails; the bare
    # open() used before left the handle to the garbage collector.
    with open(filename, 'wb') as fh:
        pickle.dump(model, fh)
    print("Model Saved")

def load_model(filename):
    """
    Unpickle and return the object stored in `filename`.

    filename: Filename to load the model
    return:   The object that was pickled into the file

    NOTE: unpickling can execute arbitrary code, so only load files that this
    notebook (or another trusted source) produced.
    """
    # The context manager closes the file even if unpickling fails; the bare
    # open() used before left the handle to the garbage collector.
    with open(filename, 'rb') as fh:
        return pickle.load(fh)


def MLP(train, save, test):
    """
    Tune, optionally persist, and evaluate an sklearn MLPClassifier.

    train: (bool) grid-search the classifier on the training split
           (X_train, y_train)
    save:  (bool) pickle the tuned model to ./mlpBest.pkl; only has an effect
           when `train` is true
    test:  (bool) print the accuracy on the held-out split (X_test, y_test)

    When `test` is requested without `train`, the model is reloaded from
    ./mlpBest.pkl, which must already exist.
    """
    filename = "./mlpBest.pkl"
    mlpBest = None

    if train:
        # --- Train ---
        mlp = neural_network.MLPClassifier(random_state=999)
        params = {"alpha" : [0.0001],
                  "learning_rate_init" : [0.001],
                  "batch_size" : [32, 64, 128],
                  "activation" : ["relu"],
                  "early_stopping" : [True],
                  "hidden_layer_sizes" : [10, 50, 100],
                  }

        # Tune on the training split only. Fitting on the full X, y and then
        # scoring on the same rows reports training accuracy, not
        # generalization.
        mlpBest = train_and_tune(X_train, y_train,
                                 mlp,
                                 params,
                                 scoring='f1_macro',
                                 kfold=5)

        if save:
            save_model(filename, mlpBest)

    if test:
        # --- Test ---
        # Use the freshly tuned model when there is one, so that training
        # without saving does not silently evaluate a stale pickle from disk.
        evaluated = mlpBest if mlpBest is not None else load_model(filename)
        acc = evaluated.score(X_test, y_test)

        print("Accuracy: ", acc)

# Run all three stages: tune the classifier, pickle the best estimator, report accuracy.
MLP(train=True, save=True, test=True)

In [None]:
# Map the pacing classes onto 0..K-1. The encoder is fitted on the full label
# vector and then applied to the splits that are actually used below;
# previously only `y` was re-encoded, which had no effect on the model because
# it was fitted on the already-split, unencoded arrays.
le = LabelEncoder()
le.fit(y)
y_train_enc = le.transform(y_train)
y_test_enc = le.transform(y_test)
# le.transform(['M', 'B'])

# A dedicated name keeps the PyTorch `model` global intact; train()/test()
# read that global and would break if it were replaced by a sklearn estimator.
logreg = LogisticRegression()
logreg.fit(X_train, y_train_enc)
print('Test Accuracy: %.3f' % logreg.score(X_test, y_test_enc))