In [44]:
%load_ext autoreload
%autoreload 2
import pickle
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import plot_confusion_matrix
import seaborn as sns
import copy
from keras.models import load_model
import joblib
import os

from brate_trainer_mlp_lstm import *
from data_columns import *
import load_data as ld
pd.options.mode.chained_assignment = None  # default='warn'

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [45]:
# Window sizes are stored as strings because they are concatenated directly
# into the dataset directory names.
PARAMETERS = dict(aggr_wind_size='12', horz_wind_size='2')

# Model families to evaluate, in the order they are run.
model_types = ['rf', 'xgb', 'mlp', 'lstm']

# Saved-model number to load for each family (currently the same for all).
brate_model_num = {family: 201 for family in model_types}

#tf.device('cpu')
# rf: brate:3, nseg:4
# xgb: brate:3, nseg:4
# mlp: brate:5, nseg:17
# lstm: brate:2, nseg:6

In [46]:
# Evaluate every bitrate model in `model_types` on every evaluation CSV and
# write one result CSV per (evaluation file, model type) pair.

# The dataset directories depend only on the window-size parameters, so they
# are built once instead of once per model.
dataset_subdir = ('dataset7-' + PARAMETERS['horz_wind_size'] + 'sWsize-'
                  + PARAMETERS['aggr_wind_size'] + 'aggsize/')
train_data_dir = '../data/data_train/' + dataset_subdir  # kept for reference; not read in this cell
eval_data_dir = '../data/data_eval/' + dataset_subdir
# Parent dir for saving results
eval_out_dir = '../data/data_eval_output/' + dataset_subdir
os.makedirs(eval_out_dir, exist_ok=True)

# NOTE(review): the scaler saved next to the MLP model is used for BOTH the
# mlp and the lstm inputs -- confirm both networks were trained with it.
scaler_filename = '../models/' + 'mlp' + '/model' \
                                + str(brate_model_num['mlp']) + '/' + 'X_scaler.pkl'
#scaler_filename = brate_model_path +'X_scaler.pkl'

eval_csvs = ld.get_files_in_subdirectory(eval_data_dir)

tree_model_types = ('rf', 'xgb')    # pickled classifiers, fed unscaled features
nn_model_types = ('mlp', 'lstm')    # Keras models, fed scaled features

# Fail before writing anything if a model type is not supported. Previously
# this was only printed and the loop went on with undefined or stale
# predictions.
unknown_model_types = [m for m in model_types
                       if m not in tree_model_types + nn_model_types]
if unknown_model_types:
    raise ValueError('Do not know what you are asking me to do: '
                     + repr(unknown_model_types))

for model_type in model_types:
    is_nn_model = model_type in nn_model_types

    with tf.device('/cpu:0'):
        brate_model_path = '../models/' + model_type + '/model' \
                                        + str(brate_model_num[model_type]) + '/'

        # Load the model (and, for the networks, the feature scaler) once per
        # model type rather than once per evaluation file.
        if is_nn_model:
            model_brate = load_model(brate_model_path)
            # The scaler was fitted and saved at training time, so the
            # training set does not need to be reloaded here.
            X_scaler = joblib.load(scaler_filename)
        else:
            dir_brate = brate_model_path + "brate_classifier.pkl"
            # SECURITY: pickle.load can execute arbitrary code; only load
            # model files that come from a trusted source.
            with open(dir_brate, 'rb') as f:
                model_dict = pickle.load(f)
            model_brate = model_dict["classifier"]

        for file in eval_csvs:
            print(file)
            df_eval = pd.read_csv(eval_data_dir + file)
            # Path to save the resulting evaluation data
            out_file = file.replace('.csv',
                                    '_' + model_type + '.csv')
            dir_eval_out = eval_out_dir + out_file
            print(dir_eval_out)

            # Identifying columns are copied over; result columns are added below.
            df_eval_result = df_eval[['Thor_start', 'Thor_end', 'Node',
                                      'video-Id', 'last-SegId']].copy()

            # Extract X and Y for predictors
            # X and y here are real values
            X_brate = df_eval[FEAT_COLS].to_numpy()
            y_brate = df_eval[TARGET_SEGMODE].to_numpy().squeeze()  # ground truth; not used below
            y_nseg = df_eval[TARGET_SEGNUMBER].to_numpy().squeeze()
            y_gnb = df_eval[TARGET_CELLIDASSOC].to_numpy().squeeze()

            if is_nn_model:
                # Scale the features with the scaler saved at training time.
                X_brate = X_scaler.transform(X_brate)

                if model_type == 'lstm':
                    # reshape X with extra dimension for lstm
                    X_brate = X_brate.reshape((X_brate.shape[0],
                                               1, X_brate.shape[1]))

                yhat_brate = model_brate.predict(X_brate)
                # The network output is one-hot style; take the index of the
                # largest entry per row ...
                yhat_brate = np.argmax(yhat_brate, axis=1)
                # ... and map that index to the actual bitrate value.
                yhat_brate = np.array(BITRATE_LIST)[yhat_brate]
            else:
                yhat_brate = model_brate.predict(X_brate)

            # The segment-count "prediction" is derived from the ground truth:
            # it is 1 in every window where y_nseg is non-zero and 0 where
            # y_nseg is 0. This assumes that segments requested in consecutive
            # windows can be fetched in time.
            yhat_nseg = np.where(y_nseg == 0, 0, 1)
            # Set to 0 the bitrate of samples with no ground-truth segment
            # requests in the horizon window.
            yhat_brate = np.where(y_nseg == 0, 0, yhat_brate)

            # Column insertion order defines the column order of the output CSV.
            df_eval_result['n-PredSeg-TRUE'] = y_nseg
            df_eval_result['n-PredSeg'] = yhat_nseg
            df_eval_result['mode-SegQuality'] = yhat_brate
            df_eval_result['baseStatAssoc'] = y_gnb

            # Saving evaluation file
            df_eval_result = df_eval_result.rename(
                columns={"Thor_start": "Tpred_start", 'Thor_end': "Tpred_end"})
            df_eval_result = df_eval_result.sort_values(
                ['Tpred_start', 'Tpred_end', 'Node'])
            df_eval_result.to_csv(dir_eval_out, index=False)

run6wsize2.csv
../data/data_eval_output/dataset7-2sWsize-12aggsize/run6wsize2_rf.csv
run27wsize2.csv
../data/data_eval_output/dataset7-2sWsize-12aggsize/run27wsize2_rf.csv
run21wsize2.csv
../data/data_eval_output/dataset7-2sWsize-12aggsize/run21wsize2_rf.csv
run12wsize2.csv
../data/data_eval_output/dataset7-2sWsize-12aggsize/run12wsize2_rf.csv
run6wsize2.csv
../data/data_eval_output/dataset7-2sWsize-12aggsize/run6wsize2_xgb.csv
run27wsize2.csv
../data/data_eval_output/dataset7-2sWsize-12aggsize/run27wsize2_xgb.csv
run21wsize2.csv
../data/data_eval_output/dataset7-2sWsize-12aggsize/run21wsize2_xgb.csv
run12wsize2.csv
../data/data_eval_output/dataset7-2sWsize-12aggsize/run12wsize2_xgb.csv
run6wsize2.csv
../data/data_eval_output/dataset7-2sWsize-12aggsize/run6wsize2_mlp.csv
run27wsize2.csv
../data/data_eval_output/dataset7-2sWsize-12aggsize/run27wsize2_mlp.csv
run21wsize2.csv
../data/data_eval_output/dataset7-2sWsize-12aggsize/run21wsize2_mlp.csv
run12wsize2.csv
../data/data_eval_output/d