In [51]:
import decimal
from collections import defaultdict
import matplotlib
import numpy as np
import itertools
import os
import pandas as pd
import datetime
import json
from matplotlib import pyplot as plt
from tqdm import tqdm
import glob
import pdb
from sklearn.preprocessing import StandardScaler
from sklearn import svm
from sklearn.metrics import fbeta_score
from catboost import CatBoostClassifier
from catboost import Pool, CatBoostRegressor
sys.path.insert(0, '../../scripts/modeling_toolbox/')
sys.path.insert(0, '../../scripts/asset_processor/')
import utils
from video_asset_processor import VideoAssetProcessor

%matplotlib notebook
matplotlib.style.use('ggplot')
matplotlib.rcParams['figure.figsize']=[10,6]
pd.options.display.width = 0
pd.set_option('display.max_columns', None)

In [18]:
%matplotlib notebook

In [53]:
FEATURES_UL = ['size_dimension_ratio',
               'temporal_dct-mean',
               'temporal_gaussian_mse-mean',
               'temporal_gaussian_difference-mean',
               'temporal_threshold_gaussian_difference-mean'
               ]

FEATURES_SL = ['temporal_dct-mean',
               'temporal_gaussian_mse-mean',
               'temporal_gaussian_difference-mean',
               'temporal_threshold_gaussian_difference-mean'
               ]
FEATURES_QOE = ['temporal_dct-mean',
            'temporal_gaussian_mse-mean',
            'temporal_gaussian_difference-mean',
            'temporal_threshold_gaussian_difference-mean'
            ]
METRICS_QOE = ['temporal_ssim-mean']

train_fraction = 0.85

In [54]:
# read data
data = utils.load_data('../../../data/data-large.csv', 100)
# convert features extracted with legacy code
data[np.logical_not(data.rendition.str.contains('_fps'))] = VideoAssetProcessor.rescale_to_resolution(data[np.logical_not(data.rendition.str.contains('_fps'))], FEATURES_UL)

Upscaling temporal_dct temporal_dct-mean
Upscaling temporal_gaussian_mse temporal_gaussian_mse-mean
Upscaling temporal_gaussian_difference temporal_gaussian_difference-mean


  data_df['target'] = data_df['rendition'].str.contains('^[0-9]+p(_[0-9][0-9]?-[0-9][0-9]?fps(_gpu)?)?$')


In [55]:
# class balance, true is 'no tamper, accurate rendition', false is tampered rendition
data.groupby('target').target.sum()

target
False     0.0
True     10.0
Name: target, dtype: float64

In [56]:
# split test and train
np.random.seed(1337)
train_data = data.sample(frac=train_fraction)
test_data = data[~data.index.isin(train_data.index)]

In [45]:
### OCSVM MODEL
x_train_ul = np.asarray(train_data[FEATURES_UL])
x_test_ul = np.asarray(test_data[FEATURES_UL])
# scale the data
scaler = StandardScaler()
x_train_ul = scaler.fit_transform(x_train_ul)
x_test_ul = scaler.transform(x_test_ul)
# fit the model
oc_svm = svm.OneClassSVM(kernel='rbf', gamma=0.3, nu=0.001, cache_size=5000)
oc_svm.fit(x_train_ul)

OneClassSVM(cache_size=5000, coef0=0.0, degree=3, gamma=0.3, kernel='rbf',
            max_iter=-1, nu=0.001, shrinking=True, tol=0.001, verbose=False)

In [48]:
### CATBOOST MODEL
cat_features = []
# Initialize CatBoostClassifier
cb_params = dict(iterations=500,
                 learning_rate=0.05,
                 depth=6)
catboost_binary = CatBoostClassifier(**cb_params)
# Fit model
catboost_binary.fit(np.asarray(train_data[FEATURES_SL]), train_data['target'], cat_features)


0:	learn: 0.6116105	total: 63.5ms	remaining: 31.7s
1:	learn: 0.5437286	total: 65.3ms	remaining: 16.3s
2:	learn: 0.4847980	total: 66.9ms	remaining: 11.1s
3:	learn: 0.4382283	total: 68.4ms	remaining: 8.49s
4:	learn: 0.3957867	total: 70ms	remaining: 6.93s
5:	learn: 0.3566864	total: 71.5ms	remaining: 5.88s
6:	learn: 0.3221192	total: 73.1ms	remaining: 5.15s
7:	learn: 0.2956412	total: 74.6ms	remaining: 4.59s
8:	learn: 0.2703942	total: 76.1ms	remaining: 4.15s
9:	learn: 0.2468710	total: 77.4ms	remaining: 3.79s
10:	learn: 0.2266443	total: 78.7ms	remaining: 3.5s
11:	learn: 0.2142966	total: 80ms	remaining: 3.25s
12:	learn: 0.1965739	total: 81.1ms	remaining: 3.04s
13:	learn: 0.1818316	total: 81.8ms	remaining: 2.84s
14:	learn: 0.1698476	total: 82.6ms	remaining: 2.67s
15:	learn: 0.1592295	total: 83.5ms	remaining: 2.53s
16:	learn: 0.1512601	total: 84.2ms	remaining: 2.39s
17:	learn: 0.1440047	total: 85.3ms	remaining: 2.28s
18:	learn: 0.1366869	total: 86.6ms	remaining: 2.19s
19:	learn: 0.1273431	total:

215:	learn: 0.0046477	total: 379ms	remaining: 498ms
216:	learn: 0.0046243	total: 380ms	remaining: 496ms
217:	learn: 0.0046013	total: 382ms	remaining: 494ms
218:	learn: 0.0045785	total: 383ms	remaining: 492ms
219:	learn: 0.0045560	total: 385ms	remaining: 490ms
220:	learn: 0.0045335	total: 386ms	remaining: 488ms
221:	learn: 0.0045113	total: 388ms	remaining: 485ms
222:	learn: 0.0044893	total: 389ms	remaining: 483ms
223:	learn: 0.0044675	total: 391ms	remaining: 481ms
224:	learn: 0.0044460	total: 392ms	remaining: 479ms
225:	learn: 0.0044246	total: 393ms	remaining: 477ms
226:	learn: 0.0044035	total: 395ms	remaining: 475ms
227:	learn: 0.0043828	total: 396ms	remaining: 472ms
228:	learn: 0.0043621	total: 397ms	remaining: 470ms
229:	learn: 0.0043416	total: 415ms	remaining: 487ms
230:	learn: 0.0043213	total: 416ms	remaining: 485ms
231:	learn: 0.0043012	total: 418ms	remaining: 483ms
232:	learn: 0.0042815	total: 420ms	remaining: 481ms
233:	learn: 0.0042618	total: 421ms	remaining: 479ms
234:	learn: 

413:	learn: 0.0023863	total: 721ms	remaining: 150ms
414:	learn: 0.0023807	total: 721ms	remaining: 148ms
415:	learn: 0.0023751	total: 722ms	remaining: 146ms
416:	learn: 0.0023696	total: 722ms	remaining: 144ms
417:	learn: 0.0023641	total: 723ms	remaining: 142ms
418:	learn: 0.0023586	total: 723ms	remaining: 140ms
419:	learn: 0.0023531	total: 724ms	remaining: 138ms
420:	learn: 0.0023476	total: 725ms	remaining: 136ms
421:	learn: 0.0023422	total: 733ms	remaining: 135ms
422:	learn: 0.0023368	total: 734ms	remaining: 134ms
423:	learn: 0.0023315	total: 735ms	remaining: 132ms
424:	learn: 0.0023261	total: 736ms	remaining: 130ms
425:	learn: 0.0023208	total: 737ms	remaining: 128ms
426:	learn: 0.0023155	total: 737ms	remaining: 126ms
427:	learn: 0.0023102	total: 738ms	remaining: 124ms
428:	learn: 0.0023050	total: 739ms	remaining: 122ms
429:	learn: 0.0022998	total: 740ms	remaining: 120ms
430:	learn: 0.0022946	total: 740ms	remaining: 119ms
431:	learn: 0.0022894	total: 741ms	remaining: 117ms
432:	learn: 

<catboost.core.CatBoostClassifier at 0x7fad753615f8>

In [59]:
train_data[METRICS_QOE]

KeyError: "None of [Index(['temporal_ssim-mean'], dtype='object')] are in the [columns]"

In [57]:
### CATBOOST QOE REGRESSOR
categorical_features_indices = []

train_pool = Pool(data=train_data[FEATURES_QOE],
                  label=train_data[METRICS_QOE],
                  cat_features=categorical_features_indices)

loss_funct = 'MAE'
cb_params = {'depth':6,
             'num_trees':100,
             'l2_leaf_reg':5,
             'learning_rate':0.05,
             'loss_function':loss_funct}
model_catbootregressor = CatBoostRegressor(**cb_params)

#Train the model
print('Training QoE model:')
model_catbootregressor.fit(train_pool)

KeyError: "None of [Index(['temporal_ssim-mean'], dtype='object')] are in the [columns]"

In [60]:
train_data[FEATURES_QOE]


Unnamed: 0,temporal_dct-mean,temporal_gaussian_mse-mean,temporal_gaussian_difference-mean,temporal_threshold_gaussian_difference-mean
2,3.203973e+06,348.727302,2.549318e+09,1068.203390
48,4.183455e+07,108911.051177,2.133615e+10,42145.762712
86,8.900453e+05,490.812508,2.492159e+08,2652.275862
44,1.770141e+05,3.245281,1.355139e+08,2.203390
59,7.874041e+06,4492.344305,1.860033e+09,2636.224138
56,3.520314e+07,82474.067172,2.990292e+10,47790.189655
79,3.887698e+04,1.078843,4.195069e+07,5.362069
70,5.700247e+04,0.909072,2.740242e+07,34.500000
91,1.758022e+05,18.345111,2.406617e+08,120.448276
17,5.384979e+04,3.361154,4.718287e+07,123.050847
