In [63]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor
rf = RandomForestRegressor(random_state = 42)
from pprint import pprint
# Look at parameters used by our current forest
print('Parameters currently in use:\n')
pprint(rf.get_params())

Parameters currently in use:

{'bootstrap': True,
 'criterion': 'mse',
 'max_depth': None,
 'max_features': 'auto',
 'max_leaf_nodes': None,
 'min_impurity_decrease': 0.0,
 'min_impurity_split': None,
 'min_samples_leaf': 1,
 'min_samples_split': 2,
 'min_weight_fraction_leaf': 0.0,
 'n_estimators': 'warn',
 'n_jobs': None,
 'oob_score': False,
 'random_state': 42,
 'verbose': 0,
 'warm_start': False}


In [64]:
import numpy as np

from sklearn.model_selection import RandomizedSearchCV
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# Number of features to consider at every split
max_features = ['auto', 'sqrt']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [2, 5, 10]
# Minimum number of samples required at each leaf node
min_samples_leaf = [1, 2, 4]
# Method of selecting samples for training each tree
bootstrap = [True, False]
# Create the random grid
random_grid = {
               'n_estimators': n_estimators,
               'max_features': max_features,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf,
               'bootstrap': bootstrap
}
pprint(random_grid)

{'bootstrap': [True, False],
 'max_depth': [10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None],
 'max_features': ['auto', 'sqrt'],
 'min_samples_leaf': [1, 2, 4],
 'min_samples_split': [2, 5, 10],
 'n_estimators': [200, 400, 600, 800, 1000, 1200, 1400, 1600, 1800, 2000]}


In [None]:
# Use the random grid to search for best hyperparameters
# First create the base model to tune
from sklearn.ensemble import RandomForestRegressor
rf = RandomForestRegressor()
# Random search of parameters, using 3 fold cross validation, 
# search across 100 different combinations, and use all available cores
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 100, cv = 3, verbose=2, random_state=42, n_jobs = -1)
# Fit the random search model

import pandas as pd

list_of_train = ['number_of_slot','number_of_day','number_of_weekday','number_of_hour']
list_of_target = ['stream1','stream2','stream3','stream4','stream5','bandwidth_available_OTT']

data = pd.read_csv("train_data.csv")

#lst_x = []
#for i in range(len(data)):
 #   lst_x.append([data.number_of_slot[i], data.number_of_day[i], data.number_of_weekday[i], data.number_of_hour[i]])

#lst_y = []
#for i in range(len(data)):
 #   lst_y.append([round(data.stream1[i],6) , round(data.stream2[i], 6), round(data.stream3[i], 6),  round(data.stream4[i], 6), round(data.stream5[i], 6), round(data.bandwidth_available_OTT[i], 6)])

x = data[list_of_train]

y = data[list_of_target]

rf_random.fit(x, y)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:  3.9min
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed: 613.7min


In [29]:
rf_random.best_params_

{'min_samples_split': 10,
 'min_samples_leaf': 4,
 'max_features': 'auto',
 'max_depth': 60}

In [39]:
import pandas as pd
import numpy as np

list_of_train = ['number_of_slot','number_of_day','number_of_weekday','number_of_hour']
list_of_target = ['stream1','stream2','stream3','stream4','stream5','bandwidth_available_OTT']

data = pd.read_csv("train_data.csv")

#lst_x = []
#for i in range(len(data)):
 #   lst_x.append([data.number_of_slot[i], data.number_of_day[i], data.number_of_weekday[i], data.number_of_hour[i]])

#lst_y = []
#for i in range(len(data)):
 #   lst_y.append([round(data.stream1[i],6) , round(data.stream2[i], 6), round(data.stream3[i], 6),  round(data.stream4[i], 6), round(data.stream5[i], 6), round(data.bandwidth_available_OTT[i], 6)])

x = data[list_of_train]

y = data[list_of_target]

from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

#clf = RandomForestClassifier()

reg = RandomForestRegressor(min_samples_split=10, min_samples_leaf=4,max_features='auto',max_depth=60)

x_d = pd.read_csv("test_data.csv")

#lst_test_x = []
#for i in range(len(x_d)):
 #   lst_test_x.append([x_d.number_of_slot[i], x_d.number_of_day[i], x_d.number_of_weekday[i], x_d.number_of_hour[i]])

test_x = x_d[list_of_train]

# fit the model to the training data (learn the coefficients)
reg.fit(x, y)

# make predictions on the testing set
y_pred = reg.predict(test_x)

lst = []
for i in range(len(y_pred)):
    round_lst = []
    for j in range(len(y_pred[i])):
        round_lst.append(round(y_pred[i][j], 6))
        
    lst.append(round_lst)

predstream = np.zeros((2256,5))

for i in range(2256):
    for j in range(5):
        predstream[i,j] =y_pred[i,j] 
        
m = np.zeros((2256,1))
max_val = np.zeros((2256,1))
for k in range(2256):
    for l in range(5):
        if max_val[k] < predstream[k,l]:
            max_val[k] =  predstream[k,l]
            m[k] = l

for i in range(len(m)):
    print(m[i])



[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[1.]
[4.]
[4.]
[4.]
[4.]
[1.]
[2.]
[2.]
[4.]
[0.]
[4.]
[2.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[1.]
[2.]
[2.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[1.]
[4.]
[4.]
[4.]
[4.]
[1.]
[4.]
[1.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[2.]
[2.]
[2.]
[2.]
[2.]
[2.]
[3.]
[3.]
[1.]
[1.]
[3.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[2.]
[1.]
[2.]
[2.]
[2.]
[2.]
[2.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[1.]
[4.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[0.]
[2.]
[2.]
[4.]
[0.]
[4.]
[2.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[2.]
[2.]
[2.]
[4.]
[4.]
[4.]
[1.]
[4.]
[1.]
[1.]
[1.]
[1.]
[1.]
[2.]
[1.]
[1.]
[2.]
[2.]
[2.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[1.]
[4.]
[4.]
[4.]
[4.]
[1.]
[2.]
[2.]
[4.]
[0.]
[4.]
[2.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]


[2.]
[2.]
[2.]
[2.]
[2.]
[3.]
[3.]
[1.]
[1.]
[3.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[2.]
[1.]
[2.]
[2.]
[2.]
[2.]
[2.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[1.]
[4.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[0.]
[2.]
[2.]
[4.]
[0.]
[4.]
[2.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[2.]
[2.]
[2.]
[4.]
[4.]
[4.]
[1.]
[4.]
[1.]
[1.]
[1.]
[1.]
[1.]
[2.]
[1.]
[1.]
[2.]
[2.]
[2.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[1.]
[4.]
[4.]
[4.]
[4.]
[1.]
[2.]
[2.]
[4.]
[0.]
[4.]
[2.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[1.]
[2.]
[2.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[1.]
[4.]
[4.]
[4.]
[4.]
[1.]
[4.]
[1.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[2.]
[2.]
[2.]
[2.]
[2.]
[2.]
[3.]
[3.]
[1.]
[1.]
[3.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[2.]
[1.]
[2.]
[2.]
[2.]
[2.]
[2.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]
[4.]


In [51]:
##### import pandas as pd
import numpy as np

list_of_train = ['number_of_slot','number_of_day','number_of_weekday','number_of_hour']
list_of_target = ['stream1','stream2','stream3','stream4','stream5','bandwidth_available_OTT']

data = pd.read_csv("train_data.csv")

x = data[list_of_train]

y = data[list_of_target]

def evaluate(model, test_features, test_labels):
    predictions = model.predict(test_features)
    print(predictions)
    errors = abs(predictions - test_labels)
    #mape = 100 * np.mean(errors / test_labels)
    #accuracy = 100 - mape
    print('Model Performance')
    print('Average Error: {:0.4f} degrees.'.format((np.mean(errors))))
    #print('Accuracy = {:0.2f}%.'.format(accuracy))
    #print('Accuracy = ' , accuracy)
    #print('Average Error:', np.mean(errors))
    
    #return accuracy

base_model = RandomForestRegressor(n_estimators=800, min_samples_split=5, min_samples_leaf=4,max_features='auto',max_depth=50)
base_model.fit(x, y)

data = pd.read_csv("testing_data.csv")

lst_x = []
for i in range(len(data)):
    lst_x.append([data.number_of_slot[i], data.number_of_day[i], data.number_of_weekday[i], data.number_of_hour[i]])

lst_y = []
for i in range(len(data)):
    lst_y.append([round(data.stream1[i],6) , round(data.stream2[i], 6), round(data.stream3[i], 6),  round(data.stream4[i], 6), round(data.stream5[i], 6), round(data.bandwidth_available_OTT[i], 6)])

#x_test = data[list_of_train]

#y_test = data[list_of_target]

base_accuracy = evaluate(base_model, lst_x, lst_y)

[[0.00000000e+00 1.81392236e-01 1.28238772e+00 4.13300709e-01
  0.00000000e+00 4.94593867e+00]
 [0.00000000e+00 3.41516210e-01 9.46864789e-01 4.46643805e-01
  0.00000000e+00 6.33954319e+00]
 [0.00000000e+00 6.39519481e-01 9.34317691e-01 1.88036338e-01
  0.00000000e+00 6.59992469e+00]
 ...
 [8.03482153e-01 0.00000000e+00 1.29211920e+00 9.25621599e-01
  2.63802620e+00 9.21468036e-01]
 [5.61408891e-01 2.65937680e-03 1.15129627e+00 5.51126536e-01
  4.40465282e+00 1.39501742e+00]
 [7.87246469e-01 2.21171999e-02 5.65141531e-01 7.25735307e-01
  3.05566002e+00 2.95642404e+00]]
Model Performance
Average Error: 0.2123 degrees.


In [148]:
import numpy as np

from sklearn.model_selection import RandomizedSearchCV
# Number of trees in random forest
n_estimators = [100, 300, 400, 600, 800, 1000]
# Maximum number of levels in tree
max_depth = [20, 30, 40, 50, 60, 70, 80, 90, 100, 110]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = [8, 10, 12, 14]
# Minimum number of samples required at each leaf node
min_samples_leaf = [3, 4, 5, 6]
# Create the random grid
random_grid = {'n_estimators': n_estimators,
               'max_depth': max_depth,
               'min_samples_split': min_samples_split,
               'min_samples_leaf': min_samples_leaf}
pprint(random_grid)

{'max_depth': [20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None],
 'min_samples_leaf': [3, 4, 5, 6],
 'min_samples_split': [8, 10, 12, 14],
 'n_estimators': [100, 300, 400, 600, 800, 1000]}


In [149]:
# Use the random grid to search for best hyperparameters
# First create the base model to tune
rf = RandomForestRegressor()
# Random search of parameters, using 3 fold cross validation, 
# search across 100 different combinations, and use all available cores
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 100, cv = 3, verbose=2, random_state=42, n_jobs = -1)
# Fit the random search model

import pandas as pd

list_of_train = ['number_of_slot','number_of_day','number_of_weekday','number_of_hour']
#list_of_target = ['stream1','stream2','stream3','stream4','stream5','bandwidth_available_OTT']
list_of_target = ['stream3']

data = pd.read_csv("train_data.csv")

#lst_x = []
#for i in range(len(data)):
 #   lst_x.append([data.number_of_slot[i], data.number_of_day[i], data.number_of_weekday[i], data.number_of_hour[i]])

#lst_y = []
#for i in range(len(data)):
 #   lst_y.append([round(data.stream1[i],6) , round(data.stream2[i], 6), round(data.stream3[i], 6),  round(data.stream4[i], 6), round(data.stream5[i], 6), round(data.bandwidth_available_OTT[i], 6)])

x = data[list_of_train]

y = data[list_of_target]

rf_random.fit(x, y)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed:  5.9min
[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed: 11.6min finished
  self.best_estimator_.fit(X, y, **fit_params)


RandomizedSearchCV(cv=3, error_score='raise-deprecating',
          estimator=RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
           max_features='auto', max_leaf_nodes=None,
           min_impurity_decrease=0.0, min_impurity_split=None,
           min_samples_leaf=1, min_samples_split=2,
           min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=None,
           oob_score=False, random_state=None, verbose=0, warm_start=False),
          fit_params=None, iid='warn', n_iter=100, n_jobs=-1,
          param_distributions={'n_estimators': [100, 300, 400, 600, 800, 1000], 'max_depth': [20, 30, 40, 50, 60, 70, 80, 90, 100, 110, None], 'min_samples_split': [8, 10, 12, 14], 'min_samples_leaf': [3, 4, 5, 6]},
          pre_dispatch='2*n_jobs', random_state=42, refit=True,
          return_train_score='warn', scoring=None, verbose=2)

In [69]:
rf_random.best_params_

AttributeError: 'RandomizedSearchCV' object has no attribute 'best_params_'

In [107]:
import pandas as pd
import numpy as np

list_of_train = ['number_of_slot','number_of_day','number_of_weekday','number_of_hour']
#list_of_target = ['stream1','stream2','stream3','stream4','stream5','bandwidth_available_OTT']
list_of_target = ['stream1']

data = pd.read_csv("testing_data.csv")

x = data[list_of_train]

y = data[list_of_target]

def evaluate(model, test_features, test_labels):
    predictions = model.predict(test_features)
    errors = abs(predictions - test_labels)
    mape = 100 * np.mean(errors / test_labels)
    accuracy = 100 - mape
    print('Model Performance')
    #print('Average Error: {:0.4f} degrees.'.format((np.mean(errors)))
    print('Average Error: ' , np.mean(errors))
    print('Accuracy = ' , accuracy)
    
    return accuracy

base_model = RandomForestRegressor(n_estimators=200, min_samples_split=12, min_samples_leaf=5,max_features='auto',max_depth=110,bootstrap=True)
base_model.fit(x, y)
base_accuracy = evaluate(base_model, x, y)



ValueError: Unable to coerce to Series, length must be 1: given 2256

In [96]:

base_model = RandomForestRegressor(n_estimators=400, min_samples_split=6, min_samples_leaf=4,max_features='auto',max_depth=40,bootstrap=True)
base_model.fit(x, y)
base_accuracy = evaluate(base_model, x, y)

Model Performance
Average Error:  stream1                    0.091030
stream2                    0.099236
stream3                    0.469849
stream4                    0.131067
stream5                    0.374881
bandwidth_available_OTT    0.130485
dtype: float64
Accuracy =  stream1                         -inf
stream2                         -inf
stream3                    35.182494
stream4                         -inf
stream5                    83.843340
bandwidth_available_OTT         -inf
dtype: float64


In [76]:
import pandas as pd
import numpy as np


list_of_train = ['number_of_slot','number_of_day','number_of_weekday','number_of_hour']
list_of_target = ['stream1','stream2','stream3','stream4','stream5','bandwidth_available_OTT']
#list_of_target = ['advertising_earnings1','advertising_earnings2','advertising_earnings3','advertising_earnings4','advertising_earnings5']
#list_of_target = ['stream3']
#list_of_target = ['bandwidth_available_OTT']

data = pd.read_csv("train_data.csv")

x = data[list_of_train]

y = data[list_of_target]

from sklearn.model_selection import train_test_split
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

#X_train, X_test, y_train, y_test = train_test_split(x,y, test_size=0.3)


#lst_x = []
#for i in range(len(data)):
 #   lst_x.append([data.number_of_slot[i], data.number_of_day[i], data.number_of_weekday[i], data.number_of_hour[i]])

from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor

reg = RandomForestRegressor(n_estimators=400, min_samples_split=10, min_samples_leaf=4,max_features='auto',max_depth=70,bootstrap=True)

#reg=RandomForestRegressor(n_estimators=200, min_samples_split=5, min_samples_leaf=4, max_features='auto',max_depth=10,bootstrap=True)

#reg = RandomForestClassifier(n_estimators=200, min_samples_split=3, min_samples_leaf=2,max_features='auto',max_depth=50)

x_d = pd.read_csv("test_data.csv")

#lst_test_x = []
#for i in range(len(x_d)):
 #   lst_test_x.append([x_d.number_of_slot[i], x_d.number_of_day[i], x_d.number_of_weekday[i], x_d.number_of_hour[i]])

test_x = x_d[list_of_train]

# fit the model to the training data (learn the coefficients)
reg.fit(x,y)

y_pred=reg.predict(test_x)

# make predictions on the testing set
#y_pred = reg.predict(test_x)

lst = []
for i in range(len(y_pred)):
    round_lst = []
    for j in range(len(y_pred[i])):
        round_lst.append(round(y_pred[i][j], 6))
        
    lst.append(round_lst)

#predstream = np.zeros((2256,5))

#for i in range(2256):
 #   for j in range(5):
  #      predstream[i,j] =lst_y[i,j] 
        
#m = np.zeros((2256,1))
#max_val = np.zeros((2256,1))
#for k in range(2256):
 #   for l in range(5):
  #      if max_val[k] < predstream[k,l]:
   #         max_val[k] =  predstream[k,l]
    #        m[k] = l

#for i in range(len(m)):
 #   print(m[i])

for i in range(len(lst)):
    print(lst[i])
    
print(len(lst))

#Import scikit-learn metrics module for accuracy calculation
#from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
#print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

#print(np.mean(abs(y_pred - y_test)))

[0.322942, 0.169792, 2.30792, 0.481834, 4.599098, 4.882274]
[0.205695, 0.345929, 1.442795, 0.401561, 4.593457, 6.167843]
[0.341558, 0.540164, 1.298311, 0.175454, 4.849099, 6.479983]
[0.928631, 0.829293, 2.26794, 0.143082, 2.821149, 7.432555]
[0.845322, 0.71866, 3.00799, 0.17699, 3.155803, 7.305286]
[0.570099, 0.542175, 3.338602, 0.150504, 4.129281, 7.413443]
[0.381817, 0.635065, 1.647458, 1.040262, 4.634026, 5.792036]
[0.408499, 0.557798, 1.603856, 0.958696, 4.669335, 5.618709]
[0.452065, 0.827297, 0.484196, 0.71776, 3.931392, 3.588442]
[0.579208, 1.586331, 0.409369, 0.695492, 1.398635, 2.695608]
[0.349763, 0.531289, 0.548259, 0.542735, 2.543668, 1.763388]
[0.540279, 0.592426, 0.874172, 0.39039, 1.689901, 1.37132]
[0.720619, 1.052858, 0.453679, 0.130593, 1.272732, 0.728511]
[0.946606, 1.364069, 0.505952, 0.175076, 1.7745, 0.558985]
[0.867254, 1.932189, 1.062489, 0.125269, 0.822336, 0.935437]
[0.675192, 1.349974, 2.635921, 0.175217, 1.433167, 0.913742]
[0.827882, 1.085273, 1.437351, 0.2

[0.045521, 0.835587, 0.543569, 0.580478, 4.996571, 3.686499]
[0.002256, 1.643003, 0.396411, 0.673223, 2.259065, 2.637556]
[0.019295, 0.513772, 0.629075, 0.504956, 3.342219, 1.774269]
[0.0, 0.580063, 0.853592, 0.58626, 2.79186, 1.368236]
[0.000894, 1.04571, 0.365884, 0.138909, 1.872044, 0.743488]
[0.0, 1.422802, 0.781058, 0.162947, 2.857232, 0.564535]
[0.000333, 1.854427, 1.356608, 0.160501, 1.142787, 0.92612]
[0.006924, 1.350619, 2.316081, 0.173504, 2.356115, 0.899818]
[0.013874, 1.08786, 1.880461, 0.289882, 1.375712, 0.481996]
[0.0, 1.239639, 2.028569, 0.387364, 5.432975, 0.026474]
[0.000597, 1.310342, 1.578371, 0.465333, 2.599224, 0.043158]
[0.0, 1.110808, 2.309135, 0.575114, 4.492804, 0.479221]
[0.004397, 0.665087, 3.992471, 0.876258, 6.290377, 0.747467]
[0.037794, 0.447982, 2.493555, 1.088459, 7.77296, 0.914802]
[0.009876, 0.287304, 2.019676, 0.473664, 11.997839, 1.368156]
[0.036895, 0.160247, 1.513909, 0.678356, 7.474946, 3.006784]
[0.023893, 0.17212, 1.678838, 0.552908, 5.069226,

[0.026887, 0.166636, 2.161648, 0.566633, 6.377591, 4.969667]
[0.003464, 0.357071, 1.402977, 0.494431, 6.025985, 6.400514]
[0.004573, 0.588186, 2.260955, 0.236649, 7.00008, 6.753656]
[0.0, 0.8329, 1.996168, 0.158751, 4.107835, 7.231202]
[0.001059, 0.759771, 2.527793, 0.13999, 4.453834, 7.280178]
[0.013631, 0.523899, 2.964176, 0.156143, 5.884593, 7.163184]
[0.000365, 0.735166, 1.383971, 0.91916, 7.981138, 5.886152]
[0.000365, 0.485345, 0.935091, 0.802649, 7.02711, 5.46724]
[0.045521, 0.835587, 0.543569, 0.580478, 4.996571, 3.686499]
[0.002256, 1.643003, 0.396411, 0.673223, 2.259065, 2.637556]
[0.019295, 0.513772, 0.629075, 0.504956, 3.342219, 1.774269]
[0.0, 0.580063, 0.853592, 0.58626, 2.79186, 1.368236]
[0.000894, 1.04571, 0.365884, 0.138909, 1.872044, 0.743488]
[0.0, 1.422802, 0.781058, 0.162947, 2.857232, 0.564535]
[0.000333, 1.854427, 1.356608, 0.160501, 1.142787, 0.92612]
[0.006924, 1.350619, 2.316081, 0.173504, 2.356115, 0.899818]
[0.013874, 1.08786, 1.880461, 0.289882, 1.375712, 

In [50]:
import pandas as pd
import numpy as np

data = pd.read_csv("submission_pikapika.csv")

lst = []
for i in range(len(data)):
    lst.append([round(data.stream1[i],6) , round(data.stream2[i], 6), round(data.stream3[i], 6),  round(data.stream4[i], 6), round(data.stream5[i], 6)])

#5-1-3-4-2

#for
new_lst = []
for i in range(len(lst)):
    mark = 0
    max_value = lst[i].index(max(lst[i]))
    for k in range(len(lst[i])):
            if(abs(max_value - lst[i][k]) <= 1.9):
                if((lst[i].index(lst[i][j]) == 0 and lst[i].index(lst[i][k]) == 1) or (lst[i].index(lst[i][k]) == 0 and lst[i].index(lst[i][j]) == 1)):
                    mark = 1
                    new_lst.append(1)
                elif((lst[i].index(lst[i][j]) == 0 and lst[i].index(lst[i][k]) == 2) or (lst[i].index(lst[i][k]) == 0 and lst[i].index(lst[i][j]) == 2)):
                    mark = 1
                    new_lst.append(1)
                elif((lst[i].index(lst[i][j]) == 0 and lst[i].index(lst[i][k]) == 3) or (lst[i].index(lst[i][k]) == 0 and lst[i].index(lst[i][j]) == 3)):
                    mark = 1
                    new_lst.append(1)
                elif((lst[i].index(lst[i][j]) == 0 and lst[i].index(lst[i][k]) == 4) or (lst[i].index(lst[i][k]) == 0 and lst[i].index(lst[i][j]) == 4)):
                    mark = 1
                    new_lst.append(5)
                elif((lst[i].index(lst[i][j]) == 1 and lst[i].index(lst[i][k]) == 2) or (lst[i].index(lst[i][k]) == 1 and lst[i].index(lst[i][j]) == 2)):
                    mark = 1
                    new_lst.append(3)
                elif((lst[i].index(lst[i][j]) == 1 and lst[i].index(lst[i][k]) == 3) or (lst[i].index(lst[i][k]) == 1 and lst[i].index(lst[i][j]) == 3)):
                    mark = 1
                    new_lst.append(4)
                elif((lst[i].index(lst[i][j]) == 1 and lst[i].index(lst[i][k]) == 4) or (lst[i].index(lst[i][k]) == 1 and lst[i].index(lst[i][j]) == 4)):
                    mark = 1
                    new_lst.append(5)
                elif((lst[i].index(lst[i][j]) == 2 and lst[i].index(lst[i][k]) == 3) or (lst[i].index(lst[i][k]) == 2 and lst[i].index(lst[i][j]) == 3)):
                    mark = 1
                    new_lst.append(3)
                elif((lst[i].index(lst[i][j]) == 2 and lst[i].index(lst[i][k]) == 4) or (lst[i].index(lst[i][k]) == 2 and lst[i].index(lst[i][j]) == 4)):
                    mark = 1
                    new_lst.append(5)
                elif((lst[i].index(lst[i][j]) == 3 and lst[i].index(lst[i][k]) == 4) or (lst[i].index(lst[i][k]) == 3 and lst[i].index(lst[i][j]) == 4)):
                    mark = 1
                    new_lst.append(5)
                    
    if(mark == 0):
        max_value = max_value + 1
        new_lst.append(max_value)
                
for i in range(len(new_lst)):
    print(new_lst[i])



IndexError: list index out of range

In [93]:
import pandas as pd
import numpy as np

data = pd.read_csv("submission_pikapika.csv")

lst = []
for i in range(len(data)):
    lst.append([data.stream1[i] , data.stream2[i], data.stream3[i],  data.stream4[i], data.stream5[i]])

#5-1-3-4-2

#for
new_lst = []
mark1_lst = []
mark_lst = []
for i in range(len(lst)):
    mark = 0
    max_value = lst[i].index(max(lst[i]))

    for k in range(len(lst[i])):
        if(((max(lst[i]) - lst[i][k]) <= 1.35) and ((max(lst[i]) - lst[i][k]) > 0) and (max(lst[i]) != lst[i][k])):
                if((max_value == 0 and lst[i].index(lst[i][k]) == 1) or (lst[i].index(lst[i][k]) == 0 and max_value == 1)):
                    mark = 1
                    new_lst.append(1)
                    break
                elif((max_value == 0 and lst[i].index(lst[i][k]) == 2) or (lst[i].index(lst[i][k]) == 0 and max_value == 2)):
                    mark = 1
                    new_lst.append(1)
                    break
                elif((max_value == 0 and lst[i].index(lst[i][k]) == 3) or (lst[i].index(lst[i][k]) == 0 and max_value == 3)):
                    mark = 1
                    new_lst.append(1)
                    break
                elif((max_value == 0 and lst[i].index(lst[i][k]) == 4) or (lst[i].index(lst[i][k]) == 0 and max_value == 4)):
                    mark = 1
                    new_lst.append(5)
                    break
                elif((max_value == 1 and lst[i].index(lst[i][k]) == 2) or (lst[i].index(lst[i][k]) == 1 and max_value == 2)):
                    mark = 1
                    new_lst.append(3)
                    break
                elif((max_value == 1 and lst[i].index(lst[i][k]) == 3) or (lst[i].index(lst[i][k]) == 1 and max_value == 3)):
                    mark = 1
                    new_lst.append(4)
                    break
                elif((max_value == 1 and lst[i].index(lst[i][k]) == 4) or (lst[i].index(lst[i][k]) == 1 and max_value == 4)):
                    mark = 1
                    new_lst.append(5)
                    break
                elif((max_value == 2 and lst[i].index(lst[i][k]) == 3) or (lst[i].index(lst[i][k]) == 2 and max_value == 3)):
                    mark = 1
                    new_lst.append(3)
                    break
                elif((max_value == 2 and lst[i].index(lst[i][k]) == 4) or (lst[i].index(lst[i][k]) == 2 and max_value == 4)):
                    mark = 1
                    new_lst.append(5)
                    break
                elif((max_value == 3 and lst[i].index(lst[i][k]) == 4) or (lst[i].index(lst[i][k]) == 3 and max_value == 4)):
                    mark = 1
                    new_lst.append(5)
                    break
    
    if(mark == 0):
        max_value = max_value + 1
        new_lst.append(max_value)
        
for i in range(len(new_lst)):
        print(new_lst[i])
        
print(len(new_lst))

2
5
1
1
2
2
2
1
1
1
1
1
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
1
5
1
1
1
5
1
5
5
5
5
5
5
5
5
1
3
3
5
5
1
2
5
1
2
2
2
3
1
1
1
1
3
5
5
5
5
5
5
5
5
5
5
5
5
1
5
5
5
5
1
3
1
5
1
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
3
5
3
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
4
5
5
5
5
3
5
1
5
5
5
5
5
5
5
3
1
1
1
3
3
1
1
1
4
1
1
1
1
3
1
1
1
1
3
3
3
1
1
5
5
5
1
5
3
5
5
5
2
5
1
1
2
2
2
1
1
1
1
1
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
1
5
1
1
1
5
1
5
5
5
5
5
5
5
5
1
3
3
5
5
1
2
5
1
2
2
2
3
1
1
1
1
3
5
5
5
5
5
5
5
5
5
5
5
5
1
5
5
5
5
1
3
1
5
1
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
3
5
3
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
4
5
5
5
5
3
5
1
5
5
5
5
5
5
5
3
1
1
1
3
3
1
1
1
4
1
1
1
1
3
1
1
1
1
3
3
3
1
1
5
5
5
1
5
3
5
5
5
2
5
1
1
2
2
2
1
1
1
1
1
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
1
5
1
1
1
5
1
5
5
5
5
5
5
5
5
1
3
3
5
5
1
2
5
1
2
2
2
3
1
1
1
1
3
5
5
5
5
5
5
5
5
5
5
5
5
1
5
5
5
5
1
3
1
5
1
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
3
5
3
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
4
5
5
5
5
3
5
1
5
5
5
5
5
5
5
3
1
1
1
3
3
1
1
1
2
1
1
3
2
2
3
3
3
3
3
3
3
1
1
5
5
5
1
1


In [82]:
#Import scikit-learn dataset library
from sklearn import datasets

#Load dataset
iris = datasets.load_iris()

# print the label species(setosa, versicolor,virginica)
print(iris.target_names)

# print the names of the four features
print(iris.feature_names)

# print the iris data (top 5 records)
print(iris.data[0:5])

# print the iris labels (0:setosa, 1:versicolor, 2:virginica)
print(iris.target)

import pandas as pd
data=pd.DataFrame({
    'sepal length':iris.data[:,0],
    'sepal width':iris.data[:,1],
    'petal length':iris.data[:,2],
    'petal width':iris.data[:,3],
    'species':iris.target
})
data.head()

# Import train_test_split function
from sklearn.model_selection import train_test_split

X=data[['sepal length', 'sepal width', 'petal length', 'petal width']]  # Features
y=data['species']  # Labels

# Split dataset into training set and test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3) # 70% training and 30% test

#Import Random Forest Model
from sklearn.ensemble import RandomForestClassifier

#Create a Gaussian Classifier
clf=RandomForestClassifier(n_estimators=100)

#Train the model using the training sets y_pred=clf.predict(X_test)
clf.fit(X_train,y_train)

y_pred=clf.predict(X_test)

#Import scikit-learn metrics module for accuracy calculation
from sklearn import metrics
# Model Accuracy, how often is the classifier correct?
print("Accuracy:",metrics.accuracy_score(y_test, y_pred))

print(y_train)

['setosa' 'versicolor' 'virginica']
['sepal length (cm)', 'sepal width (cm)', 'petal length (cm)', 'petal width (cm)']
[[5.1 3.5 1.4 0.2]
 [4.9 3.  1.4 0.2]
 [4.7 3.2 1.3 0.2]
 [4.6 3.1 1.5 0.2]
 [5.  3.6 1.4 0.2]]
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]
Accuracy: 0.9777777777777777
129    2
66     1
10     0
98     1
107    2
59     1
78     1
37     0
81     1
83     1
138    2
14     0
18     0
21     0
116    2
25     0
69     1
34     0
65     1
92     1
125    2
112    2
70     1
53     1
115    2
146    2
6      0
91     1
36     0
64     1
      ..
120    2
141    2
1      0
55     1
111    2
119    2
122    2
139    2
128    2
117    2
68     1
56     1
61     1
140    2
79     1
71     1
77     1
148    2
63     1


In [86]:
import pandas as pd
import numpy as np

data = pd.read_csv("submission_pikapika.csv")

lst = []
for i in range(len(data)):
    lst.append([data.stream1[i] , data.stream2[i], data.stream3[i],  data.stream4[i], data.stream5[i]])

#5-1-3-4-2

#for
new_lst = []
mark1_lst = []
mark_lst = []
for i in range(len(lst)):
    mark = 0
    max_value = lst[i].index(max(lst[i]))

    if(abs(max(lst[i]) - min(lst[i])) >= 3.2 and (min(lst[i]) != 0)):
        new_lst.append(max_value+1)
    else:
        new_lst.append(0)
            
for i in range(len(new_lst)):
        print(new_lst[i])
        
print(len(new_lst))

5
5
5
0
0
5
5
5
5
0
0
0
0
0
0
0
0
5
0
0
0
5
5
5
5
5
5
5
5
5
5
5
5
0
5
0
0
0
0
0
0
5
0
5
5
5
5
5
5
5
5
0
0
5
5
5
5
0
0
0
0
0
0
0
0
5
0
5
5
5
5
5
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
2
0
0
0
0
0
0
0
5
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
5
0
0
0
0
0
0
3
0
0
0
0
0
0
0
0
2
0
0
0
0
0
3
0
5
0
5
5
5
0
0
5
5
5
5
0
0
0
0
0
0
0
0
5
0
0
0
5
5
5
5
5
5
5
5
5
5
5
5
0
5
0
0
0
0
0
0
5
0
5
5
5
5
5
5
5
5
0
0
5
5
5
5
0
0
0
0
0
0
0
0
5
0
5
5
5
5
5
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
2
0
0
0
0
0
0
0
5
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
0
0
0
5
0
0
0
0
0
0
3
0
0
0
0
0
0
0
0
2
0
0
0
1
1
3
0
5
0
5
5
5
0
0
5
5
5
5
0
0
0
0
0
0
0
0
5
0
0
0
5
5
5
5
5
5
5
5
5
5
5
5
0
5
0
0
0
0
0
0
5
0
5
5
5
5
5
5
5
5
0
0
5
5
5
5
0
0
0
0
0
0
0
0
5
0
5
5
5
5
5
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
2
0
0
0
0
0
0
0
5
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
5
0
0
0
0
0
0
3
0
0
0
0
0
0
0
0
2
0
0
0
0
0


In [68]:

from matplotlib import pyplot

import pandas as pd
import numpy as np


list_of_train = ['number_of_slot','number_of_day','number_of_weekday','number_of_hour']
#list_of_target = ['stream1','stream2','stream3','stream4','stream5','bandwidth_available_OTT']
#list_of_target = ['advertising_earnings1','advertising_earnings2','advertising_earnings3','advertising_earnings4','advertising_earnings5']
list_of_target = ['stream5']
#list_of_target = ['bandwidth_available_OTT']

data = pd.read_csv("train_data_original.csv")

y = data[list_of_target]
# Tail-rolling average transform
rolling = y.rolling(window=3)
rolling_mean = rolling.mean()

lst_test_x = []
for i in range(len(rolling_mean)):
    lst_test_x.append([rolling_mean.stream5[i]])
    
for i in range(len(lst_test_x)):
    print(lst_test_x[i])




[nan]
[nan]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.6256727569999999]
[1.458787594333333]
[2.3384412986666665]
[2.171088512333333]
[1.977088831333333]
[1.5628896279999995]
[1.8040750589999996]
[1.8800990949999996]
[2.045983979666666]
[1.657037890333333]
[1.429444166333333]
[0.9964615119999998]
[0.8446986543333331]
[0.5917473466666666]
[0.48692271299999995]
[0.6561705809999999]
[0.5501200593333332]
[1.1012800976666666]
[1.126810785]
[1.599643231]
[1.7748916709999998]
[2.5169401739999997]
[3.713850538333333]
[3.653824126]
[3.113034753333333]
[1.8949664269999997]
[1.7394438886666663]
[1.6636701736666666]
[1.4457127773333334]
[1.594543120666667]
[2.235564938]
[2.737299897333333]
[2.3636412206666666]
[1.5169811073333335]
[0.9600031456666667]
[0.9553933490000001]
[0.9360397160000002]
[0.8400377393333335]
[0.5708415233333335]
[0.5805776616666669]
[0.4958646456666669]
[1.0331814480000003]
[1.13442668

[1.2621635346666633]
[1.5696150483333298]
[1.9094887979999964]
[2.693223618666663]
[2.834042942666663]
[3.299242620666663]
[3.2977547729999963]
[3.9540181026666636]
[3.8361270166666634]
[3.351148448666663]
[2.9023047073333292]
[3.3145905903333297]
[4.49849396533333]
[4.449724533999996]
[3.6669854466666627]
[2.733279777666663]
[2.2908612183333297]
[2.190723533666663]
[1.963956064666663]
[1.562198426666663]
[1.6743171919999964]
[1.1426416893333295]
[2.0902385303333295]
[2.018416531666663]
[3.006235311999996]
[3.295106397333329]
[4.095836439333329]
[5.05526889833333]
[4.825956434999996]
[5.680301142999997]
[5.606428769666664]
[6.696037004999997]
[5.540295943666664]
[4.618588616666664]
[4.643057040666665]
[5.4001969706666655]
[6.921373159666665]
[6.026910251666666]
[4.9401504833333325]
[3.4729667749999993]
[3.028629550666666]
[3.1346441899999995]
[2.8457593479999996]
[2.2736434483333325]
[2.4453518533333325]
[1.8070352109999994]
[3.419862524999999]
[3.221891730666666]
[4.681591163999999]
[

[1.2258629609999898]
[1.1062463939999898]
[0.9251331693333231]
[0.8552450099999898]
[0.6267597836666564]
[0.7480786046666564]
[0.5234708733333232]
[0.8596879759999899]
[0.94966011999999]
[1.2333652593333233]
[1.3238776436666566]
[1.99784662099999]
[2.5154313999999904]
[3.0296259253333235]
[3.2191309413333236]
[3.733053906666657]
[3.7860503849999905]
[3.188665593666657]
[3.046918283333323]
[2.7466977133333224]
[3.7902382426666557]
[4.300455254666656]
[4.6558607469999895]
[3.500741597666656]
[2.4725460426666563]
[1.813877712666656]
[1.7554484466666562]
[1.5158003366666561]
[1.2618301469999895]
[1.310286053333323]
[1.0903259483333227]
[2.219286451666656]
[2.1566642966666563]
[3.0018056556666566]
[3.0344763686666574]
[4.093074641666657]
[5.941112674333323]
[5.9166165696666555]
[6.920449956333324]
[6.5878514689999905]
[7.953720889999992]
[6.464408881666657]
[5.088221202333323]
[3.5843503446666567]
[5.1978459579999905]
[6.533433344999991]
[6.635588461666657]
[4.5570606343333235]
[3.384797056

[2.688548729333327]
[2.449257566666661]
[1.9760940649999945]
[1.5154778353333278]
[2.223117608333328]
[2.7315438406666614]
[2.559661058333328]
[1.7127358256666614]
[1.1891118033333279]
[0.8993954379999947]
[0.7613192919999947]
[0.736818725333328]
[0.6450228446666614]
[0.7355854156666614]
[0.4764756063333279]
[1.0765233096666613]
[1.0558932446666611]
[1.446196974666661]
[1.6095713699999943]
[2.586873404999994]
[3.730578640999994]
[3.6053964076666603]
[4.288816225333327]
[4.355752366999994]
[4.8428557396666605]
[4.002198375999993]
[3.3258989913333266]
[3.3688734673333265]
[3.669864279999993]
[4.849156802999992]
[5.144884316333325]
[4.494392071333325]
[2.903199774666659]
[1.9640914713333262]
[1.7011371696666597]
[1.7361493463333264]
[1.308115460999993]
[1.2558971483333263]
[0.8907917353333262]
[2.3343699126666597]
[2.4861654543333263]
[3.376012681999993]
[2.9833515179999934]
[4.511067574333327]
[6.806742951666661]
[7.3807951709999955]
[7.076026080333328]
[5.847591189999993]
[7.32805672999

[0.9789995363333235]
[0.9675825553333235]
[1.1792935346666569]
[1.35673362599999]
[2.2546304113333235]
[3.4598569579999903]
[3.3569755796666563]
[3.072200275333323]
[2.2681328536666565]
[2.6488928329999895]
[2.3147045636666563]
[1.9593070483333228]
[1.4155300636666563]
[2.090978813666656]
[2.3506354456666565]
[2.3196062263333226]
[1.4028114643333227]
[1.1888322173333226]
[1.0431293893333224]
[1.0621339896666557]
[0.8747015549999894]
[0.627322702666656]
[0.5996047593333227]
[0.4473100566666561]
[0.9609411026666561]
[1.0435500693333228]
[1.610072178333323]
[1.7791342299999897]
[2.5874274539999895]
[3.611228517333323]
[3.888771033666656]
[4.27122042799999]
[4.326910471666657]
[4.6940957796666565]
[4.21603711399999]
[3.62816932699999]
[3.9019805436666566]
[5.0685655889999905]
[5.625758463999991]
[5.219211355333324]
[3.4833543819999915]
[2.5175843059999914]
[1.983476441333325]
[1.889017210333325]
[1.5517896009999916]
[1.0592018696666583]
[1.2908527696666585]
[1.084684885333325]
[2.064892436

[3.293288746333321]
[2.2778081463333213]
[2.1120317766666545]
[1.8290890639999875]
[1.557163451333321]
[1.699666800333321]
[1.4011219949999878]
[2.531908476999988]
[2.3358532843333215]
[3.0586327626666545]
[2.612263545999988]
[3.7188032316666546]
[6.557025461999987]
[7.4278765043333195]
[5.8439552693333185]
[1.911231372666652]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.8820767213333188]
[1.4599126263333186]
[2.5151586669999855]
[2.2200479816666525]
[2.1927413496666524]
[1.6867835859999853]
[2.324783532999985]
[2.9130693473333182]
[3.0621465376666515]
[2.1201110583333183]
[1.4020462533333182]
[1.0051268833333185]
[0.9178883673333184]
[0.8854105406666516]
[0.7055458116666514]
[0.8224557493333181]
[0.5923412556666515]
[0.9296922526666517]
[0.9946191896666517]
[1.3574049746666514]
[1.808464949999985]
[2.130837071333318]
[2.7610544616666512]
[2.5696226249999845]
[2.631935769333318]
[2.473472241666651

[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.0]
[0.7440199506666408]
[1.349728526666641]
[2.3267990436666413]
[2.0687006763333082]
[2.110179012333308]
[1.762021183666641]
[2.330606460333308]
[2.701281899333308]
[2.6986947289999748]
[1.9076317299999748]
[1.2351206303333082]
[0.8434789956666414]
[0.850201341333308]
[0.7831731673333081]
[0.6368173523333082]
[0.5526356726666416]
[0.39751275366664157]
[0.8387829126666414]
[0.9762434109999747]
[1.2150973289999747]
[1.3629920743333084]
[1.9084313006666418]
[2.9854153499999754]
[3.0337025803333084]
[3.0891894746666417]
[2.5195768176666418]
[2.7953863199999756]
[2.373266887333309]
[1.952839814999976]
[1.8064006909999761]
[2.274016532333309]
[2.390153793333309]
[2.3055291796666424]
[1.6626750943333093]
[1.5829895443333093]
[1.0628133406666425]
[0.9103441799999757]
[0.7697565909999758]
[0.6278878129999759]
[0.7565497299999758]
[0.5442826363333092]
[1.152400607333309]
[1.1235496399999756]

In [61]:
from sklearn.pipeline import Pipeline
import pandas as pd
import numpy as np

import numpy as np
import pandas as pd
import math

from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
from sklearn.metrics import mean_squared_error

from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor


list_of_train = ['number_of_slot','number_of_day','number_of_weekday','number_of_hour']
#list_of_target = ['stream1','stream2','stream3','stream4','stream5','bandwidth_available_OTT']
#list_of_target = ['advertising_earnings1','advertising_earnings2','advertising_earnings3','advertising_earnings4','advertising_earnings5']
list_of_target = ['stream3']
#list_of_target = ['bandwidth_available_OTT']

data = pd.read_csv("train_data_original.csv")

x = data[list_of_train]
y = data[list_of_target]
    
steps = [
    ('scalar', StandardScaler()),
    ('poly', PolynomialFeatures(degree=2)),
    ('model', RandomForestRegressor(n_estimators=400, min_samples_split=10, min_samples_leaf=4,max_features='auto',max_depth=70,bootstrap=True))
]


pipeline = Pipeline(steps)

pipeline.fit(x,y)

print('Training score: {}'.format(pipeline.score(x, y)))

x_d = pd.read_csv("test_data.csv")

#lst_test_x = []
#for i in range(len(x_d)):
 #   lst_test_x.append([x_d.number_of_slot[i], x_d.number_of_day[i], x_d.number_of_weekday[i], x_d.number_of_hour[i]])

test_x = x_d[list_of_train]


y_pred=reg.predict(test_x)

  return self.partial_fit(X, y)
  return self.fit(X, y, **fit_params).transform(X)
  Xt = transform.transform(Xt)
  Xt = transform.transform(Xt)


Training score: 0.5790132578477964
[0.447572, 0.08317, 2.236155, 0.336188, 5.258032, 8.141072]
[0.474427, 0.274823, 2.06689, 0.316275, 4.779173, 7.392411]
[0.500508, 0.450313, 1.913673, 0.298953, 4.345148, 6.684076]
[0.525815, 0.609642, 1.776504, 0.284222, 3.955956, 6.016067]
[0.550347, 0.752809, 1.655383, 0.272083, 3.611599, 5.388384]
[0.574105, 0.879813, 1.550311, 0.262534, 3.312076, 4.801026]
[0.597089, 0.990656, 1.461287, 0.255576, 3.057386, 4.253994]
[0.619299, 1.085337, 1.388311, 0.25121, 2.84753, 3.747288]
[0.640735, 1.163855, 1.331384, 0.249435, 2.682508, 3.280907]
[0.661396, 1.226211, 1.290504, 0.250251, 2.56232, 2.854852]
[0.681283, 1.272406, 1.265673, 0.253658, 2.486966, 2.469123]
[0.700396, 1.302438, 1.25689, 0.259656, 2.456446, 2.12372]
[0.718735, 1.316308, 1.264156, 0.268245, 2.470759, 1.818642]
[0.736299, 1.314017, 1.287469, 0.279425, 2.529907, 1.55389]
[0.753089, 1.295563, 1.326831, 0.293197, 2.633888, 1.329463]
[0.769105, 1.260947, 1.382241, 0.309559, 2.782703, 1.14536

[0.257249, 1.334568, 0.259542, 0.505624, -0.641561, 1.324268]
[0.283784, 1.302052, 0.315546, 0.522739, -0.506588, 1.140758]
[0.309543, 1.253373, 0.387597, 0.542445, -0.326782, 0.997573]
[0.334529, 1.188533, 0.475697, 0.564743, -0.102141, 0.894714]
[0.35874, 1.10753, 0.579846, 0.589631, 0.167333, 0.83218]
[0.382177, 1.010366, 0.700042, 0.617111, 0.481642, 0.809973]
[0.40484, 0.897039, 0.836287, 0.647181, 0.840784, 0.828091]
[0.426729, 0.76755, 0.98858, 0.679843, 1.24476, 0.886534]
[0.447843, 0.6219, 1.156921, 0.715096, 1.69357, 0.985304]
[0.468183, 0.460087, 1.341311, 0.75294, 2.187213, 1.124399]
[0.248934, 0.119629, 1.640984, 0.418722, 3.152914, 8.131025]
[0.283688, 0.312842, 1.472314, 0.399346, 2.66371, 7.382834]
[0.317668, 0.489893, 1.319692, 0.382562, 2.21934, 6.674969]
[0.350874, 0.650781, 1.183119, 0.368368, 1.819804, 6.007429]
[0.383305, 0.795508, 1.062593, 0.356766, 1.465102, 5.380214]
[0.414962, 0.924073, 0.958116, 0.347755, 1.155234, 4.793326]
[0.445845, 1.036476, 0.869687, 0.

[-0.096669, 0.463612, 0.748903, 0.517033, 1.37514, 6.660794]
[-0.060752, 0.62491, 0.613678, 0.502814, 0.97244, 5.993615]
[-0.02561, 0.770046, 0.494501, 0.491186, 0.614573, 5.366762]
[0.008759, 0.899019, 0.391372, 0.482148, 0.30154, 4.780235]
[0.042353, 1.011831, 0.304291, 0.475702, 0.033341, 4.234033]
[0.075173, 1.108481, 0.233259, 0.471847, -0.190024, 3.728157]
[0.107218, 1.188968, 0.178274, 0.470584, -0.368555, 3.262606]
[0.13849, 1.253294, 0.139338, 0.471911, -0.502252, 2.837382]
[0.168987, 1.301458, 0.116451, 0.475829, -0.591116, 2.452483]
[0.19871, 1.333459, 0.109611, 0.482339, -0.635145, 2.107909]
[0.227659, 1.349298, 0.11882, 0.491439, -0.634341, 1.803662]
[0.255833, 1.348976, 0.144077, 0.503131, -0.588703, 1.53974]
[0.283233, 1.332491, 0.185382, 0.517414, -0.498231, 1.316144]
[0.309859, 1.299844, 0.242735, 0.534288, -0.362925, 1.132873]
[0.335711, 1.251036, 0.316137, 0.553753, -0.182786, 0.989929]
[0.360789, 1.186065, 0.405587, 0.575809, 0.042188, 0.887309]
[0.385092, 1.104932,

In [128]:
import pandas as pd
import numpy as np

data = pd.read_csv("submission_pikapika.csv")

lst = []
for i in range(len(data)):
    lst.append([data.stream1[i] , data.stream2[i], data.stream3[i],  data.stream4[i], data.stream5[i]])

#5-1-3-4-2

#for
final = []
for i in range(len(lst)):
    max_value = lst[i].index(max(lst[i]))
    new_lst = []
   
    for k in range(len(lst[i])):
        if((lst[i][k] != 0) and (abs(max(lst[i]) - lst[i][k]) <= 1.35) and (max(lst[i]) != lst[i][k])):
            if((max_value == 0 and lst[i].index(lst[i][k]) == 1) or (lst[i].index(lst[i][k]) == 0 and max_value == 1)):
                    new_lst.append(1)
            elif((max_value == 0 and lst[i].index(lst[i][k]) == 2) or (lst[i].index(lst[i][k]) == 0 and max_value == 2)):
                    new_lst.append(1)
            elif((max_value == 0 and lst[i].index(lst[i][k]) == 3) or (lst[i].index(lst[i][k]) == 0 and max_value == 3)):
                    new_lst.append(1)
            elif((max_value == 0 and lst[i].index(lst[i][k]) == 4) or (lst[i].index(lst[i][k]) == 0 and max_value == 4)):
                    new_lst.append(5)
            elif((max_value == 1 and lst[i].index(lst[i][k]) == 2) or (lst[i].index(lst[i][k]) == 1 and max_value == 2)):
                    new_lst.append(3)
            elif((max_value == 1 and lst[i].index(lst[i][k]) == 3) or (lst[i].index(lst[i][k]) == 1 and max_value == 3)):
                    new_lst.append(4)
            elif((max_value == 1 and lst[i].index(lst[i][k]) == 4) or (lst[i].index(lst[i][k]) == 1 and max_value == 4)):
                    new_lst.append(5)
            elif((max_value == 2 and lst[i].index(lst[i][k]) == 3) or (lst[i].index(lst[i][k]) == 2 and max_value == 3)):
                    new_lst.append(3)
            elif((max_value == 2 and lst[i].index(lst[i][k]) == 4) or (lst[i].index(lst[i][k]) == 2 and max_value == 4)):
                    new_lst.append(5)
            elif((max_value == 3 and lst[i].index(lst[i][k]) == 4) or (lst[i].index(lst[i][k]) == 3 and max_value == 4)):
                    new_lst.append(5)
    if (len(new_lst) == 0):
        new_lst.append(max_value+1)
    final.append(new_lst)

result = []
for i in range(len(final)):
    if(5 in final[i]):
        result.append(5)
    elif(1 in final[i]):
        result.append(1)
    elif(3 in final[i]):
        result.append(3)
    elif(4 in final[i]):
        result.append(4)
    elif(2 in final[i]):
        result.append(2)

print(len(result))
for i in range(len(result)):
    print(result[i])

2256
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
3
3
3
3
3
3
3
3
5
4
5
3
3
3
3
3
3
3
3
3
3
3
3
3
5
5
5
5
5
5
5
5
5
2
5
5
5
2
2
2
1
5
1
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
3
5
5
5
2
5
5
2
2
2
3
1
5
1
5
3
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
3
3
3
3
3
3
3
3
5
4
5
3
3
3
3
3
3
3
3
3
3
3
3
3
5
5
5
5
5
5
5
5
5
2
5
5
5
1
2
1
1
5
1
1
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
1
5
1
5
5
5
5
5
5
5
5
5
5
3
5
5
5
2
5
5
1
1
2
1
1
5
1
1
3
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
1
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
3
3
3
3
3
3
3
3
5
4
5
3
3
3
3
3
3
3
3
3
3
3
3
3
5
5
5
5
5
5
5
5
5
2
5
5
5
2
2
2
1
5
1
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
5
3
5
5
5
2
5
5
2
2
2
3
1
5

In [None]:
'''    
    for j in range(len(lst[i])):
        if()
mark1_lst = []
mark_lst = []
for i in range(len(lst)):
    mark = 0
    max_value = lst[i].index(max(lst[i]))

    for k in range(len(lst[i])):
        if(((max(lst[i]) - lst[i][k]) <= 1.35) and ((max(lst[i]) - lst[i][k]) > 0) and (max(lst[i]) != lst[i][k])):
                if((max_value == 0 and lst[i].index(lst[i][k]) == 1) or (lst[i].index(lst[i][k]) == 0 and max_value == 1)):
                    mark = 1
                    new_lst.append(1)
                    break
                elif((max_value == 0 and lst[i].index(lst[i][k]) == 2) or (lst[i].index(lst[i][k]) == 0 and max_value == 2)):
                    mark = 1
                    new_lst.append(1)
                    break
                elif((max_value == 0 and lst[i].index(lst[i][k]) == 3) or (lst[i].index(lst[i][k]) == 0 and max_value == 3)):
                    mark = 1
                    new_lst.append(1)
                    break
                elif((max_value == 0 and lst[i].index(lst[i][k]) == 4) or (lst[i].index(lst[i][k]) == 0 and max_value == 4)):
                    mark = 1
                    new_lst.append(5)
                    break
                elif((max_value == 1 and lst[i].index(lst[i][k]) == 2) or (lst[i].index(lst[i][k]) == 1 and max_value == 2)):
                    mark = 1
                    new_lst.append(3)
                    break
                elif((max_value == 1 and lst[i].index(lst[i][k]) == 3) or (lst[i].index(lst[i][k]) == 1 and max_value == 3)):
                    mark = 1
                    new_lst.append(4)
                    break
                elif((max_value == 1 and lst[i].index(lst[i][k]) == 4) or (lst[i].index(lst[i][k]) == 1 and max_value == 4)):
                    mark = 1
                    new_lst.append(5)
                    break
                elif((max_value == 2 and lst[i].index(lst[i][k]) == 3) or (lst[i].index(lst[i][k]) == 2 and max_value == 3)):
                    mark = 1
                    new_lst.append(3)
                    break
                elif((max_value == 2 and lst[i].index(lst[i][k]) == 4) or (lst[i].index(lst[i][k]) == 2 and max_value == 4)):
                    mark = 1
                    new_lst.append(5)
                    break
                elif((max_value == 3 and lst[i].index(lst[i][k]) == 4) or (lst[i].index(lst[i][k]) == 3 and max_value == 4)):
                    mark = 1
                    new_lst.append(5)
                    break
    
    if(mark == 0):
        max_value = max_value + 1
        new_lst.append(max_value)
        
for i in range(len(new_lst)):
        print(new_lst[i])
        
print(len(new_lst))
'''