# Statistical Learning Model - Production


Import libraries

In [None]:
#!conda update scikit-learn

In [18]:
import numpy as np
import pandas as pd

from matplotlib import pyplot as plt

import time

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from lightgbm import LGBMClassifier
from sklearn.model_selection import train_test_split

# from sklearn.feature_extraction import TfidfVectorizer ===> gets an error
# pls use the import below
from sklearn.feature_extraction.text import TfidfVectorizer 

from scipy.sparse import hstack, vstack                            # used on bow of workds, vetoctored the 'Title' column

from sklearn.metrics import roc_auc_score, average_precision_score


%matplotlib inline
%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [19]:
import sklearn
print('The scikit-learn version is {}.'.format(sklearn.__version__))

The scikit-learn version is 0.21.3.


## Load final raw data with labels

In [20]:
df = pd.read_csv('./raw_data/final_labels_dataset.csv', index_col=0)

print('Dataset shape before: ', df.shape)
print('Dataset columns: ', df.columns)

# drop duplicated data
df.drop_duplicates(inplace=True)

print('Dataset shape after: ', df.shape)

print('Check duplicates: ', df.duplicated().mean())
df.tail(2)

Dataset shape before:  (727, 5)
Dataset columns:  Index(['title', 'y', 'upload_date', 'view_count', 'new'], dtype='object')
Dataset shape after:  (701, 5)
Check duplicates:  0.0


Unnamed: 0,title,y,upload_date,view_count,new
1016,Data Scientists vs Data Engineers: Which one i...,1.0,20191219,183723,1.0
1468,#kaggle #DataScience Machine Learning MicroCou...,0.0,20190730,405,1.0


# 1. Data Cleanup

Using a NEW dataframe with data that's ready and clean the data to fit the ML model.

In [21]:
# create a clean dataframe with the same indice on the original dataframe - raw data
df_clean = pd.DataFrame(index=df.index)

In [22]:
df_clean['date'] = pd.to_datetime(df['upload_date'], format='%Y%m%d')

# note: format='%Y %m %d' shows the time; format='%Y%m%d' brings only YYYY-MM-DD - easy!

In [23]:
df_clean['date']

# dtype: datetime64[ns] used by numpy and pandas

0      2021-05-05
1      2021-05-05
2      2021-05-05
3      2021-05-05
4      2021-05-05
          ...    
1041   2019-11-18
488    2018-11-25
528    2018-07-24
1016   2019-12-19
1468   2019-07-30
Name: date, Length: 701, dtype: datetime64[ns]

In [24]:
# columns views: make sure all NAN will be convert to 0 and an integer data type will be added
df_clean['views'] = df['view_count'].fillna(0).astype(int64)

In [25]:
# adding title column. It will be used on the model....will be vectorized later
df_clean['title'] = df['title']

In [26]:
df_clean.dtypes

date     datetime64[ns]
views             int64
title            object
dtype: object

## 2.Features & Labels

Create an unique features dataframe. JUST an extra step. Making sure the features are ready.

**Reason**: Align the feaatures dataframe with the most cleaning data - raw data collected & cleaned. The cleaning process can skip rows or columns.


In [27]:
# features: it's similar to df_clean, just an extra step
features = pd.DataFrame(index=df_clean.index)

# labels/targets
y = df['y'].copy()

In [28]:
print('Features shape: {}'.format(shape(features)))
print('Labels shape: {}'.format(shape(y)))

Features shape: (701, 0)
Labels shape: (701,)


## Important: sklearn can't use *date* as a feature.

Let's manipulate and create a feature using the raw date - **Num_views_per_day**.

Sklearn needs a number.

In [29]:
# time_since_pub: time since the video was published. Random data choose. Use the date I created this code: fix date point - 2021-05-09

# np.timedelta64(1, 'D'): time delta in numpy. Difference in days
# we have data on a granually day, meaning a difference less than a day makes sense.
features['time_since_pub'] = (pd.to_datetime("2021-05-09") - df_clean['date']) / np.timedelta64(1, 'D')

# used features
features['views'] = df_clean['views']
features['views_per_day'] = features['views'] / features['time_since_pub']

features = features.drop(['time_since_pub'], axis=1)   # time_since_pub only used for the calculation

# time_since_pub as a feature may impact the model once the numbers seem to increase a lot and the end of the time serie.
# The training&validations datasets may not have a normal distributed values.Thus, an umbalaced feature weights
# and random samples are important to train and fit a ml model


In [30]:
features.tail()

Unnamed: 0,views,views_per_day
1041,31788,59.085502
488,65724,73.352679
528,34286,33.613725
1016,183723,362.372781
1468,405,0.624037


In [31]:
features.describe()

Unnamed: 0,views,views_per_day
count,701.0,701.0
mean,152549.0,521.359455
std,2042103.0,4284.958273
min,0.0,0.0
25%,339.0,4.977941
50%,2892.0,37.956522
75%,19972.0,180.0
max,48724170.0,95913.728346


## 3. Data Preparation

Let's try to split the train&validation datasets 50/50.

How the 2 features **view** and **views_per_day** impacted the ML model? 
Does a simple model with only 2 features impact the way the YouTube videos will be selected?

In [32]:
# check all data on df_clean
# pd.set_option('display.max_rows', 527)
# df_clean

median_date = df_clean['date'].quantile(0.5, interpolation="midpoint")
median_date

# median date 2021-03-12 before

Timestamp('2021-01-10 00:00:00')

In [34]:
# splitting features dataset - trying a 50/50 using a median date
# balanced dataset is important!!!

# code below can also be used
# Xtrain, Xval = features[df_clean['date'] < '2021-03-12'], features[df_clean['date'] >= '2021-03-12']
# ytrain, yval = y[df_clean['date'] < '2021-03-12 '], y[df_clean['date'] >= '2021-03-12 ']

# needed approach - mask parameter to select the data
mask_train = df_clean['date'] < '2021-01-10'
mask_val = df_clean['date'] >= '2021-01-10'

Xtrain, Xval = features[mask_train], features[mask_val]
ytrain, yval = y[mask_train], y[mask_val]

Xtrain.shape, Xval.shape, ytrain.shape, yval.shape

# datasets, training & validation, not huge. But,...

((349, 2), (352, 2), (349,), (352,))

## Add the title feature

**Important**: transforming the Title string to numbers.

Building a matrix in which each column will be the counting word from the Title feature.

Import to notice that commom words like machine+learning will have a low weight.



In [35]:
title_train = df_clean[mask_train]['title']
title_val = df_clean[mask_val]['title']

# Vectorizing the Title features
title_vec = TfidfVectorizer(min_df=2, ngram_range=(1,3))   # object defined
# mind_df = 2 means the minimum numnber of words that be used to create a column
# ngram_range=(1,3) - combining words to maximum 3 words

# bow: bag of words
title_bow_train = title_vec.fit_transform(title_train)     # fit + transform: store the words on the features, plus how many times the word appeared
title_bow_val = title_vec.transform(title_val)             # validation set ONLY transform. Validation should NOT learning the words

Without ngram_range=(1,3): Shape for title bag of words matrix:  (349, 289)
with ngram_range=(1,3):   Shape for title bag of words matrix:  (349, 719)

In [36]:
# checking
print('Shape for title bag of words matrix: ', title_bow_train.shape)
title_bow_train

Shape for title bag of words matrix:  (349, 719)


<349x719 sparse matrix of type '<class 'numpy.float64'>'
	with 3968 stored elements in Compressed Sparse Row format>

 TfidfVectorizer function returns a vectorized sparse matrix. It's an optimize matrix in Scipy where only values NOT equal to zero are returned.

In [66]:
# the sparse matrix 'title_bow_train' contains 3968  elements NOT ZERO
1 - 2286 /(349*719)   
# % of ZERO elements on the sparse matrix, but only 3% are NOT ZERO elements. Meaning that the matrix is sparse computationally and mathematically speaking

0.9908899259158892

## IMPORTANT to note: 
Combining simple matrix - Xtrain&Xval - with a sparse matrix - title_bow_train & title_bow_val

Use scipy.sparse hstack and vstack

More details on hstack and vstack...stacking matrix (vectoes) horizontally and vertically

Sample:

hstack - [1 2]    [3 4]  -> [1 2 3 4]

vstack [1 2]      [3 4]  -> [1 2]
                            [3 4]
                            
USE *scipy.sparse hstack and vstack*, numpy sparse function may take TOO LONG, or not compute at all!!

In [37]:
# combining sparse matrix with original features
from scipy.sparse import hstack, vstack  

Xtrain_wtitle = hstack([Xtrain, title_bow_train])
Xval_wtitle = hstack([Xval, title_bow_val])

In [38]:
Xtrain_wtitle.shape, Xval_wtitle.shape

# 2 nummerical features on training dataset plus 289 columns from 'Title'

((349, 721), (352, 721))

## Random Forest

In [69]:
# check number of 1 samples under train dataset
print('Positive samples - videos select: {}'.format(ytrain.mean() * 349))
print(' % of positive samples - videos select: {}'.format(ytrain.mean() * 100))

# definitely unbalaced

Positive samples - videos select: 107.0
 % of positive samples - videos select: 30.659025787965614


The training dataset is not big!!

In [70]:
clf_rf = RandomForestClassifier(n_estimators=1000, random_state=0, min_samples_leaf=1, class_weight='balanced', n_jobs=6)    # defined object

## Fitting the model against the train dataset

NOW: 3 features - views, views per day, and title

In [71]:
clf_rf.fit(Xtrain_wtitle, ytrain)

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_store_unique_indices = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.shape, dtype=np.int)
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  y_encoded = np.zeros(y.

RandomForestClassifier(bootstrap=True, class_weight='balanced',
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, min_impurity_decrease=0.0,
                       min_impurity_split=None, min_samples_leaf=1,
                       min_samples_split=2, min_weight_fraction_leaf=0.0,
                       n_estimators=1000, n_jobs=6, oob_score=False,
                       random_state=0, verbose=0, warm_start=False)

In [52]:
print('ML model already trainded/fitted and ready to be used!')

ML model already trainded/fitted and ready to be used!


## Predicting if a video has been select

Probability = 1

predict_proba: returns a numpy array with prob of zero and prob of 1

In [72]:
pred = clf_rf.predict_proba(Xval_wtitle)[:, 1]   # only 1

Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  dtype=np.int)


## Metrics - validating the model


 Before: Random Forest Model

average precision: 0.4566659359075169

Roc Auc: 0.5182456140350877

In [73]:
# area of precision for decision tree
print('Random Forest baseline model - average precision')
average_precision_score(yval, pred)

Random Forest baseline model - average precision


0.4605227155328958

## IMPORTANT: any future model in PRD should have a greater than baseline model **0.50**

In [74]:
# area under curve of roc curve metric
print('Random Forest baseline model - roc auc')
roc_auc_score(yval, pred)

Random Forest baseline model - roc auc


0.5253960396039604

## Validating metrics before and after the active learning samples

Increassing the validation dataset metrics:

roc auc: 0.47

average precision: 0.41

----------------------------------------------------------
Original decision metric - baseline model

roc auc: 0.49

average precision: 0.42

---------------------------------------------------------

Metrics seem to be on the variation range. Meaning AP & ROC AUC are close enough between baseline model and new model with active learning samples.
Please note that the number of samples are not significantly big, then the metrics might not change a lot...small steps definitely count!!!

---------------------------------------------------------

## Productin Baseline Model
roc auc: 0.46

average precision: 0.52

**Details**: TfidfVectorizer(min_df=2, ngram_range=(1,3)) 
---------------------------------------------------------


## LigthGBM Model

In [39]:
cl_lgbm = LGBMClassifier(random_state=0, class_weight='balanced', n_jobs=6)   # defined object

In [40]:
cl_lgbm.fit(Xtrain_wtitle, ytrain)

LGBMClassifier(boosting_type='gbdt', class_weight='balanced',
               colsample_bytree=1.0, importance_type='split', learning_rate=0.1,
               max_depth=-1, min_child_samples=20, min_child_weight=0.001,
               min_split_gain=0.0, n_estimators=100, n_jobs=6, num_leaves=31,
               objective=None, random_state=0, reg_alpha=0.0, reg_lambda=0.0,
               silent=True, subsample=1.0, subsample_for_bin=200000,
               subsample_freq=0)

In [41]:
pred = cl_lgbm.predict_proba(Xval_wtitle)[:,1]



In [42]:
# area of precision for decision tree
print('Random Forest baseline model - average precision: ', average_precision_score(yval, pred))

# area under curve of roc curve metric
print('Random Forest baseline model - roc auc: ', roc_auc_score(yval, pred))

Random Forest baseline model - average precision:  0.4052786105139529
Random Forest baseline model - roc auc:  0.48495049504950494


# Bayesian Optmization

In [44]:
from skopt import forest_minimize

In [46]:
def tune_lgbm(params):
    print(params)
    lr = params[0]
    max_depth = params[1]
    min_child_samples = params[2]
    subsample = params[3]
    colsample_bytree = params[4]
    n_estimators = params[5]
    
    min_df = params[6]
    ngram_range = (1, params[7])
    
    title_vec = TfidfVectorizer(min_df=min_df, ngram_range=ngram_range)
    title_bow_train = title_vec.fit_transform(title_train)
    title_bow_val = title_vec.transform(title_val)
    
    Xtrain_wtitle = hstack([Xtrain, title_bow_train])
    Xval_wtitle = hstack([Xval, title_bow_val])
    
    mdl = LGBMClassifier(learning_rate=lr, num_leaves=2 ** max_depth, max_depth=max_depth, 
                         min_child_samples=min_child_samples, subsample=subsample,
                         colsample_bytree=colsample_bytree, bagging_freq=1,n_estimators=n_estimators, random_state=0, 
                         class_weight="balanced", n_jobs=6)
    mdl.fit(Xtrain_wtitle, ytrain)
    
    p = mdl.predict_proba(Xval_wtitle)[:, 1]
    
    print(roc_auc_score(yval, p))
    
    return -average_precision_score(yval, p)

space = [(1e-3, 1e-1, 'log-uniform'), # lr    # learning rate
          (1, 10), # max_depth                # tree depth (20 was trowing an error..too deep for this problem). Maybe because there are few samples
          (1, 20), # min_child_samples
          (0.05, 1.), # subsample
          (0.05, 1.), # colsample_bytree
          (100,1000), # n_estimators          # numb of trees
          (1,5), # min_df                     # num of words on the title
          (1,5)] # ngram_range                # num of words grouped

res = forest_minimize(tune_lgbm, space, random_state=160745, n_random_starts=20, n_calls=100, verbose=1)


Iteration No: 1 started. Evaluating function at random point.
[0.009944912110647982, 5, 1, 0.4677107511929402, 0.49263223036174764, 272, 3, 1]




0.4867161716171617
Iteration No: 1 ended. Evaluation done at random point.
Time taken: 0.4352
Function value obtained: -0.4137
Current minimum: -0.4137
Iteration No: 2 started. Evaluating function at random point.
[0.053887464791860025, 1, 15, 0.7437489153990157, 0.8675167974293533, 549, 3, 4]




0.47650165016501655
Iteration No: 2 ended. Evaluation done at random point.
Time taken: 0.2236
Function value obtained: -0.4070
Current minimum: -0.4137
Iteration No: 3 started. Evaluating function at random point.
[0.004151454520895999, 6, 20, 0.8682075103820793, 0.9491436163200662, 411, 4, 3]




0.47189768976897684
Iteration No: 3 ended. Evaluation done at random point.
Time taken: 0.3079
Function value obtained: -0.4116
Current minimum: -0.4137
Iteration No: 4 started. Evaluating function at random point.
[0.0014099928811969545, 9, 9, 0.6502182010234373, 0.6866210554187129, 828, 5, 2]




0.49985148514851485
Iteration No: 4 ended. Evaluation done at random point.
Time taken: 1.1580
Function value obtained: -0.4217
Current minimum: -0.4217
Iteration No: 5 started. Evaluating function at random point.
[0.08530558241838007, 8, 19, 0.2137736299768322, 0.1313765544201984, 961, 4, 1]




0.47455445544554453
Iteration No: 5 ended. Evaluation done at random point.
Time taken: 0.3741
Function value obtained: -0.4152
Current minimum: -0.4217
Iteration No: 6 started. Evaluating function at random point.
[0.003567949451535685, 10, 19, 0.7232951768944309, 0.7298538828427115, 939, 4, 3]




0.47483498349834985
Iteration No: 6 ended. Evaluation done at random point.
Time taken: 1.0178
Function value obtained: -0.4059
Current minimum: -0.4217
Iteration No: 7 started. Evaluating function at random point.
[0.014828577273549474, 7, 1, 0.18428087097824575, 0.3261556557915816, 274, 1, 2]




0.4768811881188119
Iteration No: 7 ended. Evaluation done at random point.
Time taken: 1.2669
Function value obtained: -0.4162
Current minimum: -0.4217
Iteration No: 8 started. Evaluating function at random point.
[0.0015212976972079912, 3, 12, 0.44234694306528044, 0.399351303640462, 272, 3, 5]




0.4956765676567657
Iteration No: 8 ended. Evaluation done at random point.
Time taken: 0.3627
Function value obtained: -0.4240
Current minimum: -0.4240
Iteration No: 9 started. Evaluating function at random point.
[0.01946212855369041, 9, 18, 0.5235636153223084, 0.6728679300083596, 747, 4, 5]




0.4778052805280528
Iteration No: 9 ended. Evaluation done at random point.
Time taken: 0.6634
Function value obtained: -0.3977
Current minimum: -0.4240
Iteration No: 10 started. Evaluating function at random point.
[0.0012116790683302117, 3, 2, 0.06616307483844217, 0.23025600705315752, 677, 2, 5]




0.4925577557755776
Iteration No: 10 ended. Evaluation done at random point.
Time taken: 0.6639
Function value obtained: -0.4180
Current minimum: -0.4240
Iteration No: 11 started. Evaluating function at random point.
[0.0053139776214487944, 6, 9, 0.14251441334450304, 0.8175761405215897, 297, 1, 5]




0.47668316831683166
Iteration No: 11 ended. Evaluation done at random point.
Time taken: 0.3077
Function value obtained: -0.4116
Current minimum: -0.4240
Iteration No: 12 started. Evaluating function at random point.
[0.0068572961982704935, 10, 5, 0.2390386584472456, 0.49053406102209746, 176, 2, 4]




0.49938943894389437
Iteration No: 12 ended. Evaluation done at random point.
Time taken: 0.4156
Function value obtained: -0.4089
Current minimum: -0.4240
Iteration No: 13 started. Evaluating function at random point.
[0.00781968225875022, 3, 4, 0.7078936710077383, 0.31818755505678337, 275, 4, 4]




0.5144719471947194
Iteration No: 13 ended. Evaluation done at random point.
Time taken: 0.2633
Function value obtained: -0.4372
Current minimum: -0.4372
Iteration No: 14 started. Evaluating function at random point.
[0.017293945600511968, 2, 15, 0.9007557574888567, 0.41026441194439994, 316, 5, 1]
0.48209570957095704
Iteration No: 14 ended. Evaluation done at random point.
Time taken: 0.1501
Function value obtained: -0.4102
Current minimum: -0.4372
Iteration No: 15 started. Evaluating function at random point.
[0.012250750764764855, 8, 6, 0.5976582413192033, 0.2474882432951916, 516, 4, 4]




0.5051320132013201
Iteration No: 15 ended. Evaluation done at random point.
Time taken: 0.7238
Function value obtained: -0.4193
Current minimum: -0.4372
Iteration No: 16 started. Evaluating function at random point.
[0.018353598126553926, 4, 3, 0.47305622526323254, 0.1404164811277527, 133, 4, 1]
0.4884488448844885
Iteration No: 16 ended. Evaluation done at random point.
Time taken: 0.1796
Function value obtained: -0.4241
Current minimum: -0.4372
Iteration No: 17 started. Evaluating function at random point.
[0.0010383234748454694, 9, 19, 0.9256771571832196, 0.9321438677645206, 312, 4, 3]




0.4613201320132013
Iteration No: 17 ended. Evaluation done at random point.
Time taken: 0.4121
Function value obtained: -0.4028
Current minimum: -0.4372
Iteration No: 18 started. Evaluating function at random point.
[0.004955229758078229, 5, 5, 0.06939551310802591, 0.4193273080472823, 725, 4, 1]




0.49919141914191417
Iteration No: 18 ended. Evaluation done at random point.
Time taken: 0.3500
Function value obtained: -0.4305
Current minimum: -0.4372
Iteration No: 19 started. Evaluating function at random point.
[0.0699516121742407, 9, 10, 0.6477856515609233, 0.8594430701440198, 616, 1, 1]




0.4775082508250825
Iteration No: 19 ended. Evaluation done at random point.
Time taken: 0.7348
Function value obtained: -0.4237
Current minimum: -0.4372
Iteration No: 20 started. Evaluating function at random point.
[0.0014752743467850462, 5, 4, 0.9747950537021096, 0.982207187458162, 909, 2, 4]




0.5001650165016501
Iteration No: 20 ended. Evaluation done at random point.
Time taken: 1.7655
Function value obtained: -0.4194
Current minimum: -0.4372
Iteration No: 21 started. Searching for the next optimal point.
[0.00277879832308686, 1, 5, 0.9986260669254468, 0.5166255559037616, 255, 4, 3]
0.5144224422442244




Iteration No: 21 ended. Search finished for the next optimal point.
Time taken: 0.7175
Function value obtained: -0.4427
Current minimum: -0.4427
Iteration No: 22 started. Searching for the next optimal point.
[0.004436973609796325, 3, 3, 0.7889357050354174, 0.542207005961531, 221, 5, 5]
0.501039603960396




Iteration No: 22 ended. Search finished for the next optimal point.
Time taken: 0.8327
Function value obtained: -0.4285
Current minimum: -0.4427
Iteration No: 23 started. Searching for the next optimal point.
[0.06748441788213892, 2, 4, 0.841788552672473, 0.6634295581648063, 120, 5, 3]
0.4926237623762376




Iteration No: 23 ended. Search finished for the next optimal point.
Time taken: 0.7476
Function value obtained: -0.4317
Current minimum: -0.4427
Iteration No: 24 started. Searching for the next optimal point.
[0.003171196824832694, 3, 11, 0.9678072502611496, 0.5262001248383111, 115, 4, 4]
0.518894389438944




Iteration No: 24 ended. Search finished for the next optimal point.
Time taken: 0.8000
Function value obtained: -0.4525
Current minimum: -0.4525
Iteration No: 25 started. Searching for the next optimal point.
[0.04231374869891006, 2, 12, 0.9676455660761261, 0.6506839643902466, 128, 4, 4]
0.5026567656765677




Iteration No: 25 ended. Search finished for the next optimal point.
Time taken: 0.7017
Function value obtained: -0.4320
Current minimum: -0.4525
Iteration No: 26 started. Searching for the next optimal point.
[0.0011435335915899315, 3, 11, 0.9841106336747797, 0.3510189728625963, 222, 4, 2]
0.4936468646864686




Iteration No: 26 ended. Search finished for the next optimal point.
Time taken: 0.7483
Function value obtained: -0.4341
Current minimum: -0.4525
Iteration No: 27 started. Searching for the next optimal point.
[0.002364731164457494, 1, 15, 0.9574894435284431, 0.613114131297313, 116, 4, 4]
0.5143399339933993




Iteration No: 27 ended. Search finished for the next optimal point.
Time taken: 0.6843
Function value obtained: -0.4405
Current minimum: -0.4525
Iteration No: 28 started. Searching for the next optimal point.
[0.002705719016692904, 2, 17, 0.9821635778155591, 0.5877474517510383, 176, 4, 3]
0.5122607260726073




Iteration No: 28 ended. Search finished for the next optimal point.
Time taken: 0.7112
Function value obtained: -0.4414
Current minimum: -0.4525
Iteration No: 29 started. Searching for the next optimal point.
[0.003022696396252859, 1, 14, 0.9532702581096774, 0.6407686285196288, 409, 4, 5]




0.5217821782178218
Iteration No: 29 ended. Search finished for the next optimal point.
Time taken: 0.8568
Function value obtained: -0.4438
Current minimum: -0.4525
Iteration No: 30 started. Searching for the next optimal point.
[0.0031384665701459833, 1, 20, 0.9768358157259707, 0.5232419767331159, 114, 2, 1]
0.5150990099009901




Iteration No: 30 ended. Search finished for the next optimal point.
Time taken: 0.6477
Function value obtained: -0.4416
Current minimum: -0.4525
Iteration No: 31 started. Searching for the next optimal point.
[0.003440934421335319, 2, 18, 0.9846124358581784, 0.716313479255477, 720, 4, 4]




0.4887293729372937
Iteration No: 31 ended. Search finished for the next optimal point.
Time taken: 0.9166
Function value obtained: -0.4132
Current minimum: -0.4525
Iteration No: 32 started. Searching for the next optimal point.
[0.0016989432265925276, 3, 18, 0.9450102183125282, 0.49184741162228196, 116, 3, 3]
0.4851815181518151




Iteration No: 32 ended. Search finished for the next optimal point.
Time taken: 0.7097
Function value obtained: -0.4157
Current minimum: -0.4525
Iteration No: 33 started. Searching for the next optimal point.
[0.0340145871547682, 1, 8, 0.9510970265186084, 0.537716585222332, 539, 4, 4]




0.5014686468646865
Iteration No: 33 ended. Search finished for the next optimal point.
Time taken: 0.8739
Function value obtained: -0.4340
Current minimum: -0.4525
Iteration No: 34 started. Searching for the next optimal point.
[0.0033274185322372924, 1, 15, 0.9849152170209614, 0.9562726188160382, 194, 4, 4]
0.5233498349834984




Iteration No: 34 ended. Search finished for the next optimal point.
Time taken: 0.6269
Function value obtained: -0.4448
Current minimum: -0.4525
Iteration No: 35 started. Searching for the next optimal point.
[0.003041761507273045, 1, 16, 0.8248067257150883, 0.6678987707018582, 289, 4, 4]
0.5096864686468647




Iteration No: 35 ended. Search finished for the next optimal point.
Time taken: 0.7218
Function value obtained: -0.4297
Current minimum: -0.4525
Iteration No: 36 started. Searching for the next optimal point.
[0.005847049854632142, 1, 19, 0.9993390471811843, 0.9409216617759075, 221, 3, 5]
0.5049669966996699




Iteration No: 36 ended. Search finished for the next optimal point.
Time taken: 0.7243
Function value obtained: -0.4302
Current minimum: -0.4525
Iteration No: 37 started. Searching for the next optimal point.
[0.013835423830874896, 3, 16, 0.9426345033111334, 0.5502001283437427, 192, 4, 5]
0.49008250825082506




Iteration No: 37 ended. Search finished for the next optimal point.
Time taken: 0.7523
Function value obtained: -0.4240
Current minimum: -0.4525
Iteration No: 38 started. Searching for the next optimal point.
[0.002752756319767703, 3, 12, 0.9638177122695764, 0.15433564911676817, 142, 5, 3]
0.5025082508250825




Iteration No: 38 ended. Search finished for the next optimal point.
Time taken: 0.7237
Function value obtained: -0.4301
Current minimum: -0.4525
Iteration No: 39 started. Searching for the next optimal point.
[0.0033058121067185177, 1, 11, 0.973204305371816, 0.6191421117892214, 581, 3, 5]




0.5091254125412541
Iteration No: 39 ended. Search finished for the next optimal point.
Time taken: 0.8825
Function value obtained: -0.4260
Current minimum: -0.4525
Iteration No: 40 started. Searching for the next optimal point.
[0.003961387773354444, 4, 8, 0.9896674135792167, 0.5560730283580009, 201, 4, 3]
0.5107260726072608




Iteration No: 40 ended. Search finished for the next optimal point.
Time taken: 0.8328
Function value obtained: -0.4468
Current minimum: -0.4525
Iteration No: 41 started. Searching for the next optimal point.
[0.0026723205748597134, 4, 8, 0.9384116505899855, 0.7892596161668841, 105, 4, 5]
0.5056765676567657




Iteration No: 41 ended. Search finished for the next optimal point.
Time taken: 0.6930
Function value obtained: -0.4358
Current minimum: -0.4525
Iteration No: 42 started. Searching for the next optimal point.
[0.004345811019746198, 4, 3, 0.9727161095217765, 0.7856660475438121, 455, 4, 3]




0.4996369636963697
Iteration No: 42 ended. Search finished for the next optimal point.
Time taken: 0.9915
Function value obtained: -0.4337
Current minimum: -0.4525
Iteration No: 43 started. Searching for the next optimal point.
[0.0030344091952719254, 4, 11, 0.9894731273904502, 0.8679367026941497, 113, 2, 3]
0.5094389438943895




Iteration No: 43 ended. Search finished for the next optimal point.
Time taken: 0.7769
Function value obtained: -0.4352
Current minimum: -0.4525
Iteration No: 44 started. Searching for the next optimal point.
[0.0022795460458035982, 1, 15, 0.7821545046192098, 0.932979969296165, 110, 4, 1]
0.5087293729372938




Iteration No: 44 ended. Search finished for the next optimal point.
Time taken: 0.7378
Function value obtained: -0.4425
Current minimum: -0.4525
Iteration No: 45 started. Searching for the next optimal point.
[0.00495252436830788, 4, 17, 0.9846028248335248, 0.5512165050698371, 356, 4, 3]




0.4861386138613862
Iteration No: 45 ended. Search finished for the next optimal point.
Time taken: 0.7592
Function value obtained: -0.4314
Current minimum: -0.4525
Iteration No: 46 started. Searching for the next optimal point.
[0.004493082209541516, 4, 10, 0.9041911679014011, 0.4560829349448199, 115, 4, 1]
0.48376237623762375




Iteration No: 46 ended. Search finished for the next optimal point.
Time taken: 0.6969
Function value obtained: -0.4196
Current minimum: -0.4525
Iteration No: 47 started. Searching for the next optimal point.
[0.0017206336302334688, 5, 14, 0.9754702462664983, 0.6045688931429561, 223, 4, 5]
0.4885313531353135




Iteration No: 47 ended. Search finished for the next optimal point.
Time taken: 0.7509
Function value obtained: -0.4317
Current minimum: -0.4525
Iteration No: 48 started. Searching for the next optimal point.
[0.0038423806145788477, 1, 20, 0.28175484525810757, 0.2378371541380112, 110, 4, 3]
0.5060891089108911




Iteration No: 48 ended. Search finished for the next optimal point.
Time taken: 0.6989
Function value obtained: -0.4445
Current minimum: -0.4525
Iteration No: 49 started. Searching for the next optimal point.
[0.003570714794333516, 1, 13, 0.7608220942027364, 0.34517809235823366, 192, 2, 1]
0.5122607260726072




Iteration No: 49 ended. Search finished for the next optimal point.
Time taken: 0.7586
Function value obtained: -0.4417
Current minimum: -0.4525
Iteration No: 50 started. Searching for the next optimal point.
[0.028477934854134527, 1, 14, 0.05866190878559363, 0.25622168628428665, 194, 3, 1]
0.5175577557755776




Iteration No: 50 ended. Search finished for the next optimal point.
Time taken: 0.7089
Function value obtained: -0.4374
Current minimum: -0.4525
Iteration No: 51 started. Searching for the next optimal point.
[0.001688453162384239, 1, 16, 0.674326868502573, 0.8629344337469121, 456, 4, 1]
0.4986963696369636




Iteration No: 51 ended. Search finished for the next optimal point.
Time taken: 0.7742
Function value obtained: -0.4403
Current minimum: -0.4525
Iteration No: 52 started. Searching for the next optimal point.
[0.004082734637056895, 1, 20, 0.27242016999461655, 0.8377252018397743, 702, 4, 1]




0.484026402640264
Iteration No: 52 ended. Search finished for the next optimal point.
Time taken: 0.9435
Function value obtained: -0.4296
Current minimum: -0.4525
Iteration No: 53 started. Searching for the next optimal point.
[0.04269282257983303, 1, 17, 0.9056195513067608, 0.27528952081239993, 330, 4, 1]
0.4774917491749175




Iteration No: 53 ended. Search finished for the next optimal point.
Time taken: 0.7196
Function value obtained: -0.4215
Current minimum: -0.4525
Iteration No: 54 started. Searching for the next optimal point.
[0.0041751123021073824, 2, 7, 0.3951586388441411, 0.42284953755269905, 110, 4, 3]
0.5087953795379538




Iteration No: 54 ended. Search finished for the next optimal point.
Time taken: 0.7851
Function value obtained: -0.4156
Current minimum: -0.4525
Iteration No: 55 started. Searching for the next optimal point.
[0.008885813093433462, 1, 18, 0.37626336191183335, 0.3257972972337165, 125, 3, 3]
0.4965016501650165




Iteration No: 55 ended. Search finished for the next optimal point.
Time taken: 0.7061
Function value obtained: -0.4266
Current minimum: -0.4525
Iteration No: 56 started. Searching for the next optimal point.
[0.0044206976177799395, 3, 9, 0.9858009841946129, 0.5598910685210584, 104, 2, 2]
0.4953795379537953




Iteration No: 56 ended. Search finished for the next optimal point.
Time taken: 0.7273
Function value obtained: -0.4205
Current minimum: -0.4525
Iteration No: 57 started. Searching for the next optimal point.
[0.0030655102333650365, 3, 9, 0.9529527980949006, 0.1426589055453374, 132, 4, 5]
0.5373267326732674




Iteration No: 57 ended. Search finished for the next optimal point.
Time taken: 0.6908
Function value obtained: -0.4674
Current minimum: -0.4674
Iteration No: 58 started. Searching for the next optimal point.
[0.003520290089436405, 5, 9, 0.8383887862388262, 0.0938886682346032, 162, 4, 5]
0.5026237623762377




Iteration No: 58 ended. Search finished for the next optimal point.
Time taken: 0.8175
Function value obtained: -0.4388
Current minimum: -0.4674
Iteration No: 59 started. Searching for the next optimal point.
[0.00100754186246827, 3, 19, 0.9497073922935056, 0.06818921362768292, 127, 5, 5]
0.5132178217821782




Iteration No: 59 ended. Search finished for the next optimal point.
Time taken: 0.7907
Function value obtained: -0.4327
Current minimum: -0.4674
Iteration No: 60 started. Searching for the next optimal point.
[0.0021109780729995655, 3, 6, 0.7362618796614544, 0.06969452140902549, 202, 4, 5]
0.5287458745874588




Iteration No: 60 ended. Search finished for the next optimal point.
Time taken: 0.8159
Function value obtained: -0.4483
Current minimum: -0.4674
Iteration No: 61 started. Searching for the next optimal point.
[0.001738485397936115, 2, 1, 0.6241626670954307, 0.17773626222727618, 170, 4, 5]
0.49447194719471943




Iteration No: 61 ended. Search finished for the next optimal point.
Time taken: 0.7833
Function value obtained: -0.4069
Current minimum: -0.4674
Iteration No: 62 started. Searching for the next optimal point.
[0.002590647561363787, 3, 3, 0.69082671594169, 0.10679653766523273, 180, 4, 4]




0.5247029702970297
Iteration No: 62 ended. Search finished for the next optimal point.
Time taken: 0.8592
Function value obtained: -0.4394
Current minimum: -0.4674
Iteration No: 63 started. Searching for the next optimal point.
[0.0031227029455336928, 2, 10, 0.6609531300118973, 0.07910306292162295, 138, 4, 4]
0.5220957095709571




Iteration No: 63 ended. Search finished for the next optimal point.
Time taken: 0.7340
Function value obtained: -0.4438
Current minimum: -0.4674
Iteration No: 64 started. Searching for the next optimal point.
[0.0036159922494946833, 3, 4, 0.8308839826813481, 0.26144266181564035, 110, 4, 5]
0.5172277227722772




Iteration No: 64 ended. Search finished for the next optimal point.
Time taken: 0.8384
Function value obtained: -0.4488
Current minimum: -0.4674
Iteration No: 65 started. Searching for the next optimal point.
[0.0035222264466160396, 3, 4, 0.9151734513575553, 0.3824236688592567, 159, 4, 5]
0.5166006600660067




Iteration No: 65 ended. Search finished for the next optimal point.
Time taken: 0.7837
Function value obtained: -0.4435
Current minimum: -0.4674
Iteration No: 66 started. Searching for the next optimal point.
[0.0026544900377379554, 5, 7, 0.967093260484254, 0.07552826835375895, 143, 4, 5]
0.541006600660066




Iteration No: 66 ended. Search finished for the next optimal point.
Time taken: 0.7798
Function value obtained: -0.4709
Current minimum: -0.4709
Iteration No: 67 started. Searching for the next optimal point.
[0.002050707063777123, 5, 8, 0.5015826535208897, 0.30314371677884633, 189, 4, 5]




0.5155610561056105
Iteration No: 67 ended. Search finished for the next optimal point.
Time taken: 0.8340
Function value obtained: -0.4275
Current minimum: -0.4709
Iteration No: 68 started. Searching for the next optimal point.
[0.002925326808300689, 3, 6, 0.9541432618544554, 0.0593877978394456, 146, 3, 5]
0.49867986798679864




Iteration No: 68 ended. Search finished for the next optimal point.
Time taken: 0.9055
Function value obtained: -0.4385
Current minimum: -0.4709
Iteration No: 69 started. Searching for the next optimal point.
[0.001559140496431934, 6, 20, 0.9177156996090977, 0.15263525305637551, 122, 5, 5]
0.5315511551155115




Iteration No: 69 ended. Search finished for the next optimal point.
Time taken: 0.7654
Function value obtained: -0.4554
Current minimum: -0.4709
Iteration No: 70 started. Searching for the next optimal point.
[0.0053292223483304875, 5, 4, 0.9554439862659898, 0.1467729802095056, 137, 4, 5]




0.5147689768976897
Iteration No: 70 ended. Search finished for the next optimal point.
Time taken: 0.8633
Function value obtained: -0.4499
Current minimum: -0.4709
Iteration No: 71 started. Searching for the next optimal point.
[0.0020287835989926915, 3, 10, 0.9826469864614122, 0.13927079116435467, 109, 4, 5]
0.5300330033003301




Iteration No: 71 ended. Search finished for the next optimal point.
Time taken: 0.7717
Function value obtained: -0.4619
Current minimum: -0.4709
Iteration No: 72 started. Searching for the next optimal point.
[0.0012234774564751879, 9, 11, 0.9560158678766078, 0.1738321517705897, 174, 5, 5]




0.5170792079207921
Iteration No: 72 ended. Search finished for the next optimal point.
Time taken: 0.8796
Function value obtained: -0.4475
Current minimum: -0.4709
Iteration No: 73 started. Searching for the next optimal point.
[0.001017546759367843, 9, 19, 0.9086028064303663, 0.09909759172844335, 149, 2, 4]




0.495
Iteration No: 73 ended. Search finished for the next optimal point.
Time taken: 0.8380
Function value obtained: -0.4208
Current minimum: -0.4709
Iteration No: 74 started. Searching for the next optimal point.
[0.0016915118311549022, 6, 6, 0.9606583337509034, 0.13659677218604985, 105, 2, 5]
0.5104785478547855




Iteration No: 74 ended. Search finished for the next optimal point.
Time taken: 0.8462
Function value obtained: -0.4357
Current minimum: -0.4709
Iteration No: 75 started. Searching for the next optimal point.
[0.0034352359149623117, 10, 14, 0.9538201268278457, 0.21432706167598575, 209, 4, 5]




0.5116006600660067
Iteration No: 75 ended. Search finished for the next optimal point.
Time taken: 1.0382
Function value obtained: -0.4423
Current minimum: -0.4709
Iteration No: 76 started. Searching for the next optimal point.
[0.014880972174758929, 8, 8, 0.9907189827019779, 0.06577771304657233, 106, 4, 5]
0.5360396039603961




Iteration No: 76 ended. Search finished for the next optimal point.
Time taken: 0.8277
Function value obtained: -0.4389
Current minimum: -0.4709
Iteration No: 77 started. Searching for the next optimal point.
[0.0029861370782391186, 3, 4, 0.9911859798033493, 0.12860897416043926, 311, 4, 5]




0.5150330033003301
Iteration No: 77 ended. Search finished for the next optimal point.
Time taken: 0.8780
Function value obtained: -0.4467
Current minimum: -0.4709
Iteration No: 78 started. Searching for the next optimal point.
[0.0061457142702299565, 3, 11, 0.9609187123445667, 0.13534697007319405, 170, 4, 5]
0.5255610561056105




Iteration No: 78 ended. Search finished for the next optimal point.
Time taken: 0.8043
Function value obtained: -0.4410
Current minimum: -0.4709
Iteration No: 79 started. Searching for the next optimal point.
[0.0028527706721238576, 9, 7, 0.8409546348557474, 0.13205498087992484, 372, 5, 5]




0.520858085808581
Iteration No: 79 ended. Search finished for the next optimal point.
Time taken: 1.2190
Function value obtained: -0.4481
Current minimum: -0.4709
Iteration No: 80 started. Searching for the next optimal point.
[0.0017051490163594574, 8, 2, 0.659810942608322, 0.10515309841388645, 133, 5, 5]




0.4911881188118812
Iteration No: 80 ended. Search finished for the next optimal point.
Time taken: 0.9434
Function value obtained: -0.4464
Current minimum: -0.4709
Iteration No: 81 started. Searching for the next optimal point.
[0.0014856414622216266, 9, 3, 0.7371278727119489, 0.06630416303453886, 139, 4, 5]




0.528036303630363
Iteration No: 81 ended. Search finished for the next optimal point.
Time taken: 0.9469
Function value obtained: -0.4349
Current minimum: -0.4709
Iteration No: 82 started. Searching for the next optimal point.
[0.0027555636258059725, 1, 1, 0.9810393281458469, 0.09908968370340616, 149, 1, 5]




0.47922442244224417
Iteration No: 82 ended. Search finished for the next optimal point.
Time taken: 1.2311
Function value obtained: -0.4128
Current minimum: -0.4709
Iteration No: 83 started. Searching for the next optimal point.
[0.003511839199396887, 9, 3, 0.9764895071661562, 0.23462565138761304, 143, 5, 5]




0.5018646864686469
Iteration No: 83 ended. Search finished for the next optimal point.
Time taken: 0.9670
Function value obtained: -0.4379
Current minimum: -0.4709
Iteration No: 84 started. Searching for the next optimal point.
[0.023046780023420567, 4, 17, 0.9767152544142764, 0.15174993893364908, 270, 4, 5]




0.5056600660066006
Iteration No: 84 ended. Search finished for the next optimal point.
Time taken: 0.7667
Function value obtained: -0.4279
Current minimum: -0.4709
Iteration No: 85 started. Searching for the next optimal point.
[0.002940880168739974, 5, 11, 0.9599248895177958, 0.1935741708634694, 113, 4, 4]
0.5227557755775577




Iteration No: 85 ended. Search finished for the next optimal point.
Time taken: 0.7466
Function value obtained: -0.4439
Current minimum: -0.4709
Iteration No: 86 started. Searching for the next optimal point.
[0.002036342875857655, 3, 7, 0.894411021517606, 0.15736940904823044, 139, 5, 5]
0.5067491749174918




Iteration No: 86 ended. Search finished for the next optimal point.
Time taken: 0.7020
Function value obtained: -0.4289
Current minimum: -0.4709
Iteration No: 87 started. Searching for the next optimal point.
[0.001148731610072356, 2, 20, 0.9526886725458354, 0.11224742666124021, 189, 4, 5]
0.5115016501650165




Iteration No: 87 ended. Search finished for the next optimal point.
Time taken: 0.7790
Function value obtained: -0.4248
Current minimum: -0.4709
Iteration No: 88 started. Searching for the next optimal point.
[0.004328065059845864, 5, 8, 0.9281512406536647, 0.24178291669528706, 122, 4, 5]
0.5251650165016502




Iteration No: 88 ended. Search finished for the next optimal point.
Time taken: 0.7942
Function value obtained: -0.4496
Current minimum: -0.4709
Iteration No: 89 started. Searching for the next optimal point.
[0.002441378109922619, 1, 3, 0.9613539221902533, 0.10011948164424775, 102, 4, 3]
0.5285313531353135




Iteration No: 89 ended. Search finished for the next optimal point.
Time taken: 0.7996
Function value obtained: -0.4452
Current minimum: -0.4709
Iteration No: 90 started. Searching for the next optimal point.
[0.0030975275957532146, 5, 15, 0.9387306959584709, 0.10705283776910092, 432, 4, 5]




0.519983498349835
Iteration No: 90 ended. Search finished for the next optimal point.
Time taken: 0.8448
Function value obtained: -0.4383
Current minimum: -0.4709
Iteration No: 91 started. Searching for the next optimal point.
[0.0028960828190192577, 7, 6, 0.9884189417716618, 0.06965894245648643, 721, 4, 5]




0.5417656765676567
Iteration No: 91 ended. Search finished for the next optimal point.
Time taken: 1.3067
Function value obtained: -0.4548
Current minimum: -0.4709
Iteration No: 92 started. Searching for the next optimal point.
[0.002461094855183414, 2, 8, 0.96610581033054, 0.14822445158677539, 782, 4, 2]




0.5095544554455446
Iteration No: 92 ended. Search finished for the next optimal point.
Time taken: 1.0858
Function value obtained: -0.4268
Current minimum: -0.4709
Iteration No: 93 started. Searching for the next optimal point.
[0.002713190354220995, 5, 11, 0.8988525778760663, 0.22760999900206336, 710, 4, 5]




0.508993399339934
Iteration No: 93 ended. Search finished for the next optimal point.
Time taken: 1.1359
Function value obtained: -0.4277
Current minimum: -0.4709
Iteration No: 94 started. Searching for the next optimal point.
[0.007187972788081895, 1, 5, 0.9999239900782729, 0.0503391362115006, 129, 4, 5]
0.5376567656765676




Iteration No: 94 ended. Search finished for the next optimal point.
Time taken: 0.7275
Function value obtained: -0.4450
Current minimum: -0.4709
Iteration No: 95 started. Searching for the next optimal point.
[0.001411591928970663, 5, 3, 0.9863854043008259, 0.2044867591310035, 145, 4, 5]




0.5235973597359735
Iteration No: 95 ended. Search finished for the next optimal point.
Time taken: 0.9114
Function value obtained: -0.4523
Current minimum: -0.4709
Iteration No: 96 started. Searching for the next optimal point.
[0.0018331995475656402, 9, 8, 0.952733351762957, 0.3920526737713918, 104, 4, 5]




0.5246369636963697
Iteration No: 96 ended. Search finished for the next optimal point.
Time taken: 1.0394
Function value obtained: -0.4556
Current minimum: -0.4709
Iteration No: 97 started. Searching for the next optimal point.
[0.002446241144641319, 10, 6, 0.9965375770521938, 0.4457835157891931, 161, 4, 5]




0.5144719471947194
Iteration No: 97 ended. Search finished for the next optimal point.
Time taken: 1.2979
Function value obtained: -0.4438
Current minimum: -0.4709
Iteration No: 98 started. Searching for the next optimal point.
[0.004298934216578211, 5, 8, 0.9648787195170365, 0.20524425011469577, 926, 4, 5]




0.5193234323432343
Iteration No: 98 ended. Search finished for the next optimal point.
Time taken: 1.7665
Function value obtained: -0.4447
Current minimum: -0.4709
Iteration No: 99 started. Searching for the next optimal point.
[0.0012022803873517154, 1, 4, 0.9648837901288677, 0.12817269323992408, 891, 4, 5]




0.5267491749174917
Iteration No: 99 ended. Search finished for the next optimal point.
Time taken: 0.9666
Function value obtained: -0.4443
Current minimum: -0.4709
Iteration No: 100 started. Searching for the next optimal point.
[0.0015158247975690623, 9, 9, 0.8908223182660441, 0.34840088282954385, 405, 4, 5]




0.5127557755775577
Iteration No: 100 ended. Search finished for the next optimal point.
Time taken: 1.2166
Function value obtained: -0.4393
Current minimum: -0.4709


## LGBM metrics
average precision: 0.4709

auc:  0.5155610561056105

Optmized parameters:  [0.0026544900377379554, 5, 7, 0.967093260484254, 0.07552826835375895, 143, 4, 5]

## Get optmize parameters

In [49]:
print('Optmized parameters: ', res.x)

Optmized parameters:  [0.0026544900377379554, 5, 7, 0.967093260484254, 0.07552826835375895, 143, 4, 5]


Learning Rate: 0.0026544900377379554

Max depth: 5

Min child samples: 7

Subsample: 0.967093260484254

Col Samples by tree: 0.07552826835375895  (% of samples used in each tree(

Num of trees (n_estimators): 143

min_df: 4

ngram_range: 5

## Logistic Regression


In [50]:
from sklearn.preprocessing import MaxAbsScaler, StandardScaler
from scipy.sparse import csr_matrix

In [55]:
Xtrain_wtitle2 = csr_matrix(Xtrain_wtitle.copy())
Xval_wtitle2 = csr_matrix(Xval_wtitle.copy())

#scaler = StandardScaler()
scaler = MaxAbsScaler()


#Xtrain_wtitle2[:, :2] = scaler.fit_transform(Xtrain_wtitle2[:, :2].todense())
#Xval_wtitle2[:, :2] = scaler.transform(Xval_wtitle2[:, :2].todense())

Xtrain_wtitle2 = scaler.fit_transform(Xtrain_wtitle2)
Xval_wtitle2 = scaler.transform(Xval_wtitle2)

In [54]:
Xval_wtitle2.shape

(352, 721)

In [56]:
cl_lr = LogisticRegression(C=0.5,n_jobs=6, random_state=0)
cl_lr.fit(Xtrain_wtitle2, ytrain)

  " = {}.".format(effective_n_jobs(self.n_jobs)))


LogisticRegression(C=0.5, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=6, penalty='l2', random_state=0,
                   solver='warn', tol=0.0001, verbose=0, warm_start=False)

In [57]:
p = cl_lr.predict_proba(Xval_wtitle2)[:, 1]

In [58]:
average_precision_score(yval, p), roc_auc_score(yval, p)

(0.46816171307972243, 0.5476402640264026)