In [1]:
# Pandas for table processing
import pandas as pd
import re
import lightgbm as lgb
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# Packages for auxiliary data science tasks: splitting the dataset into train and test sets and generating metric summaries
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, precision_score, accuracy_score

import rpy2
# import rpy2's package module
import rpy2.robjects.packages as rpackages
from rpy2.robjects import FloatVector
# R vector of strings
from rpy2.robjects.vectors import StrVector

Unable to determine R home: [WinError 2] The system cannot find the file specified


In [2]:
# Load the glass-type dataset; the path is relative to the notebook's working directory.
# NOTE(review): the displayed frame contains three "Unnamed: ..." columns, which look like
# index columns written by earlier to_csv round-trips — confirm whether they should be
# dropped, since every column except "Type" is later used as a model feature.
data = pd.read_csv("Glass_Type.csv")

In [3]:
# Display the frame as loaded (Type still holds the class names as strings).
data

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,0,2732,1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0.00,0.37,'vehic wind float'
1,1,2607,1.51645,14.94,0.00,1.87,73.11,0.00,8.67,1.38,0.00,headlamps
2,2,1653,1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0.00,0.00,'build wind float'
3,3,3264,1.51730,12.35,2.72,1.63,72.87,0.70,9.23,0.00,0.00,'build wind non-float'
4,4,4931,1.51673,13.30,3.64,1.53,72.53,0.65,8.03,0.00,0.29,'build wind non-float'
...,...,...,...,...,...,...,...,...,...,...,...,...
192595,192595,6341,1.51613,13.88,1.78,1.79,73.10,0.00,8.67,0.76,0.00,headlamps
192596,192596,1042,1.51915,12.73,1.85,1.86,72.69,0.60,10.09,0.00,0.00,containers
192597,192597,1210,1.52127,14.32,3.90,0.83,71.50,0.00,9.49,0.00,0.00,'vehic wind float'
192598,192598,191,1.51888,14.99,0.78,1.74,72.50,0.00,9.95,0.00,0.00,tableware


In [4]:
# LightGBM rejects feature names containing special JSON characters
# ("LightGBMError: Do not support special JSON characters in feature name"),
# so every column name is reduced to letters, digits and underscores.
# Approach taken from: https://stackoverflow.com/questions/60582050/lightgbmerror-do-not-support-special-json-characters-in-feature-name-the-same
data = data.rename(
    columns=lambda column_name: re.sub('[^A-Za-z0-9_]+', '', column_name)
)

In [5]:
# Number of samples per glass type (class balance check).
data['Type'].value_counts()

'build wind non-float'    68251
'build wind float'        62455
headlamps                 26201
'vehic wind float'        15233
containers                12038
tableware                  8422
Name: Type, dtype: int64

In [6]:
# Encode the class names in 'Type' as integer ids.
# The fitted encoder is kept (instead of being discarded) so the id -> name mapping can be
# read from `type_encoder.classes_` or reversed with `type_encoder.inverse_transform`,
# rather than relying only on the hand-written table below.
type_encoder = LabelEncoder()
data['Type'] = type_encoder.fit_transform(data['Type'])
# Mapping for this dataset:
# 0 -> 'build wind float'
# 1 -> 'build wind non-float'
# 2 -> 'vehic wind float'
# 3 -> containers
# 4 -> headlamps
# 5 -> tableware

In [7]:
# Display the frame after label encoding (Type now holds integer class ids).
data

Unnamed: 0,Unnamed0,Unnamed01,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,0,2732,1.52211,14.19,3.78,0.91,71.36,0.23,9.14,0.00,0.37,2
1,1,2607,1.51645,14.94,0.00,1.87,73.11,0.00,8.67,1.38,0.00,4
2,2,1653,1.51793,13.21,3.48,1.41,72.64,0.59,8.43,0.00,0.00,0
3,3,3264,1.51730,12.35,2.72,1.63,72.87,0.70,9.23,0.00,0.00,1
4,4,4931,1.51673,13.30,3.64,1.53,72.53,0.65,8.03,0.00,0.29,1
...,...,...,...,...,...,...,...,...,...,...,...,...
192595,192595,6341,1.51613,13.88,1.78,1.79,73.10,0.00,8.67,0.76,0.00,4
192596,192596,1042,1.51915,12.73,1.85,1.86,72.69,0.60,10.09,0.00,0.00,3
192597,192597,1210,1.52127,14.32,3.90,0.83,71.50,0.00,9.49,0.00,0.00,2
192598,192598,191,1.51888,14.99,0.78,1.74,72.50,0.00,9.95,0.00,0.00,5


In [8]:
def getOmittedData(dataFrame, classIndex):
    '''
    Select the rows that belong to the class being left out.

            Parameters:
                    dataFrame (pandas dataframe): data containing a 'Type' label column
                    classIndex (int): label of the class to be omitted

            Returns:
                    pandas dataframe: the rows whose 'Type' equals classIndex
                    (original index and columns are kept)
    '''
    belongs_to_class = dataFrame['Type'].eq(classIndex)
    return dataFrame.loc[belongs_to_class]

In [9]:
def getNonOmittedData(dataFrame, classIndex):
    '''
    Select every row that does NOT belong to the class being left out.

            Parameters:
                    dataFrame (pandas dataframe): data containing a 'Type' label column
                    classIndex (int): label of the class to be omitted

            Returns:
                    pandas dataframe: the rows whose 'Type' differs from classIndex
                    (original index and columns are kept)
    '''
    is_other_class = dataFrame['Type'].ne(classIndex)
    return dataFrame.loc[is_other_class]


In [10]:
def trainTestSplit(dataFrame, testSize, randomState=None):
    '''
    Returns a stratified train/test split of the data

            Parameters:
                    dataFrame (pandas dataframe): the data (non-omitted data); must contain a 'Type' label column
                    testSize (int): the absolute number of test samples
                                    (forwarded unchanged as train_test_split's test_size).
                    randomState (int or None): seed forwarded to train_test_split as random_state.
                                    The default None keeps the original unseeded behaviour;
                                    pass an int to make the split reproducible.

            Returns:
                    X_train, X_test (pandas dataframes): feature splits (all columns except 'Type')
                    Y_train, Y_test (pandas Series): the matching 'Type' labels
    '''
    features = dataFrame.drop(columns=["Type"])
    labels = dataFrame["Type"]
    # Stratify on the label so train and test keep the same class proportions.
    X_train, X_test, Y_train, Y_test = train_test_split(features,
                                                        labels,
                                                        test_size=testSize,
                                                        stratify=labels,
                                                        random_state=randomState)
    return X_train, X_test, Y_train, Y_test


In [11]:
def trainModel(X_train, Y_train):
    '''
    Fits a LightGBM multi-class booster on the given training data

            Parameters:
                    X_train (pandas dataframe): training features (non-omitted data)
                    Y_train (pandas Series): training labels

            Returns:
                    lightgbm.basic.Booster: the trained LightGBM classifier
    '''
    # LightGBM trains from its own Dataset container
    train_set = lgb.Dataset(X_train, label=Y_train)
    booster_params = {
        'learning_rate': 0.1,
        'boosting_type': 'gbdt',       # gradient boosted decision trees
        'objective': 'multiclass',     # multi-class target
        'metric': 'multi_logloss',     # evaluation metric for multi-class
        'max_depth': 15,
        'num_class': 6,                # size of the model's class-probability output
    }
    # Third positional argument of lgb.train is the number of boosting rounds
    return lgb.train(booster_params, train_set, 100)

In [12]:
def getConfidenceScores(probas):
    '''
    Returns confidence scores of a model's predictions

    The confidence score of a sample is the probability of its winning class,
    i.e. the maximum of its row in the class-probability matrix.

            Parameters:
                    probas (numpy array): 2d array of class probabilities (one row per sample)

            Returns:
                    confidenceScores (numpy array): 1d array with the row-wise maximum
    '''
    # The original computed this twice in a row; a single pass is enough.
    confidenceScores = np.amax(probas, axis=1)
    return confidenceScores

In [13]:
def plotHistorgam(confidenceScores):
    '''
    Plots a histogram (with KDE overlay) of confidence scores

            Parameters:
                    confidenceScores (numpy array): confidence scores

            Returns:
                    None; the figure is shown with plt.show()
    '''
    # The unused `sns.load_dataset("iris")` call was removed: its result was never
    # read, and it only loaded an unrelated example dataset on every plot.
    sns.set(style="darkgrid")
    plt.figure(figsize = (15,8))

    sns.histplot(data=confidenceScores, color="skyblue", label="non-omitted", kde=True)

    plt.legend()
    plt.show()

In [14]:
def plotHistograms(confidenceScoresNonOmitted, confidenceScoresOmitted):
    '''
    Plots overlaid histograms (with KDE overlays) of two sets of confidence scores

            Parameters:
                    confidenceScoresNonOmitted (numpy array): confidence scores of the non omitted data
                    confidenceScoresOmitted (numpy array): confidence scores of the omitted data

            Returns:
                    None; one figure combining both histograms is shown with plt.show()
    '''
    # The unused `sns.load_dataset("iris")` call was removed: its result was never
    # read, and it only loaded an unrelated example dataset on every plot.
    sns.set(style="darkgrid")
    plt.figure(figsize = (15,8))

    # Both histograms are drawn on the same axes so the distributions can be compared.
    sns.histplot(data=confidenceScoresNonOmitted, color="skyblue", label="non-omitted", kde=True)
    sns.histplot(data=confidenceScoresOmitted, color="red", label="omitted", kde=True)

    plt.legend()
    plt.show()

#  Drift simulation

In [15]:
def dataInBatches(confidenceScores, length, batchSize):
    '''
    Returns confidence scores in batches

    The first `length` scores are arranged into rows of `batchSize` values. When
    `length` is not a multiple of `batchSize`, the trailing partial batch is dropped
    (the original implementation failed with a reshape error in that case).

            Parameters:
                    confidenceScores (numpy array): 1d confidence scores data
                    length (int): the length of the dataset to be used
                    batchSize (int): the batch size

            Returns:
                    confidenceScoresInBatches (numpy array): 2d array of shape
                    (length // batchSize, batchSize)

            Raises:
                    ValueError: if batchSize is not positive, or if length is negative
                    or larger than the number of available scores
    '''
    if batchSize <= 0:
        raise ValueError("batchSize must be a positive integer, got %r" % (batchSize,))
    scores = np.asarray(confidenceScores)
    if length < 0 or length > len(scores):
        raise ValueError("length must be between 0 and the number of scores (%d), got %r"
                         % (len(scores), length))
    numBatches = length // batchSize
    # Keep only whole batches so the reshape is always valid.
    confidenceScoresInBatches = scores[:numBatches * batchSize].reshape(numBatches, batchSize)
    return confidenceScoresInBatches

In [16]:
# Import R's 'utils' package through rpy2; it is used for CRAN mirror selection here
# and for package installation in the next cell.
utils = rpackages.importr('utils')

# Select the CRAN mirror R will download packages from.
utils.chooseCRANmirror(ind=1) # ind=1: the first mirror in the list

<rpy2.rinterface_lib.sexp.NULLType object at 0x000002873F196408> [RTYPES.NILSXP]

In [17]:
# R package names.
# The trailing comma matters: ('cpm') is just the string 'cpm', and iterating over it
# would check/install packages named 'c', 'p' and 'm' instead of 'cpm'.
packnames = ('cpm',)

# Install only the packages that are not already available in the R library.
names_to_install = [x for x in packnames if not rpackages.isinstalled(x)]
if names_to_install:
    utils.install_packages(StrVector(names_to_install))

In [18]:
# Load the R 'cpm' package through rpy2; its detectChangePoint function is called
# in the experiment loop below.
cpm = rpackages.importr("cpm")

In [19]:
def gradualToHalfDriftSimulation(nonOmittedDataInBatches, omittedDataInBatches, driftType):
    '''
    Simulate a gradual drift: omitted-class scores are mixed into the stream step by step

    The stream is built from batches of 20 confidence scores:
      1. the first half of the non-omitted batches is kept unchanged (before the change point);
      2. in the next 20 batches, batch i (1..20) holds 20-i non-omitted scores and i omitted
         scores (the gradual ramp);
      3. every remaining batch holds a fixed number of omitted scores: 10 for "half", 20 for "full".
    Within the ramp and within the tail, the batch columns are shuffled (the same permutation
    for all rows of that part), so omitted scores are not always at the end of a batch.

            Parameters:
                    nonOmittedDataInBatches (numpy array): 2d confidence scores of the non omitted data,
                                                           20 scores per row
                    omittedDataInBatches (numpy array): confidence scores of the omitted data
                    driftType (str): "half" or "full"

            Returns:
                    numpy array: 2d confidence scores in batches with the drift simulated

            Raises:
                    KeyError: if driftType is neither "half" nor "full"
    '''
    driftTypeDict = {
      "half": 10,
      "full": 20
    }
    # Number of omitted scores per batch after the ramp; also used for the slicing below.
    driftProportion = driftTypeDict[driftType]
    # Batches before and after the change point
    changePointIndex = int(len(nonOmittedDataInBatches)/2)
    befor_cp = nonOmittedDataInBatches[:changePointIndex]
    after_cp = nonOmittedDataInBatches[changePointIndex:]
    # Flatten the omitted scores so single values can be sampled
    omittedDataFlatted = omittedDataInBatches.flatten()
    # Gauss sum: the ramp needs 1 + 2 + ... + 20 = (20*21)/2 = 210 omitted scores
    rampSamples = np.random.choice(omittedDataFlatted, size=210, replace=False)
    ramp = np.zeros(shape=(20,20))
    offset = 0
    for i in range(1,21):
        # Batch i consumes the next i sampled scores. The original always took
        # rampSamples[:i], reusing the same few values in every batch and never
        # touching most of the 210 samples. The leftover debug print() was removed.
        ramp[i-1] = np.concatenate((after_cp[i-1,:20-i], rampSamples[offset:offset+i]))
        offset += i
    # Shuffling the transposed view permutes the columns of `ramp` in place
    np.random.shuffle(ramp.T)
    tailBatches = len(after_cp)-20
    tailSamples = np.random.choice(omittedDataFlatted, size=driftProportion*tailBatches,
                                   replace=False).reshape(tailBatches, driftProportion)
    tail = np.concatenate((after_cp[20:,:20-driftProportion], tailSamples), axis=1)
    np.random.shuffle(tail.T)
    return np.concatenate([befor_cp, ramp, tail])

In [None]:
# Leave-one-class-out drift experiment.
# For every class: train 50 models on the data without that class, score held-out
# non-omitted samples and the omitted class, build a simulated drift stream from the
# confidence scores, and run the R cpm change-point detector on it.
globalOutputResult = []
# Loading the R package is loop-invariant, so do it once instead of once per run.
cpm = rpackages.importr("cpm")
for class_num in range(6):
    nonOmittedData = getNonOmittedData(data,class_num)
    omittedData = getOmittedData(data,class_num)
    # Features of the omitted class. The label is dropped by name (consistent with
    # trainTestSplit) rather than by position, and only once per class because it
    # does not change between runs.
    X_test_omitted = omittedData.drop(columns=["Type"])
    # Results of the 50 runs for this class
    outputPerClass = []

    for i in range(50):
        x_train, x_test, y_train, y_test = trainTestSplit(nonOmittedData, 5000)
        clf = trainModel(x_train, y_train)
        #prediction on non-omitted dataset
        y_pred=clf.predict(x_test)
        winningClassCs = getConfidenceScores(y_pred)
        # Test on omitted dataset
        y_pred_omitted=clf.predict(X_test_omitted)
        winningClassCsOmitted = getConfidenceScores(y_pred_omitted)

        # Arrange the scores in batches of 20: 5000 non-omitted and 2500 omitted scores
        winningClassCs = dataInBatches(winningClassCs, 5000, 20)
        winningClassCsOmitted = dataInBatches(winningClassCsOmitted, 2500, 20)
        print("Class ", class_num, "--------> ", str(i+1),"/50 running...")
        x = gradualToHalfDriftSimulation(winningClassCs, winningClassCsOmitted, "full")
        #y=pd.DataFrame(x)
        #y.to_csv("csvDumps/cs_"+str(class_num)+"_"+str(i+1)+".csv", sep='\t')
        # The detector expects a flat R numeric vector
        x = x.flatten()
        vector = x.tolist()
        vectorToFloatVector = FloatVector(vector)
        a = cpm.detectChangePoint(vectorToFloatVector,"Cramer-von-Mises",ARL0=5000,startup=20)
        dataset = pd.DataFrame(np.asarray(a))
        # Save [changePoint, detectionTime, changeDetected]
        output = np.asarray(dataset[3:][0].tolist()).flatten().tolist()
        output.append(class_num)
        outputPerClass.append(output)
        print(output)
        print("Class ", class_num, "--------> ", str(i+1),"/50 done.")
    globalOutputResult.append(outputPerClass)
    print("Class ", class_num, "done")

You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1255
[LightGBM] [Info] Number of data points in the train set: 125145, number of used features: 11
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -0.645455
[LightGBM] [Info] Start training from score -2.145169
[LightGBM] [Info] Start training from score -2.380539
[LightGBM] [Info] Start training from score -1.602867
[LightGBM] [Info] Start training from score -2.737856


Class  0 -------->  1 /50 running...
























[720, 733, 1, 0]
Class  0 -------->  1 /50 done.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1255
[LightGBM] [Info] Number of data points in the train set: 125145, number of used features: 11
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -0.645455
[LightGBM] [Info] Start training from score -2.145169
[LightGBM] [Info] Start training from score -2.380539
[LightGBM] [Info] Start training from score -1.602867
[LightGBM] [Info] Start training from score -2.737856


Class  0 -------->  2 /50 running...
























[2596, 2605, 1, 0]
Class  0 -------->  2 /50 done.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1255
[LightGBM] [Info] Number of data points in the train set: 125145, number of used features: 11
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -0.645455
[LightGBM] [Info] Start training from score -2.145169
[LightGBM] [Info] Start training from score -2.380539
[LightGBM] [Info] Start training from score -1.602867
[LightGBM] [Info] Start training from score -2.737856


Class  0 -------->  3 /50 running...
























[2613, 2621, 1, 0]
Class  0 -------->  3 /50 done.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1255
[LightGBM] [Info] Number of data points in the train set: 125145, number of used features: 11
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -0.645455
[LightGBM] [Info] Start training from score -2.145169
[LightGBM] [Info] Start training from score -2.380539
[LightGBM] [Info] Start training from score -1.602867
[LightGBM] [Info] Start training from score -2.737856


Class  0 -------->  4 /50 running...
























[2557, 2701, 1, 0]
Class  0 -------->  4 /50 done.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1255
[LightGBM] [Info] Number of data points in the train set: 125145, number of used features: 11
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -0.645455
[LightGBM] [Info] Start training from score -2.145169
[LightGBM] [Info] Start training from score -2.380539
[LightGBM] [Info] Start training from score -1.602867
[LightGBM] [Info] Start training from score -2.737856


Class  0 -------->  5 /50 running...
























[2551, 2604, 1, 0]
Class  0 -------->  5 /50 done.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1255
[LightGBM] [Info] Number of data points in the train set: 125145, number of used features: 11
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -0.645455
[LightGBM] [Info] Start training from score -2.145169
[LightGBM] [Info] Start training from score -2.380539
[LightGBM] [Info] Start training from score -1.602867
[LightGBM] [Info] Start training from score -2.737856


Class  0 -------->  6 /50 running...
























[2106, 2144, 1, 0]
Class  0 -------->  6 /50 done.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1255
[LightGBM] [Info] Number of data points in the train set: 125145, number of used features: 11
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -0.645455
[LightGBM] [Info] Start training from score -2.145169
[LightGBM] [Info] Start training from score -2.380539
[LightGBM] [Info] Start training from score -1.602867
[LightGBM] [Info] Start training from score -2.737856


Class  0 -------->  7 /50 running...
























[2609, 2615, 1, 0]
Class  0 -------->  7 /50 done.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1255
[LightGBM] [Info] Number of data points in the train set: 125145, number of used features: 11
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -0.645455
[LightGBM] [Info] Start training from score -2.145169
[LightGBM] [Info] Start training from score -2.380539
[LightGBM] [Info] Start training from score -1.602867
[LightGBM] [Info] Start training from score -2.737856


Class  0 -------->  8 /50 running...




















[262, 281, 1, 0]
Class  0 -------->  8 /50 done.




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1255
[LightGBM] [Info] Number of data points in the train set: 125145, number of used features: 11
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -0.645455
[LightGBM] [Info] Start training from score -2.145169
[LightGBM] [Info] Start training from score -2.380539
[LightGBM] [Info] Start training from score -1.602867
[LightGBM] [Info] Start training from score -2.737856


Class  0 -------->  9 /50 running...
























[2546, 2680, 1, 0]
Class  0 -------->  9 /50 done.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1255
[LightGBM] [Info] Number of data points in the train set: 125145, number of used features: 11
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -0.645455
[LightGBM] [Info] Start training from score -2.145169
[LightGBM] [Info] Start training from score -2.380539
[LightGBM] [Info] Start training from score -1.602867
[LightGBM] [Info] Start training from score -2.737856


Class  0 -------->  10 /50 running...
























[2521, 2571, 1, 0]
Class  0 -------->  10 /50 done.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1255
[LightGBM] [Info] Number of data points in the train set: 125145, number of used features: 11
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -0.645455
[LightGBM] [Info] Start training from score -2.145169
[LightGBM] [Info] Start training from score -2.380539
[LightGBM] [Info] Start training from score -1.602867
[LightGBM] [Info] Start training from score -2.737856


Class  0 -------->  11 /50 running...




















[45, 72, 1, 0]
Class  0 -------->  11 /50 done.




You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1255
[LightGBM] [Info] Number of data points in the train set: 125145, number of used features: 11
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -0.645455
[LightGBM] [Info] Start training from score -2.145169
[LightGBM] [Info] Start training from score -2.380539
[LightGBM] [Info] Start training from score -1.602867
[LightGBM] [Info] Start training from score -2.737856


Class  0 -------->  12 /50 running...
























[546, 566, 1, 0]
Class  0 -------->  12 /50 done.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1255
[LightGBM] [Info] Number of data points in the train set: 125145, number of used features: 11
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -0.645455
[LightGBM] [Info] Start training from score -2.145169
[LightGBM] [Info] Start training from score -2.380539
[LightGBM] [Info] Start training from score -1.602867
[LightGBM] [Info] Start training from score -2.737856


Class  0 -------->  13 /50 running...
























[672, 685, 1, 0]
Class  0 -------->  13 /50 done.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1255
[LightGBM] [Info] Number of data points in the train set: 125145, number of used features: 11
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -0.645455
[LightGBM] [Info] Start training from score -2.145169
[LightGBM] [Info] Start training from score -2.380539
[LightGBM] [Info] Start training from score -1.602867
[LightGBM] [Info] Start training from score -2.737856


Class  0 -------->  14 /50 running...
























[2541, 2595, 1, 0]
Class  0 -------->  14 /50 done.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1255
[LightGBM] [Info] Number of data points in the train set: 125145, number of used features: 11
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -0.645455
[LightGBM] [Info] Start training from score -2.145169
[LightGBM] [Info] Start training from score -2.380539
[LightGBM] [Info] Start training from score -1.602867
[LightGBM] [Info] Start training from score -2.737856


Class  0 -------->  15 /50 running...
























[1023, 1036, 1, 0]
Class  0 -------->  15 /50 done.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 1255
[LightGBM] [Info] Number of data points in the train set: 125145, number of used features: 11
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -0.645455
[LightGBM] [Info] Start training from score -2.145169
[LightGBM] [Info] Start training from score -2.380539
[LightGBM] [Info] Start training from score -1.602867
[LightGBM] [Info] Start training from score -2.737856


Class  0 -------->  16 /50 running...
























[2579, 2622, 1, 0]
Class  0 -------->  16 /50 done.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1255
[LightGBM] [Info] Number of data points in the train set: 125145, number of used features: 11
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -0.645455
[LightGBM] [Info] Start training from score -2.145169
[LightGBM] [Info] Start training from score -2.380539
[LightGBM] [Info] Start training from score -1.602867
[LightGBM] [Info] Start training from score -2.737856


Class  0 -------->  17 /50 running...
























[2441, 2640, 1, 0]
Class  0 -------->  17 /50 done.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1255
[LightGBM] [Info] Number of data points in the train set: 125145, number of used features: 11
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -0.645455
[LightGBM] [Info] Start training from score -2.145169
[LightGBM] [Info] Start training from score -2.380539
[LightGBM] [Info] Start training from score -1.602867
[LightGBM] [Info] Start training from score -2.737856


Class  0 -------->  18 /50 running...
























[2435, 2451, 1, 0]
Class  0 -------->  18 /50 done.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1255
[LightGBM] [Info] Number of data points in the train set: 125145, number of used features: 11
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -0.645455
[LightGBM] [Info] Start training from score -2.145169
[LightGBM] [Info] Start training from score -2.380539
[LightGBM] [Info] Start training from score -1.602867
[LightGBM] [Info] Start training from score -2.737856


Class  0 -------->  19 /50 running...
























[2480, 2572, 1, 0]
Class  0 -------->  19 /50 done.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 1255
[LightGBM] [Info] Number of data points in the train set: 125145, number of used features: 11
[LightGBM] [Info] Start training from score -34.538776
[LightGBM] [Info] Start training from score -0.645455
[LightGBM] [Info] Start training from score -2.145169
[LightGBM] [Info] Start training from score -2.380539
[LightGBM] [Info] Start training from score -1.602867
[LightGBM] [Info] Start training from score -2.737856


Class  0 -------->  20 /50 running...






















In [None]:
# Persist the collected results: one line per class, each line being the string
# form of that class's list of per-run outputs.
with open('gradualReturn_full_CvM.txt', 'w') as results_file:
    results_file.writelines("%s\n" % class_results for class_results in globalOutputResult)