## Import Modules

In [1]:
import pandas as pd

import numpy as np

import matplotlib.pyplot as plt


#sklearn
from sklearn.linear_model import LogisticRegression
from sklearn import metrics
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import matthews_corrcoef

# Base matplotlib style sheet. The seaborn theme below is applied afterwards,
# so its settings take precedence wherever both define the same parameter.
plt.style.use('fivethirtyeight')
import seaborn as sns

# rcParams overrides handed to seaborn: default figure size and font sizes
theme_rc = {
    'figure.figsize': (10, 5),
    'font.size': 14,
    'axes.titlesize': 16,
    'axes.labelsize': 15,
    'xtick.labelsize': 12,
    'ytick.labelsize': 12,
    'legend.fontsize': 13,
}
sns.set_theme(style='whitegrid', font='Arial', rc=theme_rc, color_codes=True)

# CSV
import csv

In [2]:
# Room identifier; it is embedded in the file names of the ROC figures and
# of the results CSV written by the modelling cell.
a: str = 'E07'

# The number of input lags (formerly b = '5') is not relevant in this case
# and is therefore left undefined.

# Last timestep to predict: the modelling cell iterates d = 1 ... c.
c: int = 12

In [3]:
# Single-parameter logistic regression study.
#
# For every forecast horizon d = 1 ... c and for every candidate input column,
# one logistic-regression model is trained that predicts column 't+d' from
# that single input. For each model the cell prints the evaluation, stores a
# ROC figure as PDF and appends one row of metrics to the results CSV.
#
# Relies on `a` (room id used in output file names) and `c` (last horizon)
# from the configuration cell.

# Number of future-timestep columns ('t+1' ... 't+144') present in the file
n_future_steps = 144
results_path = 'Logistic_Regression_results_'+a+'_one_parameter.csv'


# Import data (done once: loading and column clean-up do not depend on d)

# Read in data and set index.
# Raw string: '\P' and '\d' are not valid escape sequences; the resulting
# path is unchanged, only the invalid-escape warning disappears.
raw_data = pd.read_csv(r"\Pre-Processing\data_E07_input_5_output_144.txt", parse_dates=True)
data_all = raw_data.copy()
data_all['DateTime'] = pd.to_datetime(data_all['DateTime'])
data_all = data_all.set_index('DateTime')

# Drop the column for Second
data_all = data_all.drop('Second_0', axis = 1)

# Saving data names for later use
data_list = list(data_all.columns)

# Drop columns that should not be used as features in this model (columns that refer to the date)
# NOTE(review): selected purely by position (6:86) - confirm against the file layout
data_all = data_all.drop(data_list[6:86], axis = 1)


# Start a fresh results file with the header row. Doing this once up front
# no longer ties the header to one specific (d, f) combination of the loop.
with open(results_path, 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(['Predicted Timestep','Input', 'Accuracy - Training Set', 'Accuracy - Test Set',  'Precision', 'Recall',  'F1-Score', 'Matthews Correlation Coefficient'])


# d = number of timestep to predict
for d in range(1, c+1):

    # Timestep as string ('t' for d == 0, otherwise 't+d')
    e = 't' if d == 0 else 't+'+str(d)

    # Drop columns of future timestamps that should not be used as input for
    # this model: every 't+i' except the one being predicted.
    other_targets = ['t+'+str(i) for i in range(1, n_future_steps+1) if i != d]
    data = data_all.drop(other_targets, axis = 1)

    # parameter that only should be used as input
    # ('t' is an input only when it is not itself the prediction target)
    l = ['E07CO2', 'E07Tair', 'E07ElL', 'E07SP', 'E07W', 'E07WT']
    if d != 0:
        l.append('t')
    l += ['t-1', 't-2', 't-3', 't-4', 't-5']

    for f in l:

        # Create X and y: keep only one parameter and the timestep to predict
        X = data[[f]]
        y = data[e].to_numpy()


        # Logistic Regression Model Fitting

        # Split data into training and testing sets (75 % training, 25 % testing).
        # shuffle=False keeps the row order, so the test set is the last
        # quarter of the rows and no random_state is involved.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, shuffle=False)

        # Standardise the input. The scaler is fitted on the training rows
        # only and then applied to the test rows, so no statistics of the
        # test set leak into the preprocessing.
        sc_x = StandardScaler()
        X_train = sc_x.fit_transform(X_train)
        X_test = sc_x.transform(X_test)

        # Print shapes of data sets
        print('Training Data Shape:', X_train.shape)
        print('Training Labels Shape:', y_train.shape)
        print('Testing Data Shape:', X_test.shape)
        print('Testing Labels Shape:', y_test.shape)

        # Create and fit model
        logreg = LogisticRegression(max_iter=1000)
        logreg.fit(X_train, y_train)

        # accuracy on the training set
        accuracy_train = logreg.score(X_train, y_train)

        # Make predictions on the test set
        y_pred = logreg.predict(X_test)

        # accuracy on the test set
        accuracy_test = logreg.score(X_test, y_test)
        print('Accuracy of logistic regression classifier on test set: {:.2f}'.format(accuracy_test))
        print(accuracy_test)


        # Analysis of results

        # Confusion Matrix
        conf_matrix = confusion_matrix(y_test, y_pred)
        print(conf_matrix)
        # check if not only 0s are predicted
        count_no_occ_pred = np.count_nonzero(y_pred == 0)
        print('Number of 0 predicted: ', count_no_occ_pred)
        count_occ_pred = np.count_nonzero(y_pred == 1)
        print('Number of 1 predicted: ', count_occ_pred)

        # Classification Report
        # zero_division=0: a model that never predicts class 1 gets 0.0 for
        # the undefined metrics (the value reported before) without emitting
        # an UndefinedMetricWarning for every such model.
        print(classification_report(y_test, y_pred, zero_division=0))

        # Precision
        precision = precision_score(y_test, y_pred, zero_division=0)

        # Recall
        recall = recall_score(y_test, y_pred, zero_division=0)

        # F1 Score
        f1 = f1_score(y_test, y_pred, zero_division=0)

        # Matthews Correlation Coefficient
        mcc = matthews_corrcoef(y_test, y_pred)

        # ROC
        # Curve and area are both computed from the predicted probability of
        # the second class, so the area in the legend belongs to the plotted
        # curve (hard 0/1 predictions would give a different, misleading area).
        y_score = logreg.predict_proba(X_test)[:,1]
        logit_roc_auc = roc_auc_score(y_test, y_score)
        fpr, tpr, thresholds = roc_curve(y_test, y_score)
        fig, ax = plt.subplots()
        ax.plot(fpr, tpr, label='Logistic Regression (area = %0.2f)' % logit_roc_auc)
        ax.plot([0,1], [0,1], 'r--')
        ax.set_xlim([0.0, 1.0])
        ax.set_ylim([0.0, 1.05])
        ax.set_xlabel('False Positive Rate')
        ax.set_ylabel('True Positive Rate')
        ax.set_title('Receiver operating characteristic - '+e)
        ax.legend(loc="lower right")
        fig.savefig('Logistic_Regression_ROC_'+a+'_one_parameter_'+f+'_forecast_'+e+'.pdf', bbox_inches='tight', dpi=100)
        # Close the figure so the many figures of the loop do not pile up in memory
        plt.close(fig)


        # Save results

        # Append this model's row right away, so finished results are on
        # disk even if a later model fails.
        with open(results_path, 'a', newline='') as file:
            writer = csv.writer(file)
            writer.writerow([e, f, accuracy_train, accuracy_test, precision, recall, f1, mcc])

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8334700574241182
[[20191   128]
 [ 3932   129]]
Number of 0 predicted:  24123
Number of 1 predicted:  257
              precision    recall  f1-score   support

         0.0       0.84      0.99      0.91     20319
         1.0       0.50      0.03      0.06      4061

    accuracy                           0.83     24380
   macro avg       0.67      0.51      0.48     24380
weighted avg       0.78      0.83      0.77     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8278917145200985
[[19927   392]
 [ 3804   257]]
Number of 0 predicted:  23731
Number of 1 predicted:  649
              precision    recall  f1-score   support

         0.0       0.84     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8334290401968827
[[20319     0]
 [ 4061     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91     20319
         1.0       0.00      0.00      0.00      4061

    accuracy                           0.83     24380
   macro avg       0.42      0.50      0.45     24380
weighted avg       0.69      0.83      0.76     24380



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.89
0.8894995898277277
[[19875   444]
 [ 2250  1811]]
Number of 0 predicted:  22125
Number of 1 predicted:  2255
              precision    recall  f1-score   support

         0.0       0.90      0.98      0.94     20319
         1.0       0.80      0.45      0.57      4061

    accuracy                           0.89     24380
   macro avg       0.85      0.71      0.75     24380
weighted avg       0.88      0.89      0.88     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8334290401968827
[[20319     0]
 [ 4061     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.97
0.9710828547990156
[[19967   352]
 [  353  3708]]
Number of 0 predicted:  20320
Number of 1 predicted:  4060
              precision    recall  f1-score   support

         0.0       0.98      0.98      0.98     20319
         1.0       0.91      0.91      0.91      4061

    accuracy                           0.97     24380
   macro avg       0.95      0.95      0.95     24380
weighted avg       0.97      0.97      0.97     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.95
0.9480721903199344
[[19687   632]
 [  634  3427]]
Number of 0 predicted:  20321
Number of 1 predicted:  4059
              precision    recall  f1-score   support

         0.0       0.97   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8333880229696472
[[20318     0]
 [ 4062     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91     20318
         1.0       0.00      0.00      0.00      4062

    accuracy                           0.83     24380
   macro avg       0.42      0.50      0.45     24380
weighted avg       0.69      0.83      0.76     24380



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.89
0.8888022969647252
[[19866   452]
 [ 2259  1803]]
Number of 0 predicted:  22125
Number of 1 predicted:  2255
              precision    recall  f1-score   support

         0.0       0.90      0.98      0.94     20318
         1.0       0.80      0.44      0.57      4062

    accuracy                           0.89     24380
   macro avg       0.85      0.71      0.75     24380
weighted avg       0.88      0.89      0.88     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8333880229696472
[[20318     0]
 [ 4062     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.95
0.9480721903199344
[[19686   632]
 [  634  3428]]
Number of 0 predicted:  20320
Number of 1 predicted:  4060
              precision    recall  f1-score   support

         0.0       0.97      0.97      0.97     20318
         1.0       0.84      0.84      0.84      4062

    accuracy                           0.95     24380
   macro avg       0.91      0.91      0.91     24380
weighted avg       0.95      0.95      0.95     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.93
0.9311320754716981
[[19480   838]
 [  841  3221]]
Number of 0 predicted:  20321
Number of 1 predicted:  4059
              precision    recall  f1-score   support

         0.0       0.96   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8333470057424118
[[20317     0]
 [ 4063     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91     20317
         1.0       0.00      0.00      0.00      4063

    accuracy                           0.83     24380
   macro avg       0.42      0.50      0.45     24380
weighted avg       0.69      0.83      0.76     24380



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.89
0.8850697292863002
[[19820   497]
 [ 2305  1758]]
Number of 0 predicted:  22125
Number of 1 predicted:  2255
              precision    recall  f1-score   support

         0.0       0.90      0.98      0.93     20317
         1.0       0.78      0.43      0.56      4063

    accuracy                           0.89     24380
   macro avg       0.84      0.70      0.75     24380
weighted avg       0.88      0.89      0.87     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8333470057424118
[[20317     0]
 [ 4063     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.93
0.9311320754716981
[[19479   838]
 [  841  3222]]
Number of 0 predicted:  20320
Number of 1 predicted:  4060
              precision    recall  f1-score   support

         0.0       0.96      0.96      0.96     20317
         1.0       0.79      0.79      0.79      4063

    accuracy                           0.93     24380
   macro avg       0.88      0.88      0.88     24380
weighted avg       0.93      0.93      0.93     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.92
0.9182116488925348
[[19322   995]
 [  999  3064]]
Number of 0 predicted:  20321
Number of 1 predicted:  4059
              precision    recall  f1-score   support

         0.0       0.95   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8333059885151763
[[20316     0]
 [ 4064     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91     20316
         1.0       0.00      0.00      0.00      4064

    accuracy                           0.83     24380
   macro avg       0.42      0.50      0.45     24380
weighted avg       0.69      0.83      0.76     24380



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.88
0.8816652994257588
[[19778   538]
 [ 2347  1717]]
Number of 0 predicted:  22125
Number of 1 predicted:  2255
              precision    recall  f1-score   support

         0.0       0.89      0.97      0.93     20316
         1.0       0.76      0.42      0.54      4064

    accuracy                           0.88     24380
   macro avg       0.83      0.70      0.74     24380
weighted avg       0.87      0.88      0.87     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8333059885151763
[[20316     0]
 [ 4064     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.92
0.9182116488925348
[[19321   995]
 [  999  3065]]
Number of 0 predicted:  20320
Number of 1 predicted:  4060
              precision    recall  f1-score   support

         0.0       0.95      0.95      0.95     20316
         1.0       0.75      0.75      0.75      4064

    accuracy                           0.92     24380
   macro avg       0.85      0.85      0.85     24380
weighted avg       0.92      0.92      0.92     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.91
0.9083264971287941
[[19201  1115]
 [ 1120  2944]]
Number of 0 predicted:  20321
Number of 1 predicted:  4059
              precision    recall  f1-score   support

         0.0       0.94   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.833264971287941
[[20315     0]
 [ 4065     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91     20315
         1.0       0.00      0.00      0.00      4065

    accuracy                           0.83     24380
   macro avg       0.42      0.50      0.45     24380
weighted avg       0.69      0.83      0.76     24380



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.88
0.8789991796554553
[[19745   570]
 [ 2380  1685]]
Number of 0 predicted:  22125
Number of 1 predicted:  2255
              precision    recall  f1-score   support

         0.0       0.89      0.97      0.93     20315
         1.0       0.75      0.41      0.53      4065

    accuracy                           0.88     24380
   macro avg       0.82      0.69      0.73     24380
weighted avg       0.87      0.88      0.86     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.833264971287941
[[20315     0]
 [ 4065     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      1

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.91
0.9083264971287941
[[19200  1115]
 [ 1120  2945]]
Number of 0 predicted:  20320
Number of 1 predicted:  4060
              precision    recall  f1-score   support

         0.0       0.94      0.95      0.94     20315
         1.0       0.73      0.72      0.72      4065

    accuracy                           0.91     24380
   macro avg       0.84      0.83      0.83     24380
weighted avg       0.91      0.91      0.91     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.90
0.900656275635767
[[19107  1208]
 [ 1214  2851]]
Number of 0 predicted:  20321
Number of 1 predicted:  4059
              precision    recall  f1-score   support

         0.0       0.94    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8332239540607055
[[20314     0]
 [ 4066     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91     20314
         1.0       0.00      0.00      0.00      4066

    accuracy                           0.83     24380
   macro avg       0.42      0.50      0.45     24380
weighted avg       0.69      0.83      0.76     24380



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8332239540607055
[[20314     0]
 [ 4066     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91     20314
         1.0       0.00      0.00      0.00      4066

    accuracy                           0.83     24380
   macro avg       0.42      0.50      0.45     24380
weighted avg       0.69      0.83      0.76     24380



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.88
0.8762510254306809
[[19711   603]
 [ 2414  1652]]
Number of 0 predicted:  22125
Number of 1 predicted:  2255
              precision    recall  f1-score   support

         0.0       0.89      0.97      0.93     20314
         1.0       0.73      0.41      0.52      4066

    accuracy                           0.88     24380
   macro avg       0.81      0.69      0.73     24380
weighted avg       0.86      0.88      0.86     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8332239540607055
[[20314     0]
 [ 4066     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.90
0.900656275635767
[[19106  1208]
 [ 1214  2852]]
Number of 0 predicted:  20320
Number of 1 predicted:  4060
              precision    recall  f1-score   support

         0.0       0.94      0.94      0.94     20314
         1.0       0.70      0.70      0.70      4066

    accuracy                           0.90     24380
   macro avg       0.82      0.82      0.82     24380
weighted avg       0.90      0.90      0.90     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.89
0.8939704675963905
[[19025  1289]
 [ 1296  2770]]
Number of 0 predicted:  20321
Number of 1 predicted:  4059
              precision    recall  f1-score   support

         0.0       0.94    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8331829368334701
[[20313     0]
 [ 4067     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91     20313
         1.0       0.00      0.00      0.00      4067

    accuracy                           0.83     24380
   macro avg       0.42      0.50      0.45     24380
weighted avg       0.69      0.83      0.76     24380



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8331829368334701
[[20313     0]
 [ 4067     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91     20313
         1.0       0.00      0.00      0.00      4067

    accuracy                           0.83     24380
   macro avg       0.42      0.50      0.45     24380
weighted avg       0.69      0.83      0.76     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.87
0.8733388022969647
[[19675   638]
 [ 2450  1617]]
Number of 0 predicted:  22125
Number of 1 predicted:  2255
              precision    recall  f1-score   support

         0.0       0.89      0.97      0.93     20313
         1.0       0.72      0.40      0.51      4067

    accuracy                           0.87     24380
   macro avg       0.80      0.68      0.72     24380
weighted avg       0.86      0.87      0.86     24380

Training Data Shape: (73137, 1)
Training Labels Sh

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.89
0.8939704675963905
[[19024  1289]
 [ 1296  2771]]
Number of 0 predicted:  20320
Number of 1 predicted:  4060
              precision    recall  f1-score   support

         0.0       0.94      0.94      0.94     20313
         1.0       0.68      0.68      0.68      4067

    accuracy                           0.89     24380
   macro avg       0.81      0.81      0.81     24380
weighted avg       0.89      0.89      0.89     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.89
0.8876948318293684
[[18948  1365]
 [ 1373  2694]]
Number of 0 predicted:  20321
Number of 1 predicted:  4059
              precision    recall  f1-score   support

         0.0       0.93   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8331419196062346
[[20312     0]
 [ 4068     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91     20312
         1.0       0.00      0.00      0.00      4068

    accuracy                           0.83     24380
   macro avg       0.42      0.50      0.45     24380
weighted avg       0.69      0.83      0.76     24380



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8331419196062346
[[20312     0]
 [ 4068     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91     20312
         1.0       0.00      0.00      0.00      4068

    accuracy                           0.83     24380
   macro avg       0.42      0.50      0.45     24380
weighted avg       0.69      0.83      0.76     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.87
0.8705086136177195
[[19640   672]
 [ 2485  1583]]
Number of 0 predicted:  22125
Number of 1 predicted:  2255
              precision    recall  f1-score   support

         0.0       0.89      0.97      0.93     20312
         1.0       0.70      0.39      0.50      4068

    accuracy                           0.87     24380
   macro avg       0.79      0.68      0.71     24380
weighted avg       0.86      0.87      0.85     24380

Training Data Shape: (73137, 1)
Training Labels Sh

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.89
0.8876948318293684
[[18947  1365]
 [ 1373  2695]]
Number of 0 predicted:  20320
Number of 1 predicted:  4060
              precision    recall  f1-score   support

         0.0       0.93      0.93      0.93     20312
         1.0       0.66      0.66      0.66      4068

    accuracy                           0.89     24380
   macro avg       0.80      0.80      0.80     24380
weighted avg       0.89      0.89      0.89     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.88
0.8817473338802297
[[18875  1437]
 [ 1446  2622]]
Number of 0 predicted:  20321
Number of 1 predicted:  4059
              precision    recall  f1-score   support

         0.0       0.93   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8331009023789991
[[20311     0]
 [ 4069     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91     20311
         1.0       0.00      0.00      0.00      4069

    accuracy                           0.83     24380
   macro avg       0.42      0.50      0.45     24380
weighted avg       0.69      0.83      0.76     24380



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8331009023789991
[[20311     0]
 [ 4069     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91     20311
         1.0       0.00      0.00      0.00      4069

    accuracy                           0.83     24380
   macro avg       0.42      0.50      0.45     24380
weighted avg       0.69      0.83      0.76     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.87
0.8675963904840033
[[19604   707]
 [ 2521  1548]]
Number of 0 predicted:  22125
Number of 1 predicted:  2255
              precision    recall  f1-score   support

         0.0       0.89      0.97      0.92     20311
         1.0       0.69      0.38      0.49      4069

    accuracy                           0.87     24380
   macro avg       0.79      0.67      0.71     24380
weighted avg       0.85      0.87      0.85     24380

Training Data Shape: (73137, 1)
Training Labels Sh

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.88
0.8817473338802297
[[18874  1437]
 [ 1446  2623]]
Number of 0 predicted:  20320
Number of 1 predicted:  4060
              precision    recall  f1-score   support

         0.0       0.93      0.93      0.93     20311
         1.0       0.65      0.64      0.65      4069

    accuracy                           0.88     24380
   macro avg       0.79      0.79      0.79     24380
weighted avg       0.88      0.88      0.88     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.88
0.8767022149302707
[[18813  1498]
 [ 1508  2561]]
Number of 0 predicted:  20321
Number of 1 predicted:  4059
              precision    recall  f1-score   support

         0.0       0.93   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8330598851517638
[[20310     0]
 [ 4070     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91     20310
         1.0       0.00      0.00      0.00      4070

    accuracy                           0.83     24380
   macro avg       0.42      0.50      0.45     24380
weighted avg       0.69      0.83      0.76     24380



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8330598851517638
[[20310     0]
 [ 4070     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91     20310
         1.0       0.00      0.00      0.00      4070

    accuracy                           0.83     24380
   macro avg       0.42      0.50      0.45     24380
weighted avg       0.69      0.83      0.76     24380



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.86
0.8644380639868745
[[19565   745]
 [ 2560  1510]]
Number of 0 predicted:  22125
Number of 1 predicted:  2255
              precision    recall  f1-score   support

         0.0       0.88      0.96      0.92     20310
         1.0       0.67      0.37      0.48      4070

    accuracy                           0.86     24380
   macro avg       0.78      0.67      0.70     24380
weighted avg       0.85      0.86      0.85     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8330598851517638
[[20310     0]
 [ 4070     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.88
0.8767022149302707
[[18812  1498]
 [ 1508  2562]]
Number of 0 predicted:  20320
Number of 1 predicted:  4060
              precision    recall  f1-score   support

         0.0       0.93      0.93      0.93     20310
         1.0       0.63      0.63      0.63      4070

    accuracy                           0.88     24380
   macro avg       0.78      0.78      0.78     24380
weighted avg       0.88      0.88      0.88     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.87
0.8710008203445447
[[18743  1567]
 [ 1578  2492]]
Number of 0 predicted:  20321
Number of 1 predicted:  4059
              precision    recall  f1-score   support

         0.0       0.92   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91     20309
         1.0       0.00      0.00      0.00      4071

    accuracy                           0.83     24380
   macro avg       0.42      0.50      0.45     24380
weighted avg       0.69      0.83      0.76     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8330188679245283
[[20309     0]
 [ 4071     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91     20309
         1.0       0.00      0.00      0.00      4071

    accuracy                           0.83     24380
   macro avg       0.42      0.50      0.45     24380
weighted avg       0.69      0.83      0.76     24380



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8330188679245283
[[20309     0]
 [ 4071     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91     20309
         1.0       0.00      0.00      0.00      4071

    accuracy                           0.83     24380
   macro avg       0.42      0.50      0.45     24380
weighted avg       0.69      0.83      0.76     24380



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.86
0.8626743232157507
[[19543   766]
 [ 2582  1489]]
Number of 0 predicted:  22125
Number of 1 predicted:  2255
              precision    recall  f1-score   support

         0.0       0.88      0.96      0.92     20309
         1.0       0.66      0.37      0.47      4071

    accuracy                           0.86     24380
   macro avg       0.77      0.66      0.70     24380
weighted avg       0.85      0.86      0.85     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8330188679245283
[[20309     0]
 [ 4071     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.87
0.8710008203445447
[[18742  1567]
 [ 1578  2493]]
Number of 0 predicted:  20320
Number of 1 predicted:  4060
              precision    recall  f1-score   support

         0.0       0.92      0.92      0.92     20309
         1.0       0.61      0.61      0.61      4071

    accuracy                           0.87     24380
   macro avg       0.77      0.77      0.77     24380
weighted avg       0.87      0.87      0.87     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.87
0.8653814602132895
[[18674  1635]
 [ 1647  2424]]
Number of 0 predicted:  20321
Number of 1 predicted:  4059
              precision    recall  f1-score   support

         0.0       0.92   

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8329778506972929
[[20308     0]
 [ 4072     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91     20308
         1.0       0.00      0.00      0.00      4072

    accuracy                           0.83     24380
   macro avg       0.42      0.50      0.45     24380
weighted avg       0.69      0.83      0.76     24380



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8329778506972929
[[20308     0]
 [ 4072     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      1.00      0.91     20308
         1.0       0.00      0.00      0.00      4072

    accuracy                           0.83     24380
   macro avg       0.42      0.50      0.45     24380
weighted avg       0.69      0.83      0.76     24380



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.86
0.8603363412633306
[[19514   794]
 [ 2611  1461]]
Number of 0 predicted:  22125
Number of 1 predicted:  2255
              precision    recall  f1-score   support

         0.0       0.88      0.96      0.92     20308
         1.0       0.65      0.36      0.46      4072

    accuracy                           0.86     24380
   macro avg       0.76      0.66      0.69     24380
weighted avg       0.84      0.86      0.84     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.83
0.8329778506972929
[[20308     0]
 [ 4072     0]]
Number of 0 predicted:  24380
Number of 1 predicted:  0
              precision    recall  f1-score   support

         0.0       0.83      

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  mcc = cov_ytyp / np.sqrt(cov_ytyt * cov_ypyp)


Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.87
0.8653814602132895
[[18673  1635]
 [ 1647  2425]]
Number of 0 predicted:  20320
Number of 1 predicted:  4060
              precision    recall  f1-score   support

         0.0       0.92      0.92      0.92     20308
         1.0       0.60      0.60      0.60      4072

    accuracy                           0.87     24380
   macro avg       0.76      0.76      0.76     24380
weighted avg       0.87      0.87      0.87     24380

Training Data Shape: (73137, 1)
Training Labels Shape: (73137,)
Testing Data Shape: (24380, 1)
Testing Labels Shape: (24380,)
Accuracy of logistic regression classifier on test set: 0.86
0.8605824446267433
[[18615  1693]
 [ 1706  2366]]
Number of 0 predicted:  20321
Number of 1 predicted:  4059
              precision    recall  f1-score   support

         0.0       0.92   