In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_fscore_support, log_loss, f1_score

In [2]:
def load_input(csv_path, skip_cols=3):
    """Load the feature matrix from a CSV file.

    The first CSV column is consumed as the row index and the first row as
    the header. Of the remaining columns, the first ``skip_cols`` are dropped
    and everything after them is returned.

    Args:
         csv_path: Path to CSV file containing dataset.
         skip_cols: Number of leading (non-index) columns to drop before the
             feature columns begin. Defaults to 3, the value that was
             previously hard-coded (presumably non-feature metadata columns;
             confirm against the CSV schema).

    Returns:
        train_X: 2-D Numpy array of the retained columns, one row per CSV row.
    """
    df = pd.read_csv(csv_path, index_col=0, header=0)
    # Drop the leading columns *before* converting to Numpy. Converting the
    # whole frame first yields an object-dtype array whenever any dropped
    # column is non-numeric, and slicing that array keeps the object dtype.
    train_X = df.iloc[:, skip_cols:].to_numpy()

    return train_X

def load_labels(csv_path):
    """Read a label table from a CSV file and return it as a Numpy array.

    The first CSV column is consumed as the row index and the first row as
    the header; every remaining column is kept.

    Args:
         csv_path: Path to CSV file containing dataset.

    Returns:
        2-D Numpy array holding all non-index columns, one row per CSV row.
    """
    label_frame = pd.read_csv(csv_path, header=0, index_col=0)
    return label_frame.to_numpy()

In [3]:
# Read the feature matrix and the label matrix, and remember their shapes;
# later cells iterate over labels_shape[1] label columns.
data_feature = load_input('train_features.csv')
data_labels = load_labels('train_targets_scored.csv')
feature_shape, labels_shape = data_feature.shape, data_labels.shape
print("Training dataset load finished")

# Hold out 25% of the rows for evaluation. The fixed random_state makes the
# split reproducible across runs.
train_X, test_X, train_Y, test_Y = train_test_split(
    data_feature,
    data_labels,
    test_size=0.25,
    random_state=0,
)

# Report the size of each split.
print("train_X shape: ", train_X.shape)
print("train_Y shape: ", train_Y.shape)
print("test_X shape: ", test_X.shape)
print("test_Y shape: ", test_Y.shape)

Training dataset load finished
train_X shape:  (17860, 872)
train_Y shape:  (17860, 206)
test_X shape:  (5954, 872)
test_Y shape:  (5954, 206)


In [4]:
# Baseline: fit one independent logistic-regression classifier per label
# column and evaluate it on the held-out split.
#
# The metric lists below only receive an entry for labels where a model was
# actually fitted (both classes present in the training split), so position i
# in these lists does not necessarily correspond to label i.
accuracy = []
F1s = []
precisions = []
recalls = []
# One column of hard 0/1 predictions per label, filled in by the loop.
output_matrix = np.zeros((test_Y.shape))

print("---------- Beginning of Training --------")
for label in range(labels_shape[1]):
    # Start of each label
    print("Label: ", label)

    num_positive = np.count_nonzero(train_Y[:, label])

    if num_positive == 0:
        # No positive training example: nothing to fit, predict all zeros.
        prediction = np.zeros((test_Y.shape[0], ))
    elif num_positive == len(train_Y[:, label]):
        # Every training example is positive: predict all ones.
        prediction = np.ones((test_Y.shape[0], ))
    else:
        logreg = LogisticRegression(solver='lbfgs')
        logreg.fit(train_X, train_Y[:, label])

        prediction = logreg.predict(test_X)
        print("Number of Nonzero: ", np.count_nonzero(prediction))
        print(prediction.shape)

        # Accuracy
        score = logreg.score(test_X, test_Y[:, label])
        accuracy.append(score)

        # Precision / recall / F1. No `average` is passed, so each value is a
        # per-class array rather than a single scalar.
        precision, recall, fscore, _ = precision_recall_fscore_support(test_Y[:, label], prediction)
        F1s.append(fscore)
        precisions.append(precision)
        recalls.append(recall)

        print("-----------------------------------------------------------------")

    # Store the prediction for every label, including the constant ones.
    # Previously this happened only in the fitted branch, so an all-ones
    # label would silently have been left as a column of zeros.
    output_matrix[:, label] = prediction


print("Accuracy: ", accuracy)
print(len(accuracy))
print("F1 score: ", F1s)
print(len(F1s))
print("Precision: ", precisions)
print(len(precisions))
print("Recall: ", recalls)
print(len(recalls))


# NOTE(review): output_matrix holds hard class predictions from predict(),
# while log_loss is defined on predicted probabilities. Consider collecting
# predict_proba() output instead; confirm which metric is intended.
loss = log_loss(test_Y, output_matrix)

print("Log loss: ", loss)

---------- Beginning of Training --------
Label:  0
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  1


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  2


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)


  'precision', 'predicted', average, warn_for)


-----------------------------------------------------------------
Label:  3




Number of Nonzero:  81
(5954,)
-----------------------------------------------------------------
Label:  4




Number of Nonzero:  87
(5954,)
-----------------------------------------------------------------
Label:  5




Number of Nonzero:  11
(5954,)
-----------------------------------------------------------------
Label:  6




Number of Nonzero:  2
(5954,)
-----------------------------------------------------------------
Label:  7




Number of Nonzero:  22
(5954,)
-----------------------------------------------------------------
Label:  8
Number of Nonzero:  0
(5954,)


  'precision', 'predicted', average, warn_for)


-----------------------------------------------------------------
Label:  9




Number of Nonzero:  74
(5954,)
-----------------------------------------------------------------
Label:  10




Number of Nonzero:  38
(5954,)
-----------------------------------------------------------------
Label:  11




Number of Nonzero:  8
(5954,)
-----------------------------------------------------------------
Label:  12
Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  13




Number of Nonzero:  2
(5954,)
-----------------------------------------------------------------
Label:  14
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  15


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  16


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  17


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  13
(5954,)
-----------------------------------------------------------------
Label:  18




Number of Nonzero:  7
(5954,)
-----------------------------------------------------------------
Label:  19




Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  20




Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  21


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  7
(5954,)
-----------------------------------------------------------------
Label:  22
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  23


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  2
(5954,)
-----------------------------------------------------------------
Label:  24
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  25


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  26


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  27


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  28


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  3
(5954,)
-----------------------------------------------------------------
Label:  29




Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  30
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  31


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  3
(5954,)
-----------------------------------------------------------------
Label:  32




Number of Nonzero:  3
(5954,)
-----------------------------------------------------------------
Label:  33
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  34


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  35
Number of Nonzero:  0
(5954,)


  'precision', 'predicted', average, warn_for)


-----------------------------------------------------------------
Label:  36




Number of Nonzero:  10
(5954,)
-----------------------------------------------------------------
Label:  37
Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  38




Number of Nonzero:  16
(5954,)
-----------------------------------------------------------------
Label:  39
Number of Nonzero:  0
(5954,)


  'precision', 'predicted', average, warn_for)


-----------------------------------------------------------------
Label:  40




Number of Nonzero:  6
(5954,)
-----------------------------------------------------------------
Label:  41




Number of Nonzero:  11
(5954,)
-----------------------------------------------------------------
Label:  42
Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  43




Number of Nonzero:  74
(5954,)
-----------------------------------------------------------------
Label:  44




Number of Nonzero:  9
(5954,)
-----------------------------------------------------------------
Label:  45




Number of Nonzero:  38
(5954,)
-----------------------------------------------------------------
Label:  46
Number of Nonzero:  0
(5954,)


  'precision', 'predicted', average, warn_for)


-----------------------------------------------------------------
Label:  47




Number of Nonzero:  3
(5954,)
-----------------------------------------------------------------
Label:  48




Number of Nonzero:  3
(5954,)
-----------------------------------------------------------------
Label:  49




Number of Nonzero:  4
(5954,)
-----------------------------------------------------------------
Label:  50
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  51


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  8
(5954,)
-----------------------------------------------------------------
Label:  52
Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  53
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  54


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  72
(5954,)
-----------------------------------------------------------------
Label:  55




Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  56


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  2
(5954,)
-----------------------------------------------------------------
Label:  57




Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  58


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  59
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  60


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)


  'precision', 'predicted', average, warn_for)


-----------------------------------------------------------------
Label:  61




Number of Nonzero:  22
(5954,)
-----------------------------------------------------------------
Label:  62
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  63


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  74
(5954,)
-----------------------------------------------------------------
Label:  64




Number of Nonzero:  3
(5954,)
-----------------------------------------------------------------
Label:  65




Number of Nonzero:  3
(5954,)
-----------------------------------------------------------------
Label:  66




Number of Nonzero:  3
(5954,)
-----------------------------------------------------------------
Label:  67




Number of Nonzero:  2
(5954,)
-----------------------------------------------------------------
Label:  68




Number of Nonzero:  3
(5954,)
-----------------------------------------------------------------
Label:  69
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  70


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  2
(5954,)
-----------------------------------------------------------------
Label:  71




Number of Nonzero:  66
(5954,)
-----------------------------------------------------------------
Label:  72




Number of Nonzero:  20
(5954,)
-----------------------------------------------------------------
Label:  73




Number of Nonzero:  5
(5954,)
-----------------------------------------------------------------
Label:  74
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  75


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  76


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  4
(5954,)
-----------------------------------------------------------------
Label:  77




Number of Nonzero:  74
(5954,)
-----------------------------------------------------------------
Label:  78




Number of Nonzero:  31
(5954,)
-----------------------------------------------------------------
Label:  79




Number of Nonzero:  59
(5954,)
-----------------------------------------------------------------
Label:  80




Number of Nonzero:  69
(5954,)
-----------------------------------------------------------------
Label:  81
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  82
Label:  83


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  53
(5954,)
-----------------------------------------------------------------
Label:  84




Number of Nonzero:  3
(5954,)
-----------------------------------------------------------------
Label:  85




Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  86
Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  87
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  88


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  2
(5954,)
-----------------------------------------------------------------
Label:  89




Number of Nonzero:  64
(5954,)
-----------------------------------------------------------------
Label:  90
Number of Nonzero:  2
(5954,)
-----------------------------------------------------------------
Label:  91
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  92


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  93




Number of Nonzero:  15
(5954,)
-----------------------------------------------------------------
Label:  94




Number of Nonzero:  85
(5954,)
-----------------------------------------------------------------
Label:  95




Number of Nonzero:  2
(5954,)
-----------------------------------------------------------------
Label:  96




Number of Nonzero:  74
(5954,)
-----------------------------------------------------------------
Label:  97
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  98


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  9
(5954,)
-----------------------------------------------------------------
Label:  99




Number of Nonzero:  47
(5954,)
-----------------------------------------------------------------
Label:  100
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  101


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  7
(5954,)
-----------------------------------------------------------------
Label:  102




Number of Nonzero:  7
(5954,)
-----------------------------------------------------------------
Label:  103




Number of Nonzero:  16
(5954,)
-----------------------------------------------------------------
Label:  104




Number of Nonzero:  4
(5954,)
-----------------------------------------------------------------
Label:  105




Number of Nonzero:  81
(5954,)
-----------------------------------------------------------------
Label:  106
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  107


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  108




Number of Nonzero:  6
(5954,)
-----------------------------------------------------------------
Label:  109




Number of Nonzero:  53
(5954,)
-----------------------------------------------------------------
Label:  110




Number of Nonzero:  20
(5954,)
-----------------------------------------------------------------
Label:  111




Number of Nonzero:  3
(5954,)
-----------------------------------------------------------------
Label:  112




Number of Nonzero:  3
(5954,)
-----------------------------------------------------------------
Label:  113




Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  114




Number of Nonzero:  8
(5954,)
-----------------------------------------------------------------
Label:  115




Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  116


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  2
(5954,)
-----------------------------------------------------------------
Label:  117




Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  118


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  11
(5954,)
-----------------------------------------------------------------
Label:  119




Number of Nonzero:  49
(5954,)
-----------------------------------------------------------------
Label:  120
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  121


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  122


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  123


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  124


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  2
(5954,)
-----------------------------------------------------------------
Label:  125
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  126


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  3
(5954,)
-----------------------------------------------------------------
Label:  127




Number of Nonzero:  12
(5954,)
-----------------------------------------------------------------
Label:  128




Number of Nonzero:  10
(5954,)
-----------------------------------------------------------------
Label:  129
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  130


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  131


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  14
(5954,)
-----------------------------------------------------------------
Label:  132
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  133


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  29
(5954,)
-----------------------------------------------------------------
Label:  134




Number of Nonzero:  3
(5954,)
-----------------------------------------------------------------
Label:  135




Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  136


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  226
(5954,)
-----------------------------------------------------------------
Label:  137
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  138


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  139


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  140


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  141


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  142


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  143


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  4
(5954,)
-----------------------------------------------------------------
Label:  144




Number of Nonzero:  24
(5954,)
-----------------------------------------------------------------
Label:  145




Number of Nonzero:  2
(5954,)
-----------------------------------------------------------------
Label:  146




Number of Nonzero:  6
(5954,)
-----------------------------------------------------------------
Label:  147
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  148


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  3
(5954,)
-----------------------------------------------------------------
Label:  149




Number of Nonzero:  52
(5954,)
-----------------------------------------------------------------
Label:  150
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  151


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  109
(5954,)
-----------------------------------------------------------------
Label:  152
Number of Nonzero:  0
(5954,)


  'precision', 'predicted', average, warn_for)


-----------------------------------------------------------------
Label:  153




Number of Nonzero:  34
(5954,)
-----------------------------------------------------------------
Label:  154




Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  155




Number of Nonzero:  4
(5954,)
-----------------------------------------------------------------
Label:  156




Number of Nonzero:  25
(5954,)
-----------------------------------------------------------------
Label:  157




Number of Nonzero:  17
(5954,)
-----------------------------------------------------------------
Label:  158




Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  159




Number of Nonzero:  23
(5954,)
-----------------------------------------------------------------
Label:  160
Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  161




Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  162




Number of Nonzero:  12
(5954,)
-----------------------------------------------------------------
Label:  163




Number of Nonzero:  194
(5954,)
-----------------------------------------------------------------
Label:  164




Number of Nonzero:  3
(5954,)
-----------------------------------------------------------------
Label:  165
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  166




Number of Nonzero:  13
(5954,)
-----------------------------------------------------------------
Label:  167




Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  168


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  4
(5954,)
-----------------------------------------------------------------
Label:  169




Number of Nonzero:  50
(5954,)
-----------------------------------------------------------------
Label:  170
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  171


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  8
(5954,)
-----------------------------------------------------------------
Label:  172
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  173


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  174




Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  175




Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  176




Number of Nonzero:  92
(5954,)
-----------------------------------------------------------------
Label:  177




Number of Nonzero:  64
(5954,)
-----------------------------------------------------------------
Label:  178




Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  179




Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  180


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  181


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  182


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  53
(5954,)
-----------------------------------------------------------------
Label:  183




Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  184




Number of Nonzero:  7
(5954,)
-----------------------------------------------------------------
Label:  185
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  186


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  187




Number of Nonzero:  5
(5954,)
-----------------------------------------------------------------
Label:  188
Number of Nonzero:  4
(5954,)
-----------------------------------------------------------------
Label:  189
Number of Nonzero:  0
(5954,)


  'precision', 'predicted', average, warn_for)


-----------------------------------------------------------------
Label:  190




Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  191




Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  192


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)


  'precision', 'predicted', average, warn_for)


-----------------------------------------------------------------
Label:  193




Number of Nonzero:  1
(5954,)
-----------------------------------------------------------------
Label:  194




Number of Nonzero:  28
(5954,)
-----------------------------------------------------------------
Label:  195
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  196


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  197


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  2
(5954,)
-----------------------------------------------------------------
Label:  198




Number of Nonzero:  2
(5954,)
-----------------------------------------------------------------
Label:  199




Number of Nonzero:  59
(5954,)
-----------------------------------------------------------------
Label:  200




Number of Nonzero:  5
(5954,)
-----------------------------------------------------------------
Label:  201
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  202


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  27
(5954,)
-----------------------------------------------------------------
Label:  203
Number of Nonzero:  0
(5954,)
-----------------------------------------------------------------
Label:  204


  'precision', 'predicted', average, warn_for)


Number of Nonzero:  6
(5954,)
-----------------------------------------------------------------
Label:  205
Number of Nonzero:  2
(5954,)
-----------------------------------------------------------------
Accuracy:  [0.9993281827342962, 0.9991602284178703, 0.9988243197850185, 0.9781659388646288, 0.975142761168962, 0.9952972791400739, 0.9981525025193148, 0.9936177359758146, 0.9996640913671482, 0.9800134363453141, 0.9798454820288881, 0.9979845482028888, 0.9996640913671482, 0.9979845482028888, 0.9996640913671482, 0.9998320456835741, 0.997480685253611, 0.9947934161907961, 0.994961370507222, 0.9989922741014444, 0.9994961370507222, 0.9956331877729258, 0.9993281827342962, 0.9981525025193148, 0.9996640913671482, 0.9994961370507222, 0.9996640913671482, 0.9991602284178703, 0.9956331877729258, 0.9981525025193148, 0.9993281827342962, 0.997480685253611, 0.9973127309371851, 0.9993281827342962, 1.0, 0.9989922741014444, 0.9954652334564998, 0.9988243197850185, 0.9976486395700369, 0.9996640913671482, 0.9

In [5]:
# Class-balanced variant: for each label column, oversample the positive rows
# of the training split so they roughly match the number of negative rows,
# then fit and evaluate one logistic-regression classifier per label.
#
# The metric lists below only receive an entry for labels where a model was
# actually fitted (both classes present in the training split), so position i
# in these lists does not necessarily correspond to label i.
balanced_accuracy = []
balanced_F1s = []
balanced_precisions = []
balanced_recalls = []
# One column of hard 0/1 predictions per label, filled in by the loop.
balanced_output_matrix = np.zeros((test_Y.shape))

print("---------- Beginning of Training --------")
for label in range(labels_shape[1]):
    # Start of each label
    print("Label: ", label)

    label_column = train_Y[:, label]
    num_1 = np.count_nonzero(label_column)

    if num_1 == 0:
        # No positive training example: nothing to fit, predict all zeros.
        prediction = np.zeros((test_Y.shape[0], ))
    elif num_1 == len(label_column):
        # Every training example is positive: predict all ones.
        prediction = np.ones((test_Y.shape[0], ))
    else:
        # Balance the dataset
        num_0 = train_Y.shape[0] - num_1
        # Copies of each positive row. Integer division avoids float rounding
        # in the ratio, and the floor of 1 keeps positives from being dropped
        # entirely when they outnumber the negatives (which would leave a
        # single class and make the fit fail).
        repeat_factor = max(1, num_0 // num_1)

        # Per-row repeat counts: negatives once, positives repeat_factor
        # times; rows with any other value are left out. np.repeat keeps the
        # original row order.
        repeats = np.zeros(label_column.shape[0], dtype=np.intp)
        repeats[label_column == 0] = 1
        repeats[label_column == 1] = repeat_factor

        x_expanded = np.repeat(train_X, repeats, axis=0)
        y_expanded = np.repeat(label_column, repeats)

        logreg = LogisticRegression(solver='lbfgs')

        logreg.fit(x_expanded, y_expanded)

        prediction = logreg.predict(test_X)
        print(prediction.shape)

        # Accuracy
        score = logreg.score(test_X, test_Y[:, label])
        balanced_accuracy.append(score)

        # Precision / recall / F1. No `average` is passed, so each value is a
        # per-class array rather than a single scalar.
        precision, recall, fscore, _ = precision_recall_fscore_support(test_Y[:, label], prediction)
        balanced_F1s.append(fscore)
        balanced_precisions.append(precision)
        balanced_recalls.append(recall)

        print("-----------------------------------------------------------------")

    # Store the prediction for every label, including the constant ones.
    # Previously this happened only in the fitted branch, so an all-ones
    # label would silently have been left as a column of zeros.
    balanced_output_matrix[:, label] = prediction


# NOTE(review): balanced_output_matrix holds hard class predictions from
# predict(), while log_loss is defined on predicted probabilities. Consider
# collecting predict_proba() output instead; confirm which metric is intended.
balanced_loss = log_loss(test_Y, balanced_output_matrix)

print("Log loss: ", balanced_loss)

---------- Beginning of Training --------
Label:  0
(5954,)
-----------------------------------------------------------------
Label:  1


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  2


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  3




(5954,)
-----------------------------------------------------------------
Label:  4




(5954,)
-----------------------------------------------------------------
Label:  5




(5954,)
-----------------------------------------------------------------
Label:  6




(5954,)
-----------------------------------------------------------------
Label:  7




(5954,)
-----------------------------------------------------------------
Label:  8
(5954,)
-----------------------------------------------------------------
Label:  9




(5954,)
-----------------------------------------------------------------
Label:  10




(5954,)
-----------------------------------------------------------------
Label:  11




(5954,)
-----------------------------------------------------------------
Label:  12
(5954,)
-----------------------------------------------------------------
Label:  13




(5954,)
-----------------------------------------------------------------
Label:  14




(5954,)
-----------------------------------------------------------------
Label:  15


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  16


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  17




(5954,)
-----------------------------------------------------------------
Label:  18




(5954,)
-----------------------------------------------------------------
Label:  19




(5954,)
-----------------------------------------------------------------
Label:  20




(5954,)
-----------------------------------------------------------------
Label:  21




(5954,)
-----------------------------------------------------------------
Label:  22
(5954,)
-----------------------------------------------------------------
Label:  23


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  24




(5954,)
-----------------------------------------------------------------
Label:  25


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  26




(5954,)
-----------------------------------------------------------------
Label:  27


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  28


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  29




(5954,)
-----------------------------------------------------------------
Label:  30




(5954,)
-----------------------------------------------------------------
Label:  31




(5954,)
-----------------------------------------------------------------
Label:  32




(5954,)
-----------------------------------------------------------------
Label:  33
(5954,)
-----------------------------------------------------------------
Label:  34


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  35
(5954,)
-----------------------------------------------------------------
Label:  36




(5954,)
-----------------------------------------------------------------
Label:  37




(5954,)
-----------------------------------------------------------------
Label:  38




(5954,)
-----------------------------------------------------------------
Label:  39
(5954,)
-----------------------------------------------------------------
Label:  40


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  41




(5954,)
-----------------------------------------------------------------
Label:  42




(5954,)
-----------------------------------------------------------------
Label:  43




(5954,)
-----------------------------------------------------------------
Label:  44




(5954,)
-----------------------------------------------------------------
Label:  45




(5954,)
-----------------------------------------------------------------
Label:  46




(5954,)
-----------------------------------------------------------------
Label:  47




(5954,)
-----------------------------------------------------------------
Label:  48




(5954,)
-----------------------------------------------------------------
Label:  49




(5954,)
-----------------------------------------------------------------
Label:  50




(5954,)
-----------------------------------------------------------------
Label:  51


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  52




(5954,)
-----------------------------------------------------------------
Label:  53
(5954,)
-----------------------------------------------------------------
Label:  54


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  55




(5954,)
-----------------------------------------------------------------
Label:  56




(5954,)
-----------------------------------------------------------------
Label:  57




(5954,)
-----------------------------------------------------------------
Label:  58




(5954,)
-----------------------------------------------------------------
Label:  59




(5954,)
-----------------------------------------------------------------
Label:  60




(5954,)
-----------------------------------------------------------------
Label:  61




(5954,)
-----------------------------------------------------------------
Label:  62
(5954,)
-----------------------------------------------------------------
Label:  63


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  64




(5954,)
-----------------------------------------------------------------
Label:  65




(5954,)
-----------------------------------------------------------------
Label:  66




(5954,)
-----------------------------------------------------------------
Label:  67




(5954,)
-----------------------------------------------------------------
Label:  68




(5954,)
-----------------------------------------------------------------
Label:  69
(5954,)
-----------------------------------------------------------------
Label:  70




(5954,)
-----------------------------------------------------------------
Label:  71




(5954,)
-----------------------------------------------------------------
Label:  72




(5954,)
-----------------------------------------------------------------
Label:  73




(5954,)
-----------------------------------------------------------------
Label:  74




(5954,)
-----------------------------------------------------------------
Label:  75


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  76


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  77




(5954,)
-----------------------------------------------------------------
Label:  78




(5954,)
-----------------------------------------------------------------
Label:  79




(5954,)
-----------------------------------------------------------------
Label:  80




(5954,)
-----------------------------------------------------------------
Label:  81
(5954,)
-----------------------------------------------------------------
Label:  82
Label:  83


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  84




(5954,)
-----------------------------------------------------------------
Label:  85




(5954,)
-----------------------------------------------------------------
Label:  86
(5954,)
-----------------------------------------------------------------
Label:  87




(5954,)
-----------------------------------------------------------------
Label:  88


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  89




(5954,)
-----------------------------------------------------------------
Label:  90




(5954,)
-----------------------------------------------------------------
Label:  91
(5954,)
-----------------------------------------------------------------
Label:  92


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  93




(5954,)
-----------------------------------------------------------------
Label:  94




(5954,)
-----------------------------------------------------------------
Label:  95




(5954,)
-----------------------------------------------------------------
Label:  96




(5954,)
-----------------------------------------------------------------
Label:  97




(5954,)
-----------------------------------------------------------------
Label:  98




(5954,)
-----------------------------------------------------------------
Label:  99




(5954,)
-----------------------------------------------------------------
Label:  100




(5954,)
-----------------------------------------------------------------
Label:  101


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  102




(5954,)
-----------------------------------------------------------------
Label:  103




(5954,)
-----------------------------------------------------------------
Label:  104




(5954,)
-----------------------------------------------------------------
Label:  105




(5954,)
-----------------------------------------------------------------
Label:  106
(5954,)
-----------------------------------------------------------------
Label:  107




(5954,)
-----------------------------------------------------------------
Label:  108




(5954,)
-----------------------------------------------------------------
Label:  109




(5954,)
-----------------------------------------------------------------
Label:  110




(5954,)
-----------------------------------------------------------------
Label:  111




(5954,)
-----------------------------------------------------------------
Label:  112




(5954,)
-----------------------------------------------------------------
Label:  113




(5954,)
-----------------------------------------------------------------
Label:  114




(5954,)
-----------------------------------------------------------------
Label:  115




(5954,)
-----------------------------------------------------------------
Label:  116




(5954,)
-----------------------------------------------------------------
Label:  117




(5954,)
-----------------------------------------------------------------
Label:  118




(5954,)
-----------------------------------------------------------------
Label:  119




(5954,)
-----------------------------------------------------------------
Label:  120
(5954,)
-----------------------------------------------------------------
Label:  121


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  122


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  123
(5954,)
-----------------------------------------------------------------
Label:  124




(5954,)
-----------------------------------------------------------------
Label:  125
(5954,)
-----------------------------------------------------------------
Label:  126


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  127




(5954,)
-----------------------------------------------------------------
Label:  128




(5954,)
-----------------------------------------------------------------
Label:  129




(5954,)
-----------------------------------------------------------------
Label:  130




(5954,)
-----------------------------------------------------------------
Label:  131




(5954,)
-----------------------------------------------------------------
Label:  132




(5954,)
-----------------------------------------------------------------
Label:  133




(5954,)
-----------------------------------------------------------------
Label:  134




(5954,)
-----------------------------------------------------------------
Label:  135




(5954,)
-----------------------------------------------------------------
Label:  136




(5954,)
-----------------------------------------------------------------
Label:  137
(5954,)
-----------------------------------------------------------------
Label:  138


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  139




(5954,)
-----------------------------------------------------------------
Label:  140


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  141
(5954,)
-----------------------------------------------------------------
Label:  142


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  143




(5954,)
-----------------------------------------------------------------
Label:  144




(5954,)
-----------------------------------------------------------------
Label:  145




(5954,)
-----------------------------------------------------------------
Label:  146




(5954,)
-----------------------------------------------------------------
Label:  147




(5954,)
-----------------------------------------------------------------
Label:  148




(5954,)
-----------------------------------------------------------------
Label:  149




(5954,)
-----------------------------------------------------------------
Label:  150




(5954,)
-----------------------------------------------------------------
Label:  151




(5954,)
-----------------------------------------------------------------
Label:  152




(5954,)
-----------------------------------------------------------------
Label:  153




(5954,)
-----------------------------------------------------------------
Label:  154




(5954,)
-----------------------------------------------------------------
Label:  155




(5954,)
-----------------------------------------------------------------
Label:  156




(5954,)
-----------------------------------------------------------------
Label:  157




(5954,)
-----------------------------------------------------------------
Label:  158




(5954,)
-----------------------------------------------------------------
Label:  159




(5954,)
-----------------------------------------------------------------
Label:  160




(5954,)
-----------------------------------------------------------------
Label:  161




(5954,)
-----------------------------------------------------------------
Label:  162




(5954,)
-----------------------------------------------------------------
Label:  163




(5954,)
-----------------------------------------------------------------
Label:  164




(5954,)
-----------------------------------------------------------------
Label:  165
(5954,)
-----------------------------------------------------------------
Label:  166




(5954,)
-----------------------------------------------------------------
Label:  167




(5954,)
-----------------------------------------------------------------
Label:  168




(5954,)
-----------------------------------------------------------------
Label:  169




(5954,)
-----------------------------------------------------------------
Label:  170




(5954,)
-----------------------------------------------------------------
Label:  171


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  172
(5954,)
-----------------------------------------------------------------
Label:  173


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  174




(5954,)
-----------------------------------------------------------------
Label:  175




(5954,)
-----------------------------------------------------------------
Label:  176




(5954,)
-----------------------------------------------------------------
Label:  177




(5954,)
-----------------------------------------------------------------
Label:  178




(5954,)
-----------------------------------------------------------------
Label:  179




(5954,)
-----------------------------------------------------------------
Label:  180




(5954,)
-----------------------------------------------------------------
Label:  181




(5954,)
-----------------------------------------------------------------
Label:  182




(5954,)
-----------------------------------------------------------------
Label:  183




(5954,)
-----------------------------------------------------------------
Label:  184




(5954,)
-----------------------------------------------------------------
Label:  185
(5954,)
-----------------------------------------------------------------
Label:  186


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  187




(5954,)
-----------------------------------------------------------------
Label:  188




(5954,)
-----------------------------------------------------------------
Label:  189




(5954,)
-----------------------------------------------------------------
Label:  190




(5954,)
-----------------------------------------------------------------
Label:  191




(5954,)
-----------------------------------------------------------------
Label:  192
(5954,)
-----------------------------------------------------------------
Label:  193


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  194




(5954,)
-----------------------------------------------------------------
Label:  195
(5954,)
-----------------------------------------------------------------
Label:  196
(5954,)
-----------------------------------------------------------------
Label:  197


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  198




(5954,)
-----------------------------------------------------------------
Label:  199




(5954,)
-----------------------------------------------------------------
Label:  200




(5954,)
-----------------------------------------------------------------
Label:  201
(5954,)
-----------------------------------------------------------------
Label:  202


  'precision', 'predicted', average, warn_for)


(5954,)
-----------------------------------------------------------------
Label:  203




(5954,)
-----------------------------------------------------------------
Label:  204




(5954,)
-----------------------------------------------------------------
Label:  205




(5954,)
-----------------------------------------------------------------
Log loss:  14.382227498436377


In [6]:
# Weighted-average F1.
# Per-label F1 scores are averaged with each label weighted by its support
# (the number of true instances of that label). Compared with 'macro' this
# accounts for label imbalance; the resulting F-score is not guaranteed to
# lie between precision and recall.
imbalanced_F1_weighted = f1_score(y_true=test_Y, y_pred=output_matrix, average='weighted')
balanced_F1_weighted = f1_score(y_true=test_Y, y_pred=balanced_output_matrix, average='weighted')

# Imbalanced model first, balanced model second, one value per line.
print(imbalanced_F1_weighted, balanced_F1_weighted, sep='\n')

  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


0.2842171097270266
0.280937488892575


In [7]:
# Micro-average F1.
# True positives, false negatives and false positives are counted globally
# over all labels before the score is computed.
imbalanced_F1_micro = f1_score(y_true=test_Y, y_pred=output_matrix, average='micro')
balanced_F1_micro = f1_score(y_true=test_Y, y_pred=balanced_output_matrix, average='micro')

# Imbalanced model first, balanced model second, one value per line.
print(imbalanced_F1_micro, balanced_F1_micro, sep='\n')

0.3066648114651454
0.18170886498764113


In [8]:
# Sample-average F1.
# The score is computed per instance (row) and then averaged; this is only
# meaningful for multilabel classification, where it differs from
# accuracy_score.
imbalanced_F1_samples = f1_score(y_true=test_Y, y_pred=output_matrix, average='samples')
balanced_F1_samples = f1_score(y_true=test_Y, y_pred=balanced_output_matrix, average='samples')

# Imbalanced model first, balanced model second, one value per line.
print(imbalanced_F1_samples, balanced_F1_samples, sep='\n')

0.12799398563590703
0.139849585155262


  'precision', 'predicted', average, warn_for)
  'recall', 'true', average, warn_for)


In [9]:
# Macro-average F1.
# The score is computed per label and the unweighted mean is taken, so label
# imbalance is not taken into account.
imbalanced_F1_macro = f1_score(y_true=test_Y, y_pred=output_matrix, average='macro')
balanced_F1_macro = f1_score(y_true=test_Y, y_pred=balanced_output_matrix, average='macro')

# Imbalanced model first, balanced model second, one value per line.
print(imbalanced_F1_macro, balanced_F1_macro, sep='\n')

0.1336983441084215
0.1506896050066669


In [16]:
# Write both prediction matrices as CSV files that reuse the row index and
# column names of the competition's sample submission.
# NOTE(review): `balanced_output_matrix` is allocated with the shape of the
# held-out split `test_Y`; the DataFrame constructor needs exactly as many
# rows as the sample submission. Presumably the matrices are recomputed for
# the competition test set before this cell runs -- verify.
submission = pd.read_csv('/kaggle/input/lish-moa/sample_submission.csv', index_col=0)
submission_layout = {'index': submission.index, 'columns': submission.columns}

# Predictions of the model trained on the original (imbalanced) data.
df = pd.DataFrame(output_matrix, **submission_layout)
df.to_csv('submission_imbalanced.csv')

# Predictions of the model trained on the oversampled (balanced) data.
df = pd.DataFrame(balanced_output_matrix, **submission_layout)
df.to_csv('submission_balanced.csv')