In [1]:
import matplotlib.pyplot as plt
import seaborn as sns

import numpy as np
import pandas as pd
pd.set_option('display.max_columns', 500)

from tqdm import trange

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, PowerTransformer, MinMaxScaler
from sklearn.metrics import log_loss

from tmpnn import Regression, Classification, L1, L2, Lyapunov1

In [2]:
def balanced_log_loss(y_true, y_pred):
    N0 = np.sum(1 - y_true)
    N1 = np.sum(y_true)

    p0 = np.clip(1 - y_pred, 1e-15, 1 - 1e-15)
    p1 = np.clip(y_pred, 1e-15, 1 - 1e-15)

    log_loss_0 = -np.sum((1 - y_true) * np.log(p0)) / N0
    log_loss_1 = -np.sum(y_true * np.log(p1)) / N1
    return (log_loss_0 + log_loss_1)/2

import tensorflow.keras.backend as K
def blloss(y_true, y_pred):
    y_pred=y_pred[:,-1]
    y_true=y_true[:,-1]
    
    N0 = K.sum(1 - y_true)
    N1 = K.sum(y_true)

    p0 = K.clip(1 - y_pred, 1e-15, 1 - 1e-15)
    p1 = K.clip(y_pred, 1e-15, 1 - 1e-15)

    log_loss_0 = -K.sum((1 - y_true) * K.log(p0)) / N0
    log_loss_1 = -K.sum(y_true * K.log(p1)) / N1
    return (log_loss_0 + log_loss_1)/2

In [3]:
xcols=pd.Index(['AB', 'AF', 'AM', 'AX', 'AZ', 'BC', 'BQ', 'CB', 'CC', 'CD ', 'CH', 'CR',
       'CU', 'CW ', 'DA', 'DE', 'DF', 'DH', 'DI', 'DL', 'DU', 'DY', 'EE', 'EH',
       'EL', 'EP', 'EU', 'FC', 'FD ', 'FE', 'FI', 'FL', 'FR', 'FS', 'GE', 'GI',
       'GL'],
      dtype='object')
print(*xcols)

AB AF AM AX AZ BC BQ CB CC CD  CH CR CU CW  DA DE DF DH DI DL DU DY EE EH EL EP EU FC FD  FE FI FL FR FS GE GI GL


In [20]:
df = pd.read_csv('train.csv')
df = df.fillna(df.mean(numeric_only=True))
X, Y = df.drop(columns=['Id','Class']), np.asfarray(df['Class'])

X['EJ'] = LabelEncoder().fit_transform(X['EJ'])-0.5

# X_tr, X_val, Y_tr, Y_val = train_test_split(X, Y, test_size=0.3, random_state=17)

pt = PowerTransformer()
mms = MinMaxScaler((-0.5,0.5))
X = mms.fit_transform(pt.fit_transform(X))
# X_val = mms.transform(pt.transform(X_val))
X_tr, X_val, Y_tr, Y_val = train_test_split(X, Y, test_size=0.3, random_state=1)


In [10]:
model = Regression(X.shape[1], 1, 2, 10, 4e-4, regularizer=L1(0.02), scale=0)
history = model.fit(X_tr, Y_tr, epochs=1000, verbose=1, 
                    validation_data=(X_val, Y_val), stop_monitor='loss', patience=200)

print('train', balanced_log_loss(Y_tr, model.predict(X_tr)[:, 0]))
print('valid', balanced_log_loss(Y_val, model.predict(X_val)[:, 0]))
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

KeyboardInterrupt: 

In [21]:
model = Classification(X.shape[1], 1, 2, 10, regularizer=L1(0.05))

model.set_learning_rate(5e-4, metrics=blloss)
history = model.fit(X_tr, Y_tr, validation_data=(X_val, Y_val), 
                    epochs=1000, verbose=1,
                    class_weight=pd.Series(Y_tr).value_counts(normalize=True).to_dict())
model.set_learning_rate(1e-4, metrics=blloss)

history = model.fit(X_tr, Y_tr, validation_data=(X_val, Y_val), 
                    epochs=700, verbose=1,
                    class_weight=pd.Series(Y_tr).value_counts(normalize=True).to_dict())

model.set_learning_rate(1e-5, loss=blloss)
history = model.fit(X_tr, Y_tr, validation_data=(X_val, Y_val), 
                    epochs=500, verbose=1,
                    stop_monitor='val_loss', patience=2,
                    class_weight=pd.Series(Y_tr).value_counts(normalize=True).to_dict())

print('train', balanced_log_loss(Y_tr, model.predict(X_tr)[:, 0]))
print('valid', balanced_log_loss(Y_val, model.predict(X_val)[:, 0]))

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

In [22]:

model.set_learning_rate(1e-8, loss=blloss)
history = model.fit(X_tr, Y_tr, validation_data=(X_val, Y_val), 
                    epochs=500, verbose=1,
                    stop_monitor='val_loss', patience=2,
                    class_weight=pd.Series(Y_tr).value_counts(normalize=True).to_dict())

print('train', balanced_log_loss(Y_tr, model.predict(X_tr)[:, 0]))
print('valid', balanced_log_loss(Y_val, model.predict(X_val)[:, 0]))

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

# Submit

In [197]:
pd.DataFrame(np.vstack(model.pnn.get_weights())).to_csv('weights.csv',index=False)

In [201]:
weights = pd.read_csv('weights.csv').values
W0 = weights[0]
W1 = weights[1:58]
W2 = weights[58:]
print(W0.shape, W1.shape, W2.shape)

(57,) (57, 57) (3249, 57)


In [207]:
def tm_map(X, weights=None, steps=10, target_dim = 1):
    # W0, W1, W2 = weights
    X = np.hstack((X, np.zeros((X.shape[0], target_dim))))
    for _ in trange(steps):
        X = W0 + np.dot(X, W1) + np.dot((X[:,:,None]*X[:,None,:]).reshape(X.shape[0],-1), W2)
    return X[:,-1]

In [212]:
df = pd.read_csv('train.csv')
df = df.fillna(df.mean(numeric_only=True))
X, Y = df.drop(columns=['Id','Class']), df['Class']
X['EJ'] = LabelEncoder().fit_transform(X['EJ'])-0.5
X = PowerTransformer().fit_transform(X)
X = MinMaxScaler((-0.5,0.5)).fit_transform(X)

In [217]:
print(balanced_log_loss(Y, np.clip((tm_map(X)+0.5),0,1)))

  0%|          | 0/10 [00:00<?, ?it/s]

100%|██████████| 10/10 [00:00<00:00, 34.31it/s]

0.23209742188705307



