# Import libraries

In [None]:
# Basic stuff
import pandas as pd 
import matplotlib.pyplot as plt
import numpy as np

# Specific machine learning packages
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split

# Import of some keras packages
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Activation
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.regularizers import l2
from tensorflow.keras import backend as K

# Load dataset

In [None]:
# Load the data
# Parse train.csv a single time and slice both columns out of the same array
# instead of reading and parsing the file twice.
train_table = np.array(pd.read_csv('train.csv'))
train_data = train_table[:, 0]      # first column: fed to split()/the encoder
train_solution = train_table[:, 1]  # second column: used as training targets
test_data = np.array(pd.read_csv('test.csv'))

# Preprocess dataset

### Split & One hot encoding

In [None]:
# Split every word into its individual characters
def split(listwords):
    """Return an array whose i-th entry holds the characters of listwords[i].

    Parameters
    ----------
    listwords : sequence of str
        The words to break up (a NumPy array or any other iterable of
        strings).

    Returns
    -------
    numpy.ndarray
        One entry per word, each entry being that word's characters. The
        result is a rectangular 2-D array (n_words, word_length) only when
        all the words have the same length.
    """
    # Iterate the words directly; `list(word)` yields one item per character.
    return np.array([np.array(list(word)) for word in listwords])

# Classical onehot encoding
def onehotencode(data):
    """Fit a fresh OneHotEncoder on `data` and return the dense encoding.

    Parameters
    ----------
    data : array-like, 2-D
        Categorical values, one sample per row and one feature per column.

    Returns
    -------
    numpy.ndarray
        Dense one-hot encoded matrix produced by an encoder fitted on
        `data` itself.
    """
    # The `sparse` keyword was renamed to `sparse_output` in scikit-learn 1.2
    # and removed in 1.4. Requesting the default (sparse) output and
    # densifying it explicitly works on every version and yields the same
    # dense array as before.
    encoder = OneHotEncoder()
    return encoder.fit_transform(data).toarray()

# Preprocessing of the input data:
# first break every training word into characters, then one-hot encode them.
train_chars = split(train_data)
encoded_train_X = onehotencode(train_chars)

### Split for training

In [None]:
# Carve out a tiny hold-out sample (0.01 % of the rows, fixed seed) that is
# later used for a prediction check.
# NOTE(review): the model is trained on the *full* dataset, so the hold-out
# rows are also part of the training data -- scores on X_test/y_test are not
# an unbiased estimate. Confirm this is intended.
_, X_test, _, y_test = train_test_split(
    encoded_train_X,
    train_solution,
    test_size=0.0001,
    random_state=42,
)
X_train, y_train = encoded_train_X, train_solution

# Train the model

In [None]:
## Adapted from:
#https://stackoverflow.com/questions/59963911/how-to-write-a-custom-f1-loss-function-with-weighted-average-for-keras
def f1_weighted(true, pred):
    """Support-weighted F1 score computed with Keras backend ops.

    `pred` is used as-is (it is not rounded or thresholded here), so the
    result is a "soft" F1 when `pred` holds probabilities. All sums run over
    axis 0, giving one value per remaining column; the per-column F1 values
    are then averaged with weights proportional to each column's positives.

    Parameters
    ----------
    true : tensor
        Ground-truth values.
    pred : tensor
        Predicted values, same shape as `true`.

    Returns
    -------
    tensor
        Scalar weighted F1.
    """
    eps = K.epsilon()  # keeps every division away from 0/0

    support = K.sum(true, axis=0) + eps         # TP + FN
    predicted = K.sum(pred, axis=0) + eps       # TP + FP
    hits = K.sum(true * pred, axis=0) + eps     # TP

    precision = hits / predicted
    recall = hits / support
    per_column_f1 = 2 * (precision * recall) / (precision + recall + eps)

    # Weight each column's F1 by its share of the ground-truth positives.
    return K.sum(per_column_f1 * support / K.sum(support))

### Creation of the model

In [None]:
# Model declaration: a fully connected network, 150 -> 100 -> 50 ReLU units
# with light dropout after the first two hidden layers, ending in a single
# sigmoid unit.
model = Sequential([
    Dense(150, input_shape=X_train[0].shape),
    Activation('relu'),
    Dropout(0.10),
    Dense(100),
    Activation('relu'),
    Dropout(0.109),
    Dense(50),
    Activation('relu'),
    Dense(1),
    Activation('sigmoid'),
])

# Compile model: binary cross-entropy loss, Adam optimizer, and the custom
# weighted F1 reported as the metric.
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=[f1_weighted],
)

### Fit the model

In [None]:
# Training hyper-parameters
batch_size, epochs = 64, 300
# Passed to Keras as class_weight: class 1 gets weight 5, class 0 weight 1.
weights = {0: 1, 1: 5}

# Fit model on integer-cast inputs and targets
fit_inputs = X_train.astype('int32')
fit_targets = y_train.astype('int32')
model.fit(
    x=fit_inputs,
    y=fit_targets,
    batch_size=batch_size,
    epochs=epochs,
    verbose=True,
    class_weight=weights,
)

### Use the model to predict some data

In [None]:
# `Sequential.predict_classes` was removed in TensorFlow 2.6. For a single
# sigmoid output it was equivalent to thresholding the predicted probability
# at 0.5, which is done explicitly here (result shape and dtype unchanged).
y_pred = (model.predict(X_test.astype('int32')) > 0.5).astype('int32')

## Output results

In [None]:
# Prepare the output data
# The test words must be encoded with the categories learned from the
# *training* words: an encoder re-fitted on the test set can produce a
# different number or order of columns, which no longer matches what the
# model was trained on. Characters never seen in training are encoded as
# all-zero (handle_unknown='ignore') instead of raising.
test_encoder = OneHotEncoder(handle_unknown='ignore')
test_encoder.fit(split(train_data))
encoded_test_X = test_encoder.transform(split(test_data[:, 0])).toarray()

# `Sequential.predict_classes` was removed in TensorFlow 2.6; thresholding
# the sigmoid output at 0.5 is its equivalent for a single-unit model.
# The int32 cast mirrors how inputs are passed to the model elsewhere.
output = (model.predict(encoded_test_X.astype('int32')) > 0.5).astype('int32')

# Output it: one predicted class per line, no index and no header
data = pd.Series(output.reshape(-1))
data.to_csv("output.csv",index=False,header=False)