In [None]:
from keras import optimizers
from keras import losses
from keras import metrics
from keras import regularizers
from keras.datasets import reuters
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.utils import to_categorical
from keras.preprocessing import sequence

import numpy as np
from numpy import argmax

import matplotlib.pyplot as plt

In [None]:
# Read the raw speed-dating records from disk into a DataFrame.
# engine='python' selects pandas' pure-Python CSV parser.
import pandas as pd
data = pd.read_csv(filepath_or_buffer='data/SpeedDatingData.csv', engine='python')

In [None]:
# Two-step column pruning, expressed as one chain:
#   1. discard identifier / bookkeeping columns that should not influence the match
#   2. keep only the columns that have at least 6000 non-null entries
data = (
    data
    .drop(columns=['iid', 'id', 'gender', 'idg', 'condtn', 'wave', 'round', 'position', 'positin1'])
    .loc[:, lambda frame: frame.count() >= 6000]
)

In [None]:
# coerce 'match' to numeric and drop the rows where it is missing or unparseable
data['match'] = pd.to_numeric(data['match'], errors='coerce')
data = data.dropna(subset=['match'])

# set aside labels as the target: a single-column DataFrame named 'match', cast to int.
# It is built straight from the column so that the int cast is actually kept and no
# lookup by row label is involved (the rows keep whatever index survived the dropna).
labels = data[['match']].astype(int)
data = data.drop(columns=['match'])

# remove every feature whose dtype is not float; cat_cols keeps the removed names
# NOTE(review): this filter also removes integer-typed columns, not only text ones -
# confirm that is the intended meaning of "categorical" here
cat_cols = [col for col in data.columns if data[col].dtype != 'float']
data = data.drop(columns=cat_cols)

# display values in each class
target_count = labels.match.value_counts()
target_count.plot(kind='bar', title='Target Counts')

In [None]:
# oversampling data to improve class balance
# random_state is fixed so the synthetic minority rows are the same on every run.
# NOTE(review): SMOTE presumably cannot handle NaN in its input - confirm that `data`
# has no missing values left at this point.
# NOTE(review): data_resampled / labels_resampled are not read by any later cell
# visible in this notebook.
from imblearn.over_sampling import SMOTE
data_resampled, labels_resampled = SMOTE(random_state=42).fit_resample(data, labels)

In [None]:
# number of rows whose label equals 1, reported per label column
labels.eq(1).sum()

In [None]:
# split the data into train and test
# The split is made on the original rows (stratified on the label and seeded) so the
# test set contains only real observations with the original class ratio. The minority
# class is then oversampled in the training portion only, which is what the model is
# fitted on; oversampling before splitting would leak synthetic neighbours of test rows
# into training.
# NOTE(review): SMOTE presumably cannot handle NaN in its input - confirm that `data`
# has no missing values left at this point.
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(
    data, labels, test_size=0.30, stratify=labels, random_state=42)
x_train, y_train = SMOTE(random_state=42).fit_resample(x_train, y_train)

In [None]:
# one-hot encode the train and test labels.
# num_classes is pinned to 2 so the encoded width is always 2 (matching a 2-unit
# softmax output) instead of being inferred from whichever values a split contains.
from keras.utils import to_categorical
y_train = to_categorical(y_train, num_classes=2)
y_test = to_categorical(y_test, num_classes=2)

In [None]:
# report the shape of the training label array
print(np.shape(y_train))

In [None]:
# hold out the leading 10% (rounded) of the training rows as a validation set;
# the remaining rows are what the model is fitted on
val_size = int(np.round(len(x_train) * .1))

x_val, x_train_minus_val = x_train[:val_size], x_train[val_size:]
y_val, y_train_minus_val = y_train[:val_size], y_train[val_size:]

In [None]:
# shape of the training features left after the validation rows were removed
np.shape(x_train_minus_val)

In [None]:
# train a basic fully connected (feed-forward) classifier:
# two 16-unit ReLU hidden layers followed by a 2-way softmax output
model = Sequential()
# the input width is read from the training features instead of being hard-coded,
# so the model still builds if the preprocessing keeps a different number of columns
model.add(Dense(16, activation='relu', input_shape=(x_train_minus_val.shape[1],)))
model.add(Dense(16, activation='relu'))
model.add(Dense(2, activation='softmax'))

model.compile(optimizer='rmsprop',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# fit on the training rows and report loss / accuracy on the held-out validation rows
history = model.fit(x_train_minus_val,
                    y_train_minus_val,
                    epochs=10,
                    batch_size=216,
                    validation_data=(x_val, y_val))

In [None]:
# show the names of the non-float columns that were removed from the features
cat_cols