## Setup

In [1]:
import csv
import pandas as pd
import numpy as np

train_df = pd.read_csv("training.csv", delimiter=",", header=0)
test_df = pd.read_csv("testing.csv", delimiter=",", header=0)
confidence_df = pd.read_csv("annotation_confidence.csv", delimiter=",", header=0)
additional_df = pd.read_csv("additional_training.csv", delimiter=",", header=0)
proportions_df = pd.read_csv("test_proportions.csv", delimiter=",", header=None, dtype="unicode")

all_df = pd.concat([train_df, additional_df])

unconfident_list = confidence_df[confidence_df.confidence != 1]['ID'].tolist()
confident_list = confidence_df[confidence_df.confidence == 1]['ID'].tolist()

## Confident Training Average

 - Training Data
 - Confident Training Data
 - Confident 0 Training Data
 - Confident 1 Trainng Data
 - Averages for above 2

In [2]:
confident_train_df = train_df[train_df['ID'].isin(confident_list)]
confident_0_train_df = confident_train_df[confident_train_df.prediction == 0]
confident_1_train_df = confident_train_df[confident_train_df.prediction == 1]
confident_0_avg = pd.DataFrame(confident_0_train_df.mean(axis=0)[1:-1]).T
confident_1_avg = pd.DataFrame(confident_1_train_df.mean(axis=0)[1:-1]).T

## Unconfident Training and Additional Average

 - Unconfident All Data
 - Unconfident 0 All Data
 - Unconfident 1 All Data
 - Averages for above 2

In [3]:
unconfident_all_df =  all_df[all_df['ID'].isin(unconfident_list)]
unconfident_0_all_df = unconfident_all_df[unconfident_all_df.prediction == 0]
unconfident_1_all_df = unconfident_all_df[unconfident_all_df.prediction == 1]
unconfident_0_avg = pd.DataFrame(unconfident_0_all_df.mean(axis=0)[1:-1]).T
unconfident_1_avg = pd.DataFrame(unconfident_1_all_df.mean(axis=0)[1:-1]).T

## Fill Additional With Confident Averages

 - Additional Data
 - Confident Additional Data
 - Confident 0 Additional Data
 - Confident 1 Additional Data
 - FillNa above 2 with Confident Averages
 - Update Additional with New values

In [4]:
confident_additional_df = additional_df[additional_df['ID'].isin(confident_list)].copy()
confident_0_additional_df = confident_additional_df[confident_additional_df.prediction == 0]
confident_1_additional_df = confident_additional_df[confident_additional_df.prediction == 1]
confident_0_additional_df = confident_0_additional_df.fillna(value=confident_0_avg.iloc[0])
confident_1_additional_df = confident_1_additional_df.fillna(value=confident_1_avg.iloc[0])

In [5]:
confident_additional_df.update(confident_0_additional_df)
confident_additional_df.update(confident_1_additional_df)
additional_df.update(confident_additional_df)

## Fill Additional With Unconfident Averages

 - Unconfident AdditionalData
 - Unconfident 0 Additional Data
 - Unconfident 1 Additional Data
 - FillNa above 2 with Unconfident Averages
 - Update Additional with New Values


In [6]:
unconfident_additional_df = additional_df[additional_df['ID'].isin(unconfident_list)].copy()
unconfident_0_additional_df = unconfident_additional_df[unconfident_additional_df.prediction == 0]
unconfident_1_additional_df = unconfident_additional_df[unconfident_additional_df.prediction == 1]
unconfident_0_additional_df = unconfident_0_additional_df.fillna(value=unconfident_0_avg.iloc[0])
unconfident_1_additional_df = unconfident_1_additional_df.fillna(value=unconfident_1_avg.iloc[0])

In [7]:
unconfident_additional_df.update(unconfident_0_additional_df)
unconfident_additional_df.update(unconfident_1_additional_df)
additional_df.update(unconfident_additional_df)

### Update All with New Additional Values

In [8]:
all_df.update(additional_df)

# Multi-Layered Perceptron

In [9]:
from keras.models import Sequential
from keras.layers import Dense
from keras.utils.vis_utils import plot_model
from keras.wrappers.scikit_learn import KerasRegressor
from matplotlib import pyplot
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import KFold, cross_val_score, cross_val_predict

Using TensorFlow backend.


In [10]:
all_input_data = all_df.drop(['ID', 'prediction'], axis=1).copy()
all_output_data = all_df['prediction'].copy()

testing_input_data = test_df.drop(['ID'], axis=1).copy()

seed = 1

## Create MLP Model

In [11]:
#create model
def model():
    model = Sequential()
    model.add(Dense(18, input_dim=4608, activation='relu'))
    model.add(Dense(9, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    #model.summary()
    return model

In [12]:
estimator = KerasRegressor(build_fn=model, nb_epoch=100, batch_size=36, verbose=0)
kfold = KFold(n_splits=18, random_state=seed)

In [13]:
results = cross_val_score(estimator, all_input_data, all_output_data, cv=kfold)
print("Results: %.2f (%.2f) MSE" % (results.mean(), results.std()))
estimator.fit(all_input_data, all_output_data)
predictions = estimator.predict(testing_input_data)
rounded = [round(x) for x in predictions]

Results: -0.37 (0.05) MSE


In [14]:
val1 = (rounded.count(1) / 5040) *100
val0 = (rounded.count(0) / 5040) *100
print('Ones:' , val1)
print('Zeros:', val0)
display(proportions_df)

Ones: 59.30555555555556
Zeros: 40.69444444444444


Unnamed: 0,0
0,0.4286 class 1
1,0.5714 class 0


In [15]:
#create results dataframe
results_df = test_df[['ID']].copy()
results_df['prediction'] = pd.Series(rounded).astype(int)
#display(results_df[:9])

In [16]:
#write results to file
results_df.to_csv('submission5.csv', index=False)

In [17]:
# Non Clear Re Run : 0(42.61904761904762) 1(57.38095238095238)
# Clear Re Run : 0(44.44444444444444) 1(55.55555555555556)
# Clear Re Run Again : 0(43.37301587301587) 1(56.62698412698413)