In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os, re
from tensorflow import keras
import keras
from tensorflow.keras import layers, models, regularizers
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn import metrics, preprocessing
from keras.utils.np_utils import to_categorical
from keras.layers import BatchNormalization

In [2]:
train_df = pd.read_csv('large.csv', sep='\t')
test_df = pd.read_csv('test.csv', sep='\t')

In [3]:
train_df = train_df.round(4)
test_df = test_df.round(4)

In [4]:
X = train_df[['peak_1', 'h_1', 'k_1', 'l_1', 'peak_2', 'h_2', 'k_2', 'l_2', 'peak_3', 'h_3', 'k_3', 'l_3', 'peak_4', 'h_4', 'k_4', 'l_4', 'peak_5', 'h_5', 'k_5', 'l_5', 'peak_6', 'h_6', 'k_6', 'l_6']]
y = train_df['space_code']

In [5]:
skf = StratifiedKFold(n_splits=10, shuffle=True)

In [6]:
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=np.random.seed(42))
cvscores = []
for train, test in kfold.split(X, y):
    
    one_hot_train_labels = to_categorical(y[train])
    one_hot_test_labels = to_categorical(y[test])
    
    X_val = X.iloc[train][:1000]
    partial_X_train = X.iloc[train][1000:]
    y_val = one_hot_train_labels[:1000]
    partial_y_train = one_hot_train_labels[1000:]
    
    model = models.Sequential()
    model.add(layers.Dense(512, activation='relu', input_dim=24))
    BatchNormalization()
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(9, activation='softmax'))
    
    model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
    
    model.fit(partial_X_train, partial_y_train, epochs=500, batch_size=100, validation_data=(X_val, y_val))
    
    scores = model.evaluate(X.iloc[test], one_hot_test_labels, verbose=0)
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    cvscores.append(scores[1] * 100)

Epoch 1/500
Epoch 498/500
Epoch 499/500


Epoch 500/500
accuracy: 75.40%


In [7]:
print("%.2f%% (+/- %.2f%%)" % (np.mean(cvscores), np.std(cvscores)))

75.87% (+/- 0.72%)


In [8]:
X_test = test_df[['peak_1', 'h_1', 'k_1', 'l_1', 'peak_2', 'h_2', 'k_2', 'l_2', 'peak_3', 'h_3', 'k_3', 'l_3', 'peak_4', 'h_4', 'k_4', 'l_4', 'peak_5', 'h_5', 'k_5', 'l_5', 'peak_6', 'h_6', 'k_6', 'l_6']]
y_test = test_df['space_code']

In [9]:
measured_test_labels = to_categorical(y_test)

In [10]:
model.evaluate(X_test, measured_test_labels)



[86.255859375, 0.4280155599117279]