In [158]:
import numpy as np 
import tensorflow as tf 
import keras
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Flatten, Dropout
from keras.layers import BatchNormalization
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
from keras.utils.np_utils import to_categorical

# Seed the random number generator:
np.random.seed(1)

def load_data(filename, skiprows = 1):
    """
    Function loads data stored in the file filename and returns it as a numpy ndarray.
    
    Inputs:
        filename: given as a string.
        
    Outputs:
        Data contained in the file, returned as a numpy ndarray
    """
    return np.loadtxt(filename, skiprows=skiprows, delimiter=' ')

In [159]:
X = load_data('training_data.txt')
y = X[:, 0]
X = X[:, 1:]

In [160]:
np.shape(X)

(20000, 1000)

In [161]:
np.shape(y)

(20000,)

In [245]:
def make_model(X_train, y_train, X_test, y_test):
    ## Create your own model here given the constraints in the problem
    model = Sequential() # Use np.reshape instead of this in hw
    model.add(Dense(1000, input_dim = 1000))
    model.add(Activation('relu'))
    model.add(Dropout(0.4))
    model.add(Dense(1000))
    model.add(BatchNormalization())
    model.add(Activation('relu'))
    model.add(Dropout(0.3))
    model.add(Dense(200))
    model.add(Activation('relu'))
    model.add(Dense(2))
    model.add(Activation('softmax'))

    ## Printing a summary of the layers and weights in your model
    #model.summary()
    
    ## In the line below we have specified the loss function as 'mse' (Mean Squared Error) because in the above code we did not one-hot encode the labels.
    ## In your implementation, since you are one-hot encoding the labels, you should use 'categorical_crossentropy' as your loss.
    ## You will likely have the best results with RMS prop or Adam as your optimizer.  In the line below we use Adadelta
    model.compile(loss='categorical_crossentropy',optimizer='Adam', metrics=['accuracy'])
    
    fit = model.fit(X_train, y_train, batch_size=128, epochs=2,
        verbose=1)

    ## Printing the accuracy of our model, according to the loss function specified in model.compile above
    score = model.evaluate(X_test, y_test, verbose=0)
    print('Test score:', score[0])
    print('Test accuracy:', score[1])
    
    return score[1], model

In [246]:
kf = KFold(n_splits = 10, shuffle=True)
scores = []
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    y_train = to_categorical(y_train)
    y_test = to_categorical(y_test)

    scores.append(make_model(X_train, y_train, X_test, y_test)[0])

print(np.mean(scores))

ValueError: Input arrays should have the same number of samples as target arrays. Found 18000 input samples and 36000 target samples.

In [166]:
X_test = load_data('test_data.txt')
X = load_data('training_data.txt')
y = X[:, 0]
y = to_categorical(y)
X = X[:, 1:]
m = make_model(X, y, X, y)[1]
pred = m.predict(X_test)

Epoch 1/3
Epoch 2/3
Epoch 3/3
Test score: 0.130459776002
Test accuracy: 0.96495


In [167]:
def format_pred(pred):
    result = [1 if i[0] == 0 else 0 for i in np.round(pred)]
    with open("result.txt", "w") as f:
        f.write("Id,Prediction\n") 
        for i in range(1, len(result) + 1):
            f.write(str(i) + "," + str(result[i-1]) + "\n")

In [168]:
format_pred(pred)

In [175]:
def f(k , g):
    return max(0, 0.1 + 0.84*k -0.38*g)
g = 0.05
k = 0.39052645470586644
for i in range(6):
    k = f(k ,g)
    print(k)

0.40904222195292783
0.4245954664404593
0.43766019180998583
0.448634561120388
0.45785303134112587
0.4655965463265457


In [216]:
0.5 * np.sqrt(.2) * np.sqrt(.4)

0.1414213562373095

In [241]:
g_arr = ([0.1] * 6) + [0.0917, 0.083,0.075,0.0667,0.0583] + ([0.05] * 989)

In [242]:
k = 0.1
k_arr = []
for i in range(len(g_arr)):
    k = f(k, g_arr[i])
    k_arr.append(k)

In [243]:
e = k_arr[0] * (1 + g_arr[0])
p = e / 1.0655
print(p)
for i in range(1, len(g_arr)):
    e *= (k_arr[i] / k_arr[i - 1]) * (1 + g_arr[i])
    p += e / ((1.0655) ** (i + 1))
    print(e / ((1.0655) ** (i + 1)))

0.15072735804786486
0.19679055642378684
0.2388761086674611
0.2775815311300937
0.3134272322894353
0.34686682542395203
0.3793540914352129
0.4102082942367237
0.43860109227091304
0.4639128452056193
0.48552166542302755
0.5028725087120256
0.5157664103702465
0.5249923098377904
0.5312029648836136
0.5349384092821424
0.5366453705520701
0.5366933435148322
0.5353878956071468
0.5329816806948035
0.5296835560296465
0.5256661290266055
0.5210720042774496
0.5160189546468735
0.5106042017459029
0.5049079591661709
0.4989963654429376
0.49292391184836326
0.4867354520159901
0.4804678654140993
0.4741514342827519
0.467810983382465
0.4614668234037799
0.45513553185192673
0.4488305993973044
0.442562964861941
0.43634145802172236
0.4301731661010396
0.42406373710221995
0.41801763084872234
0.4120383267474976
0.40612849572499393
0.4002901425074742
0.3945247233535865
0.3888332434674396
0.38321633759223
0.37767433668168915
0.37220732304763493
0.3668151759688805
0.3614976094048376
0.3562542031741316
0.351084428724263
0.34

In [244]:
p

46.33686069526799