In [1]:
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.layers import LSTM, Dropout
from keras.optimizers import RMSprop, adam
from keras.utils.data_utils import get_file
from keras.callbacks import History, ModelCheckpoint
import matplotlib.pyplot as plt
import numpy as np
import string
import pandas as pd

# Fix the NumPy random seed for reproducibility.
# This cell previously called np.random.seed twice (first with 7, then with
# 131078 after the model params). Only the last call determines the generator
# state, so the dead first call is dropped and the effective seed is kept as
# the single value.
seed = 131078
np.random.seed(seed)

# Model params (training hyper-parameters; not referenced by the prediction
# code visible in this notebook)
batch_size = 256
nb_epoch = 150

# Define number of inputs/outputs to handle
input_size = 81   # features per timestep fed to the first LSTM
output_size= 16   # units of the final Dense layer

# Timeframe: number of timesteps in the model's input window
maxlen = 90

# build the model
print('Build model...')

# NOTE(review): `filepath` is not used in the code visible here, and the
# weights below are loaded from a differently named file -- confirm which
# file is the intended one.
filepath="lstm_2_layers_higher_dropout.hdf5"
model = Sequential()
# The first LSTM returns its full output sequence so that the second LSTM
# receives one vector per timestep.
model.add(LSTM(256, input_shape=(maxlen, input_size), return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(256))
model.add(Dropout(0.6))
# A sigmoid is applied to each of the output_size units independently, so
# every output is a value in (0, 1).
model.add(Dense(output_size))
model.add(Activation('sigmoid'))


# load weights
model.load_weights("lstm_2_layers_higher_dropout - Copie.hdf5")
# Compile model (required to make predictions)
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
print("Created model and loaded weights from file")

Using TensorFlow backend.


Build model...
Created model and loaded weights from file


In [3]:
# Execute the data-preparation script inside this kernel (IPython %run magic).
# NOTE(review): `cycles0` and `transform_user`, which the submission cell uses,
# are not defined anywhere in this notebook -- presumably this script provides
# them; confirm.
%run ../prepare_data.py -N_users 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [4]:
def generate_prediction(history,days=28, maxlen=60, input_size=16, output_size=16, diversity=1):
    """
    Generate `days` consecutive predictions by repeatedly feeding the model's
    own output back in as the newest timestep of its input window.

    Relies on the module-level `model` (anything exposing
    `predict(x, verbose=0)`); `maxlen`, `input_size` and `output_size` must be
    aligned with that model.

    Parameters
    ----------
    history : numpy array indexed as [batch, timestep, feature]
        Past observations. Only the prediction for the first batch entry is
        used. The array is NOT modified: the window is copied before the loop.
    days : int
        Number of prediction steps to generate.
    maxlen : int
        Maximum number of past timesteps to consider. When `history` holds more
        than `maxlen` timesteps, the `maxlen` timesteps *preceding the final
        one* are used (the slice is [-maxlen-1:-1], so the newest timestep is
        excluded). NOTE(review): this exclusion is inherited unchanged --
        confirm it is intended.
    input_size : int
        Number of leading features of `history` fed to the model.
    output_size : int
        Number of values the model predicts per step.
    diversity : unused
        Kept for interface compatibility; no sampling is performed.

    Returns
    -------
    numpy array of shape (days, output_size), one row per generated step.
    """
    generated = np.zeros((days,output_size))
    if history.shape[1]>maxlen:
        window = history[:,-maxlen-1:-1,:input_size]
    else:
        window = history[:,:,:input_size]

    # Basic slicing returns a view of the caller's array, and the loop below
    # rewrites the window in place: take a private copy so `history` is left
    # untouched. Copy as float so that fed-back probabilities are not
    # truncated to integers when `history` has an integer dtype.
    x = np.array(window, dtype=float)

    for day in range(days):
        preds = model.predict(x, verbose=0)[0].reshape(output_size)
        generated[day,:] = preds

        # The model may consume more features than it predicts; the features
        # it does not predict are fed back as zeros.
        if input_size > output_size:
            padded = np.zeros(input_size)
            padded[:output_size] = preds
            preds = padded

        # Slide the window one step: drop the oldest timestep and append the
        # prediction as the newest. Indexing relative to the window's actual
        # length (instead of maxlen-1) also works when fewer than maxlen
        # timesteps were supplied.
        x[:,:-1,:] = x[:,1:,:]
        x[:,-1,:] = preds

    return generated

In [None]:
def format_prediction(prediction,user_id):
    """
    Flatten a 2-D prediction table into long-format rows.

    Parameters
    ----------
    prediction : 2-D array-like (anything `pd.DataFrame` accepts)
        One row per day, one column per symptom.
    user_id :
        Identifier copied into every output row.

    Returns
    -------
    list of [user_id, day, symptom_index, value] lists, ordered by day and
    then by symptom. `day` is the row's index label plus one (1-based for the
    default index); `symptom_index` is the 0-based column position.
    """
    frame = pd.DataFrame(prediction)
    output = []
    # Iterate index labels together with the raw value rows. The value is
    # already in hand, so no per-cell lookup is needed (the previous `.ix`
    # lookup no longer exists in current pandas).
    for day_label, row in zip(frame.index, frame.values):
        for symptom_idx, value in enumerate(row):
            output.append([user_id, day_label + 1, symptom_idx, value])
    return output

def pad_reshape_history(sequence,maxlen,input_size):
    """
    Turn a 2-D history (rows = timesteps, columns = features) into a single
    batch of exactly `maxlen` timesteps.

    - At most `maxlen` rows: the sequence is right-aligned in a zero-filled
      array, i.e. padded with zero rows at the start.
    - More than `maxlen` rows: the `maxlen` rows *preceding the final row* are
      kept (slice [-maxlen-1:-1], so the newest row is excluded).
      NOTE(review): this exclusion is inherited unchanged -- confirm it is
      intended.
    - More than `input_size` columns: only the first `input_size` are kept.

    Returns
    -------
    numpy array of shape (1, maxlen, n_features) with
    n_features = min(sequence.shape[1], input_size).
    """
    n_rows = sequence.shape[0]
    # `<=` rather than `<`: a sequence of exactly maxlen rows must be used
    # whole. Sent through the slicing branch it would lose a row, and the
    # reshape below would then fail or silently scramble the data.
    if n_rows <= maxlen:
        hist = np.zeros((maxlen,sequence.shape[1]))
        hist[maxlen-n_rows:,:] = sequence
    else:
        hist = sequence[-maxlen-1:-1,:]
    if sequence.shape[1] > input_size:
        hist = hist[:,:input_size]
    return hist.reshape(1,maxlen,-1)
    
# Lookup from symptom index to symptom name. The position of each name in the
# list below is its index (0-15); the submission code uses this mapping to
# replace integer symptom indices with names.
symptoms_of_interest_dict = dict(enumerate([
    'happy', 'pms', 'sad', 'sensitive_emotion',                      # 0-3
    'energized', 'exhausted', 'high_energy', 'low_energy',           # 4-7
    'cramps', 'headache', 'ovulation_pain', 'tender_breasts',        # 8-11
    'acne_skin', 'good_skin', 'oily_skin', 'dry_skin',               # 12-15
]))
        
# Number of users to score. The loop previously counted users by hand and
# stopped once the counter exceeded 50, i.e. after the 51st user; the same
# limit is kept here as an explicit, tunable value.
max_users = 51

# One entry per user: the list of long-format rows from format_prediction.
submission = []
for index, woman in cycles0.head(max_users).iterrows():
    current_id = woman.user_id
    # Predict one step per day of the user's expected cycle (rounded up).
    expected_length = int(np.ceil(woman.expected_cycle_length))
    sequence = transform_user(current_id)
    hist = pad_reshape_history(sequence,maxlen,input_size)
    res = generate_prediction(hist,maxlen=maxlen,input_size=input_size,output_size=output_size,days=expected_length)
    submission.append(format_prediction(res,current_id))

# Flatten the per-user row lists and build the frame in a single call. Naming
# the columns up front also yields a valid empty frame when no rows were
# produced, where concatenating per-user frames would raise.
submission_rows = [row for user_rows in submission for row in user_rows]
submission_df = pd.DataFrame(submission_rows, columns=['user_id','day_in_cycle','symptom','probability'])
# Replace the integer symptom index with its name; an unknown index raises
# KeyError rather than silently producing a missing value.
submission_df["symptom"] = submission_df["symptom"].map(lambda idx: symptoms_of_interest_dict[idx])

submission_df.to_csv("results.csv",index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s
