In [1]:
import pandas as pd
import numpy as np
from keras.layers import Dense,Activation,Flatten
from keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

In [2]:
train_data = pd.read_csv('../input/restaurant-revenue-prediction/train.csv.zip')


In [3]:
train_data.head()

Unnamed: 0,Id,Open Date,City,City Group,Type,P1,P2,P3,P4,P5,...,P29,P30,P31,P32,P33,P34,P35,P36,P37,revenue
0,0,07/17/1999,İstanbul,Big Cities,IL,4,5.0,4.0,4.0,2,...,3.0,5,3,4,5,5,4,3,4,5653753.0
1,1,02/14/2008,Ankara,Big Cities,FC,4,5.0,4.0,4.0,1,...,3.0,0,0,0,0,0,0,0,0,6923131.0
2,2,03/09/2013,Diyarbakır,Other,IL,2,4.0,2.0,5.0,2,...,3.0,0,0,0,0,0,0,0,0,2055379.0
3,3,02/02/2012,Tokat,Other,IL,6,4.5,6.0,6.0,4,...,7.5,25,12,10,6,18,12,12,6,2675511.0
4,4,05/09/2009,Gaziantep,Other,IL,3,4.0,3.0,4.0,2,...,3.0,5,1,3,2,3,4,3,3,4316715.0


In [4]:
train_data.shape

(137, 43)

In [5]:
#Validate missing value
def check_null(df):
    return df.columns[df.isna().any()].tolist()

In [6]:
#get columns with null values
column = check_null(train_data)

In [7]:
print(column)

[]


In [8]:
from datetime import date, datetime
def age(open_date):
    created_date = datetime.strptime(open_date, "%m/%d/%Y").date()
    current_date = date.today()
    return current_date.year - created_date.year - ((current_date.month, current_date.day) < (created_date.month, created_date.day))


In [9]:
train_data['Age'] = train_data['Open Date'].apply(age)


In [10]:
#drop unused columns 
train = train_data.drop(columns = ['Open Date','City'],axis = 0)


In [11]:
X = train.loc[:, train.columns != 'revenue']
y = train.loc[:, train.columns == 'revenue']

In [12]:
#handling categorical values
def oneHotEncode(df,colNames):
    for col in colNames:
        if( df[col].dtype == np.dtype('object')):
            dummies = pd.get_dummies(df[col],prefix=col)
            df = pd.concat([df,dummies],axis=1)

            #drop the encoded column
            df.drop([col],axis = 1 , inplace=True)
    return df
    

In [13]:
X = oneHotEncode(X,X.columns)

In [14]:
X.head()

Unnamed: 0,Id,P1,P2,P3,P4,P5,P6,P7,P8,P9,...,P34,P35,P36,P37,Age,City Group_Big Cities,City Group_Other,Type_DT,Type_FC,Type_IL
0,0,4,5.0,4.0,4.0,2,2,5,4,5,...,5,4,3,4,21,1,0,0,0,1
1,1,4,5.0,4.0,4.0,1,2,5,5,5,...,0,0,0,0,13,1,0,0,1,0
2,2,2,4.0,2.0,5.0,2,3,5,5,5,...,0,0,0,0,8,0,1,0,0,1
3,3,6,4.5,6.0,6.0,4,4,10,8,10,...,18,12,12,6,9,0,1,0,0,1
4,4,3,4.0,3.0,4.0,2,2,5,5,5,...,3,4,3,3,12,0,1,0,0,1


In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [16]:
#create Neural network archetecture
def neural_net_archetecture():
    model = Sequential()
    model.add(Dense(128,kernel_initializer = 'normal',input_dim = X_train.shape[1],activation='relu'))
    model.add(Dense(256,kernel_initializer = 'normal',activation = 'relu'))
    model.add(Dense(256,kernel_initializer = 'normal',activation = 'relu'))
    model.add(Dense(256,kernel_initializer = 'normal',activation = 'relu'))
    
    #output layers
    
    model.add(Dense(1,kernel_initializer='normal',activation = 'linear'))
    model.compile(loss = 'mean_absolute_error',optimizer = 'adam',metrics = ['mean_absolute_error'])
    model.summary()
    return model
    

In [17]:
model = neural_net_archetecture()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 128)               5760      
_________________________________________________________________
dense_1 (Dense)              (None, 256)               33024     
_________________________________________________________________
dense_2 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_3 (Dense)              (None, 256)               65792     
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 257       
Total params: 170,625
Trainable params: 170,625
Non-trainable params: 0
_________________________________________________________________


In [18]:
model.fit(X,y,epochs = 500,batch_size=32,validation_split=0.2)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f2be1efa550>

In [19]:
prediction  = model.predict(X_test)

In [20]:
error = mean_squared_error(y_test,prediction)
rmse = np.sqrt(error)
print(rmse)

2736270.311387463


In [21]:
#create Neural network archetecture
def neural_net_archetecture2():
    model = Sequential()
    model.add(Dense(128,kernel_initializer = 'normal',input_dim = X_train.shape[1],activation='relu'))
    model.add(Dense(64,kernel_initializer = 'normal',activation = 'relu'))
    model.add(Dense(32,kernel_initializer = 'normal',activation = 'relu'))
    model.add(Dense(16,kernel_initializer = 'normal',activation = 'relu'))
    model.add(Dense(8,kernel_initializer = 'normal',activation = 'relu'))
    #output layers
    
    model.add(Dense(1,kernel_initializer='normal'))
    model.compile(loss = 'mean_absolute_error',optimizer = 'adam',metrics = ['mean_absolute_error'])
    model.summary()
    return model
    

In [22]:
model2 = neural_net_archetecture2()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_5 (Dense)              (None, 128)               5760      
_________________________________________________________________
dense_6 (Dense)              (None, 64)                8256      
_________________________________________________________________
dense_7 (Dense)              (None, 32)                2080      
_________________________________________________________________
dense_8 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_9 (Dense)              (None, 8)                 136       
_________________________________________________________________
dense_10 (Dense)             (None, 1)                 9         
Total params: 16,769
Trainable params: 16,769
Non-trainable params: 0
__________________________________________________

In [23]:
model2.fit(X,y,epochs = 500,batch_size=32,validation_split=0.2)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f2be1608810>

In [24]:
new_prediction  = model2.predict(X_test)

In [25]:
new_error = mean_squared_error(y_test,new_prediction)
new_rmse = np.sqrt(new_error)
print(new_rmse)

2846677.897542138


In [26]:
if rmse > new_rmse:
    print("2nd model is performing better on this dataset")
else:
    print("1st model is performing better on this dataset")
    

1st model is performing better on this dataset
