# Prediction using keras

Let us import pandas to read our csv file.

In [1]:
import pandas as pd

In [2]:
df=pd.read_csv('Dataset/train_clean_data.csv')

In [3]:
df.head()

Unnamed: 0,Total_Stops,Price,Journey_Month,Journey_Day,Dep_hour,Dep_min,Arrival_hour,Arrival_min,Duration_hour,Duration_min,...,Airline_Vistara Premium economy,Source_Chennai,Source_Delhi,Source_Kolkata,Source_Mumbai,Destination_Cochin,Destination_Delhi,Destination_Hyderabad,Destination_Kolkata,Destination_New Delhi
0,0,3897,3,24,22,20,1,10,2,50,...,0,0,0,0,0,0,0,0,0,1
1,2,7662,5,1,5,50,13,15,7,25,...,0,0,0,1,0,0,0,0,0,0
2,2,13882,6,9,9,25,4,25,19,0,...,0,0,1,0,0,1,0,0,0,0
3,1,6218,5,12,18,5,23,30,5,25,...,0,0,0,1,0,0,0,0,0,0
4,1,13302,3,1,16,50,21,35,4,45,...,0,0,0,0,0,0,0,0,0,1


Let us assign price as output and rest all of the columns as input.

In [4]:
X=df.drop(['Price'],axis=1)
y=df[['Price']]
X=X.iloc[:,:].values
y=y.iloc[:,:].values

In [5]:
X

array([[ 0,  3, 24, ...,  0,  0,  1],
       [ 2,  5,  1, ...,  0,  0,  0],
       [ 2,  6,  9, ...,  0,  0,  0],
       ...,
       [ 0,  4, 27, ...,  0,  0,  0],
       [ 0,  3,  1, ...,  0,  0,  1],
       [ 2,  5,  9, ...,  0,  0,  0]], dtype=int64)

In [6]:
X.shape

(10681, 28)

In [7]:
y

array([[ 3897],
       [ 7662],
       [13882],
       ...,
       [ 7229],
       [12648],
       [11753]], dtype=int64)

In [8]:
y.shape

(10681, 1)

Let us normalize our input and output.

In [9]:
# data normalization with sklearn
from sklearn.preprocessing import MinMaxScaler,StandardScaler

scaler = StandardScaler() 
X_scaled = scaler.fit_transform(X)
y_scaled = scaler.fit_transform(y)


In [10]:
X_scaled

array([[-1.22058845, -1.46761891,  1.23719214, ..., -0.26401621,
        -0.1923286 ,  3.23423877],
       [ 1.74138025,  0.25016506, -1.47537531, ..., -0.26401621,
        -0.1923286 , -0.30919177],
       [ 1.74138025,  1.10905704, -0.53187359, ..., -0.26401621,
        -0.1923286 , -0.30919177],
       ...,
       [-1.22058845, -0.60872693,  1.59100529, ..., -0.26401621,
        -0.1923286 , -0.30919177],
       [-1.22058845, -1.46761891, -1.47537531, ..., -0.26401621,
        -0.1923286 ,  3.23423877],
       [ 1.74138025,  0.25016506, -0.53187359, ..., -0.26401621,
        -0.1923286 , -0.30919177]])

In [11]:
y_scaled

array([[-1.12564296],
       [-0.30917044],
       [ 1.03968987],
       ...,
       [-0.4030702 ],
       [ 0.7720864 ],
       [ 0.57799797]])

Let us import Sequential for declaring our model and Dense to specify number of layers.

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

Let us define keras model with 18 layers with input layer having 256 nodes, optimizer adam and loss mean_squared_error for regression.

In [13]:
def regression_model():
    # create model
    model = Sequential()
    model.add(Dense(256, activation='relu', input_shape=(28,)))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1))
    
    # compile model
    model.compile(optimizer='adam', loss='mean_squared_error')
    return model

In [14]:
model = regression_model()

Let us fit our training data.

In [15]:
model.fit(X_scaled, y_scaled, validation_split=0.3, epochs=100, verbose=2)

Epoch 1/100
234/234 - 2s - loss: 0.5184 - val_loss: 0.3360
Epoch 2/100
234/234 - 1s - loss: 0.3497 - val_loss: 0.2498
Epoch 3/100
234/234 - 1s - loss: 0.2811 - val_loss: 0.3672
Epoch 4/100
234/234 - 1s - loss: 0.3190 - val_loss: 0.2463
Epoch 5/100
234/234 - 1s - loss: 0.2534 - val_loss: 0.2528
Epoch 6/100
234/234 - 1s - loss: 0.2895 - val_loss: 0.2475
Epoch 7/100
234/234 - 1s - loss: 0.3119 - val_loss: 0.2729
Epoch 8/100
234/234 - 1s - loss: 0.2323 - val_loss: 0.2600
Epoch 9/100
234/234 - 1s - loss: 0.2490 - val_loss: 0.2390
Epoch 10/100
234/234 - 1s - loss: 0.2549 - val_loss: 0.2181
Epoch 11/100
234/234 - 1s - loss: 0.2419 - val_loss: 0.2468
Epoch 12/100
234/234 - 1s - loss: 0.2076 - val_loss: 0.2400
Epoch 13/100
234/234 - 1s - loss: 0.2281 - val_loss: 0.2278
Epoch 14/100
234/234 - 1s - loss: 0.1900 - val_loss: 0.2405
Epoch 15/100
234/234 - 1s - loss: 0.2213 - val_loss: 0.2096
Epoch 16/100
234/234 - 1s - loss: 0.2132 - val_loss: 0.2031
Epoch 17/100
234/234 - 1s - loss: 0.1844 - val_lo

<tensorflow.python.keras.callbacks.History at 0x247d4e36a88>

Let us import testing data and normalize it.

In [16]:
df=pd.read_csv('Dataset/test_clean_data.csv')

In [17]:
df.head()

Unnamed: 0,Total_Stops,Journey_Month,Journey_Day,Dep_hour,Dep_min,Arrival_hour,Arrival_min,Duration_hour,Duration_min,Airline_Air India,...,Airline_Vistara Premium economy,Source_Chennai,Source_Delhi,Source_Kolkata,Source_Mumbai,Destination_Cochin,Destination_Delhi,Destination_Hyderabad,Destination_Kolkata,Destination_New Delhi
0,1,6,6,17,30,4,25,10,55,0,...,0,0,1,0,0,1,0,0,0,0
1,1,5,12,6,20,10,20,4,0,0,...,0,0,0,1,0,0,0,0,0,0
2,1,5,21,19,15,19,0,23,45,0,...,0,0,1,0,0,1,0,0,0,0
3,1,5,21,8,0,21,0,13,0,0,...,0,0,1,0,0,1,0,0,0,0
4,0,6,24,23,55,2,45,2,50,0,...,0,0,0,0,0,0,1,0,0,0


In [18]:
X_test=df.iloc[:,:].values

In [19]:
X_test

array([[ 1,  6,  6, ...,  0,  0,  0],
       [ 1,  5, 12, ...,  0,  0,  0],
       [ 1,  5, 21, ...,  0,  0,  0],
       ...,
       [ 1,  3,  6, ...,  0,  0,  0],
       [ 1,  3,  6, ...,  0,  0,  0],
       [ 1,  6, 15, ...,  0,  0,  0]], dtype=int64)

In [20]:
X_test.shape

(2671, 28)

Let us normalize our testing data.

In [21]:
X_scaled_test = scaler.fit_transform(X_test)

In [22]:
X_scaled_test

array([[ 0.24785314,  1.09474457, -0.83733254, ..., -0.27358563,
        -0.16997235, -0.31276448],
       [ 0.24785314,  0.2405048 , -0.11087675, ..., -0.27358563,
        -0.16997235, -0.31276448],
       [ 0.24785314,  0.2405048 ,  0.97880692, ..., -0.27358563,
        -0.16997235, -0.31276448],
       ...,
       [ 0.24785314, -1.46797475, -0.83733254, ..., -0.27358563,
        -0.16997235, -0.31276448],
       [ 0.24785314, -1.46797475, -0.83733254, ..., -0.27358563,
        -0.16997235, -0.31276448],
       [ 0.24785314,  1.09474457,  0.25235114, ..., -0.27358563,
        -0.16997235, -0.31276448]])

Let us print predicted values.

In [23]:
pred=model.predict(X_scaled_test)
pred

array([[ 0.5769033 ],
       [-1.0783337 ],
       [ 1.2391145 ],
       ...,
       [ 1.0120201 ],
       [ 0.6936318 ],
       [-0.16420469]], dtype=float32)

Let us print our loss.

In [24]:
score = model.evaluate(X_scaled_test,pred,verbose=0)
print('Loss:',score)

Loss: 0.0


Our loss is 0.
It's highly accurate.

In [25]:
pred.shape

(2671, 1)