In [1]:
import tensorflow as tf
from tensorflow import keras
assert tf.__version__ >= "0.20"
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler

In [2]:
tf.random.set_seed(42)
np.random.seed(42)

In [3]:
grid_node_data = pd.read_csv('../data/grid_node_data.csv', sep=',', encoding='latin-1', index_col=0)
police_station_data = pd.read_csv('../data/police-station.csv', sep=',', encoding='latin-1', index_col=0)
fire_station_data = pd.read_csv('../data/fire-station.csv', sep=',', encoding='latin-1', index_col=0)

zone_police_data = pd.merge(grid_node_data, police_station_data, how='left', on=['x', 'y'])
zone_df = pd.merge(zone_police_data, fire_station_data, how='left', on=['x', 'y'])

X = zone_df[['time_of_day',
             'month_of_year',
             'x', 'y',
             'number of police station',
             'number of fire station']]

y = pd.read_csv('../data/grid_target.csv', sep=',', encoding='latin-1')


In [4]:
X = pd.concat([X]*3, ignore_index=True)
y = pd.concat([y]*3, ignore_index=True)
y = y[['target']]['target']

In [5]:
X

Unnamed: 0,time_of_day,month_of_year,x,y,number of police station,number of fire station
0,3,1,8,40,0,0
1,2,8,36,141,0,0
2,2,8,13,176,0,0
3,1,1,31,189,0,0
4,2,9,20,46,0,0
...,...,...,...,...,...,...
642166,3,2,33,65,0,0
642167,3,1,66,153,0,0
642168,3,5,86,168,0,0
642169,1,6,87,223,0,0


In [6]:
y

0         0.000000
1         0.000000
2         0.000615
3         0.000000
4         0.000000
            ...   
642166    0.000000
642167    0.000000
642168    0.000476
642169    0.000772
642170    0.004040
Name: target, Length: 642171, dtype: float64

In [7]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
columns_to_encode = ['time_of_day', 'month_of_year']
columns_to_scale  = ['x', 'y']

# Instantiate encoder/scaler
scaler = MinMaxScaler() #StandardScaler()
ohe    = OneHotEncoder(sparse=False)

# Scale and Encode Separate Columns
scaled_columns  = scaler.fit_transform(X[columns_to_scale]) 
encoded_columns =    ohe.fit_transform(X[columns_to_encode])

# Concatenate (Column-Bind) Processed Columns Back Together
processed_data = np.concatenate([scaled_columns, encoded_columns], axis=1)

In [8]:
y.tail()

642166    0.000000
642167    0.000000
642168    0.000476
642169    0.000772
642170    0.004040
Name: target, dtype: float64

In [9]:
X = processed_data
X

array([[0.05333333, 0.15873016, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.24      , 0.55952381, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.08666667, 0.6984127 , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.57333333, 0.66666667, 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.58      , 0.88492063, 1.        , ..., 0.        , 0.        ,
        0.        ],
       [0.50666667, 0.87301587, 1.        , ..., 0.        , 0.        ,
        0.        ]])

In [10]:
def split_data(X, y):
    sep1, sep2, sep3 = int(len(X)*0.7), int(len(X)*0.8), int(len(X))
    X_train, X_test, X_valid = X[:sep1], X[sep1:sep2], X[sep2:sep3]
    # change probability to be percentage
    y_train, y_test, y_valid = y[:sep1]*100, y[sep1:sep2]*100, y[sep2:sep3]*100
    return X_train, X_test, X_valid, y_train, y_test, y_valid

In [11]:
split_data(X,y)

(array([[0.05333333, 0.15873016, 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.24      , 0.55952381, 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.08666667, 0.6984127 , 0.        , ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.56      , 0.82142857, 0.        , ..., 1.        , 0.        ,
         0.        ],
        [0.25333333, 0.11507937, 0.        , ..., 1.        , 0.        ,
         0.        ],
        [0.28666667, 0.24603175, 1.        , ..., 0.        , 0.        ,
         0.        ]]),
 array([[0.51333333, 0.76984127, 0.        , ..., 1.        , 0.        ,
         0.        ],
        [0.21333333, 0.19444444, 0.        , ..., 0.        , 0.        ,
         0.        ],
        [0.41333333, 0.84920635, 0.        , ..., 0.        , 0.        ,
         0.        ],
        ...,
        [0.43333333, 0.73412698, 0.        , ..., 0.        , 0.        ,
         1.        ],
        [0.1

In [12]:
X.shape[1:]

(17,)

In [13]:
def build_model(input_shape=X.shape[1:], n_hidden=2, n_neurons=20, learning_rate=3e-4):
    model = keras.models.Sequential()
    # input layer
    model.add(keras.layers.InputLayer(input_shape=input_shape))
    # hidden layer
    for layer in range(n_hidden):
        model.add(keras.layers.Dense(n_neurons, activation="sigmoid", kernel_initializer="he_normal"))
    # output layer
    model.add(keras.layers.Dense(1))
    optimizer = keras.optimizers.SGD(learning_rate=learning_rate)
    model.compile(loss="mean_squared_error",
                  optimizer=optimizer)
    return model

In [14]:
def train_model(X, y):
    X_train, X_test, X_valid, y_train, y_test, y_valid = split_data(X, y)
    keras_reg = build_model(input_shape=X_train.shape[1:])
    keras_reg.fit(X_train, y_train, epochs=20, validation_data=(X_valid, y_valid),
                  callbacks=[keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)])

    keras_reg.save("neural_network.h5")
    model = keras.models.load_model("neural_network.h5")
    y_pred = model.predict(X_test)
    df = pd.DataFrame(list(zip(y_train, y_pred)), columns=['Actual', 'Predicted'])
    return df

In [15]:
df = train_model(X, y)
print(df)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
          Actual      Predicted
0       0.000000   [0.11935679]
1       0.000000  [0.095548816]
2       0.061543   [0.10832108]
3       0.000000   [0.20137915]
4       0.000000  [0.040308304]
...          ...            ...
128429  0.000000   [0.05653847]
128430  0.032823     [0.159852]
128431  0.000000   [0.06865061]
128432  0.018756   [0.09212408]
128433  0.000000  [0.101745345]

[128434 rows x 2 columns]


### SVM regression

In [25]:
X_train, X_test, X_valid, y_train, y_test, y_valid = split_data(X, y)

In [None]:
from sklearn.svm import SVR
svm_reg = SVR(kernel='poly', degree=2, C=100, epsilon=0.2)
svm_reg.fit(X_train, y_train)

In [None]:
svm_reg.predict(X_test[:10])

In [None]:
y_test[:10]

### Fine turn the model

In [16]:
from scipy.stats import reciprocal
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

In [17]:
reciprocal(3e-4, 3e-2).rvs(1000).tolist()

[0.001683454924600351,
 0.02390836445593178,
 0.008731907739399206,
 0.004725396149933917,
 0.0006154014789262348,
 0.0006153331256530192,
 0.0003920021771415983,
 0.01619845322936229,
 0.004779156784872302,
 0.007821074275112298,
 0.00032983006724298584,
 0.026118062634914545,
 0.013867767003062484,
 0.0007976263194981808,
 0.0006930605663535878,
 0.0006981203125151339,
 0.0012178834831452913,
 0.00336228186355817,
 0.0021928619507738728,
 0.0011470425674025553,
 0.005021425736625637,
 0.0005703073595961105,
 0.001151888789941251,
 0.001621231156394198,
 0.0024505367684280487,
 0.011155092541719619,
 0.0007524347058135697,
 0.0032032448128444043,
 0.004591455636549438,
 0.0003715541189658278,
 0.004922786019194374,
 0.0006579145666993104,
 0.0004047850280403973,
 0.023707858649124676,
 0.025608569588600475,
 0.012414120337683038,
 0.001219968994054438,
 0.0004703980175016901,
 0.007007890507878476,
 0.0022773314415848074,
 0.0005262568046235237,
 0.002934101104997821,
 0.0003514781217

In [18]:
param_distribs ={
    "n_hidden": [2, 5],
    "n_neurons": np.arange(20, 25),
    "learning_rate": reciprocal(3e-4, 3e-2).rvs(1000).tolist()
}

In [19]:
def fine_turn_model(X, y):
    X_train, X_test, X_valid, y_train, y_test, y_valid = split_data(X, y)
    k_model = keras.wrappers.scikit_learn.KerasRegressor(build_model)
    print(k_model)
    search_cv = GridSearchCV(k_model, param_distribs, cv=3)
    search_cv.fit(X_train, y_train, epochs=100, 
                  validation_data=(X_valid, y_valid),
                  callbacks=[keras.callbacks.EarlyStopping(patience=10)])

    y_pred = search_cv.predict(X_test)
    df = pd.DataFrame(list(zip(y_train, y_pred)), columns=['Actual', 'Predicted'])
    return df

In [20]:
# fine_df = fine_turn_model(X, y)

In [21]:
print(fine_df.head())

NameError: name 'fine_df' is not defined

Fine turning model doesn't improve the accuracy of model. 