# Deploying CWD Analysis to Google Cloud

In [1]:
# 1) import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import tensorflow as tf
from keras.layers import Conv1D, Conv2D, MaxPooling1D, MaxPooling2D
from tensorflow.keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, SimpleRNN
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from keras.layers import TimeDistributed
from keras.optimizers import RMSprop
import keras
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.preprocessing.sequence import TimeseriesGenerator

In [2]:
# 2) data upload and wrangling
deer = pd.read_csv("DMA1_withlatlong.csv")
deer = deer[['Status', 'Sample Date', 'Lat_Cent', 'Long_Cent','Age', 'Sex', 
             'Active - Hunter Killed', 'Active - Other', 'Active - Road-Killed',
             'Targeted - Other', 'Targeted-Clinical suspect']]
deer['Sample Date'] = pd.to_datetime(deer['Sample Date'])
# Non-numeric ages become NaN and are removed by the dropna() below.
deer['Age'] = pd.to_numeric(deer['Age'],errors='coerce')
deer = deer.dropna()
# Features (every column after Status, timestamp included). The explicit copy
# makes X independent of `deer`, so assigning into it below is unambiguous.
X = deer.iloc[:,1:].copy()
# Response, kept as a one-column float DataFrame.
y = deer[['Status']].astype('float64')
# Encode the sample date as a 1-based count of whole years elapsed since the
# earliest sample. Casting to 'timedelta64[Y]' is rejected by pandas >= 2.0,
# so the years are computed explicitly with the same 365.2425-day year.
SECONDS_PER_YEAR = 365.2425 * 24 * 60 * 60
elapsed = X["Sample Date"] - X["Sample Date"].min()
X["Sample Date"] = (elapsed.dt.total_seconds() // SECONDS_PER_YEAR).astype(int) + 1
# shuffle=False keeps row order: the last 20% of rows form the test split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)
#X_train, X_test, y_train, y_test = np.array(X_train), np.array(X_test), np.array(y_train), np.array(y_test)

In [3]:
# 3) calculate weights because there are so few positive cases
from sklearn.utils import class_weight

# Flatten the labels to 1-D regardless of their current container (DataFrame
# or ndarray) and length. This keeps the cell re-runnable and independent of
# the number of training rows.
y_train = np.asarray(y_train).ravel()

classes = np.unique(y_train)
# Arguments are passed by keyword; recent scikit-learn releases no longer
# accept them positionally.
weights = class_weight.compute_class_weight(class_weight='balanced',
                                            classes=classes,
                                            y=y_train)
# Integer keys (0, 1, ...) derived from the labels actually present; the
# mapping is passed to model.fit(class_weight=...) further down.
class_weights = {int(label): weight for label, weight in zip(classes, weights)}
class_weights

{0: 0.5036319612590799, 1: 69.33333333333333}

In [4]:
# Fix TensorFlow's global random seed before the model is built and trained.
tf.random.set_seed(7)

In [5]:
# Small fully connected binary classifier: a 10-value input, one hidden
# ReLU layer of 5 units, and a single sigmoid output unit.
model = Sequential([
    Flatten(input_shape=[10,]),
    Dense(5, activation='relu'),
    Dense(1, activation='sigmoid'),
])

In [6]:
from keras import backend as K

def recall_m(y_true, y_pred):
    """Recall of the rounded predictions over the tensors passed in.

    Counts entries where both the label and the rounded prediction are 1 and
    divides by the number of positive labels. K.epsilon() in the denominator
    keeps the result finite when there are no positive labels.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_positives = K.round(K.clip(y_true, 0, 1))
    return K.sum(hits) / (K.sum(actual_positives) + K.epsilon())

def precision_m(y_true, y_pred):
    """Precision of the rounded predictions over the tensors passed in.

    Counts entries where both the label and the rounded prediction are 1 and
    divides by the number of predictions that round to 1. K.epsilon() in the
    denominator keeps the result finite when nothing is predicted positive.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    flagged = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hits) / (K.sum(flagged) + K.epsilon())

def f1_m(y_true, y_pred):
    """F1 score: twice the product of precision and recall over their sum.

    Built from precision_m and recall_m; K.epsilon() in the denominator keeps
    the result finite when both are zero.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    ratio = (p * r) / (p + r + K.epsilon())
    return 2 * ratio

In [7]:
# SGD with Nesterov momentum and a very small step size.
# `learning_rate` replaces the deprecated `lr` alias. `decay` is kept so the
# original learning-rate schedule is unchanged.
# NOTE(review): newer Keras optimizers may not accept `decay` - confirm
# against the installed version.
sgd = keras.optimizers.SGD(learning_rate=0.00001, decay=1e-6, momentum=0.9, nesterov=True)
# Accuracy is reported next to the custom F1 / precision / recall metrics.
model.compile(loss='binary_crossentropy',
              optimizer=sgd,
              metrics=['accuracy', f1_m, precision_m, recall_m]) 

In [8]:
# Train for 13 epochs in mini-batches of 4 rows. `class_weights` re-weights
# the loss per class (computed earlier because positive cases are rare), and
# the held-out split (X_test, y_test) is evaluated as validation data.
history = model.fit(X_train, 
                    y_train,
                    epochs=13, 
                    batch_size=4,
                    class_weight=class_weights,
                    validation_data=(X_test, y_test), 
                    verbose=1)

Epoch 1/13
Epoch 2/13
Epoch 3/13
Epoch 4/13
Epoch 5/13
Epoch 6/13
Epoch 7/13
Epoch 8/13
Epoch 9/13
Epoch 10/13
Epoch 11/13
Epoch 12/13
Epoch 13/13


# Save/Load a `SavedModel`

In [9]:
import os

# Local export directory laid out as <model_name>/<model_version>.
model_name = "my_cwd_model"
model_version = "0001"
model_path = os.path.join(model_name, model_version)
model_path

'my_cwd_model/0001'

In [10]:
# Delete any previous export directory so the save in the next cell starts clean.
!rm -rf {model_name}

In [11]:
# Export the trained model to the local directory held in `model_path`.
tf.saved_model.save(model, model_path)

INFO:tensorflow:Assets written to: my_cwd_model/0001/assets


In [12]:
# Print the export directory as an indented tree: each path separator in a
# directory's path adds one four-space indent level.
for dirpath, _subdirs, filenames in os.walk(model_name):
    pad = '    ' * dirpath.count(os.sep)
    print(pad + os.path.basename(dirpath) + '/')
    for fname in filenames:
        print(pad + '    ' + fname)

my_cwd_model/
    0001/
        saved_model.pb
        assets/
        variables/
            variables.data-00000-of-00001
            variables.index


In [13]:
#!saved_model_cli show --dir {model_path}
#!saved_model_cli show --dir {model_path} --tag_set serve
#!saved_model_cli show --dir {model_path} --tag_set serve \
#                      --signature_def serving_default
#!saved_model_cli show --dir {model_path} --all

In [14]:
# subset something out for testing
X_new = X_test[:3]
np.save("my_cwd_tests.npy", X_new)

In [15]:
# Name of the model's first input; used as the key of the --inputs argument
# in the saved_model_cli call below.
input_name = model.input_names[0]
input_name

'flatten_input'

In [16]:
# Smoke-test the exported model locally: run its serving_default signature on
# the sample rows saved in my_cwd_tests.npy.
!saved_model_cli run --dir {model_path} --tag_set serve \
                     --signature_def serving_default    \
                     --inputs {input_name}=my_cwd_tests.npy

2021-05-02 17:11:56.566376: I tensorflow/stream_executor/platform/default/dso_loader.cc:44] Successfully opened dynamic library libcuda.so.1
2021-05-02 17:11:56.576339: E tensorflow/stream_executor/cuda/cuda_driver.cc:351] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2021-05-02 17:11:56.576375: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (3d6e4b5ac0cf): /proc/driver/nvidia/version does not exist
2021-05-02 17:11:56.581357: I tensorflow/core/platform/profile_utils/cpu_utils.cc:94] CPU Frequency: 2299995000 Hz
2021-05-02 17:11:56.581646: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x55fa34616840 initialized for platform Host (this does not guarantee that XLA will be used). Devices:
2021-05-02 17:11:56.581682: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): Host, Default Version
W0502 17:11:56.583014 139673341593472 deprecation.py:323]

In [17]:
# Archive the exported version directory into /content/file.zip.
# NOTE(review): both paths are absolute under /content, so this cell only
# works when the notebook's working directory is /content.
!zip -r /content/file.zip /content/my_cwd_model/0001

updating: content/my_cwd_model/0001/ (stored 0%)
updating: content/my_cwd_model/0001/assets/ (stored 0%)
updating: content/my_cwd_model/0001/variables/ (stored 0%)
updating: content/my_cwd_model/0001/variables/variables.data-00000-of-00001 (deflated 67%)
updating: content/my_cwd_model/0001/variables/variables.index (deflated 63%)
updating: content/my_cwd_model/0001/saved_model.pb (deflated 86%)


In [18]:
# Name of the model's first output; predict() uses it as the key when reading
# each prediction from the service response.
output_name = model.output_names[0]

# Deploy to Google Cloud Platform

In [19]:
# Google Cloud project ID, inserted into the model resource name below.
project_id = "cwd-project-312503"

In [20]:
import googleapiclient.discovery

In [21]:
import os
# Set GOOGLE_APPLICATION_CREDENTIALS to a JSON key file, given as a path
# relative to the working directory.
# NOTE(review): keep this key file out of version control and shared copies
# of the notebook.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "cwd-project-312503-de2ed4945e4c.json"

In [22]:
# Model identifier, inserted into the model resource name below.
model_id = "cwd_model_ann"

In [23]:
# Resource name of the deployed model, sent as `name` in predict requests.
# NOTE: this rebinds `model_path`, which earlier held the local export
# directory; re-running the save/CLI cells after this one would use the
# resource name instead of that directory.
model_path = "projects/{}/models/{}".format(project_id, model_id)
model_path += "/versions/version_0001/" # if you want to run a specific version
# Client handle for the "ml" v1 API's `projects` resource, used by predict().
ml_resource = googleapiclient.discovery.build("ml", "v1").projects()

In [24]:
def predict(X):
    """Request predictions for the rows of `X` from the deployed model.

    The request goes through the module-level `ml_resource` client and is
    addressed by `model_path`. The `output_name` entry of every returned
    prediction is collected into the result.

    Args:
        X: 2-D collection of feature rows: a pandas DataFrame, a NumPy
            array, or a nested list. It is converted with `np.asarray`, so
            all three are accepted.

    Returns:
        np.ndarray with one entry per returned prediction.

    Raises:
        RuntimeError: if the service response contains an "error" key.
    """
    # np.asarray handles DataFrames and plain arrays alike; .tolist() yields
    # nested Python lists of built-in numbers for the JSON request body.
    instances = np.asarray(X).tolist()
    input_data_json = {"signature_name": "serving_default",
                       "instances": instances}
    request = ml_resource.predict(name=model_path, body=input_data_json)
    response = request.execute()
    if "error" in response:
        raise RuntimeError(response["error"])
    return np.array([pred[output_name] for pred in response["predictions"]])

In [59]:
# Query the deployed model for the sample rows, then label each returned
# probability using a 0.5 cut-off.
Y_probas = predict(X_new)
np.where(
    np.array(Y_probas) >= 0.5,
    'positive case',
    'negative case',
)

array([['negative case'],
       ['positive case'],
       ['positive case']], dtype='<U13')

In [27]:
# these were the test cases we used in this example
X_new

Unnamed: 0,Sample Date,Lat_Cent,Long_Cent,Age,Sex,Active - Hunter Killed,Active - Other,Active - Road-Killed,Targeted - Other,Targeted-Clinical suspect
8138,10,39.200785,-78.24312,5.5,0.0,1,0,0,0,0
8139,10,39.05279,-78.118726,1.5,0.0,1,0,0,0,0
8140,10,39.24245,-78.16691,3.5,0.0,1,0,0,0,0
