# **Model Versioning**

### ***Loading Libraries***

In [1]:
!pip install tensorflow-io

Collecting tensorflow-io
  Downloading tensorflow_io-0.37.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (49.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.6/49.6 MB[0m [31m8.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tensorflow-io
Successfully installed tensorflow-io-0.37.0


In [2]:
!pip install keras-tuner

Collecting keras-tuner
  Downloading keras_tuner-1.4.7-py3-none-any.whl (129 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/129.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.1/129.1 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
Collecting kt-legacy (from keras-tuner)
  Downloading kt_legacy-1.0.5-py3-none-any.whl (9.6 kB)
Installing collected packages: kt-legacy, keras-tuner
Successfully installed keras-tuner-1.4.7 kt-legacy-1.0.5


In [3]:
!pip install hypertune

Collecting hypertune
  Downloading hypertune-1.1.0-py3-none-any.whl (22 kB)
Installing collected packages: hypertune
Successfully installed hypertune-1.1.0


In [4]:
# Operating Systems
import os
import shutil
import pathlib
import argparse


# Numerical Computing
import numpy as np

# Data Manipuation
import pandas as pd

# SciPy
import scipy
from scipy import stats
from scipy import signal
from scipy.io import wavfile

# Data Visualization
import itertools
import seaborn as sns
from PIL import Image
import matplotlib.pyplot as plt

# Tabulate
from tabulate import tabulate

# Hyperparameter Tuning
import hypertune

# Tools
from setuptools import setup
from setuptools import find_packages

# BigQuery
from google.cloud import bigquery
from google.colab import auth

# Scikit-Learn
from sklearn.utils import shuffle
from sklearn.preprocessing import normalize
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import precision_recall_fscore_support as score

# Extreme Gradient Boosting
import xgboost as xgb

# PyTorch
import torch
import torch.nn as nn
import torch.optim as optim

# TensorFlow
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow import keras
from tensorflow.keras import Model
import tensorflow_datasets as tfds
from tensorflow_hub import KerasLayer
from tensorflow import feature_column as fc
from tensorflow.python.framework import dtypes
from tensorflow.keras.preprocessing import text
from tensorflow.keras.utils import to_categorical
from tensorflow_io.bigquery import BigQueryClient
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras import callbacks, layers, models, utils
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.layers import Dense, Embedding, Input, Flatten, Conv2D, MaxPooling2D, Lambda

# Keras API
import kerastuner as kt

  import kerastuner as kt


### ***Downloading & Preprocessing Data***

In [5]:
# User Authentication
auth.authenticate_user()

# BigQuery Library
# !pip install --upgrade google-cloud-bigquery

In [6]:
project_id = 'core-catalyst-425922-v9'
os.environ['GOOGLE_CLOUD_PROJECT'] = project_id

# BigQuery Client Config
client = bigquery.Client(project=project_id)

In [8]:
# Run the BigQuery query
query = """
SELECT
  arr_delay,
  carrier,
  origin,
  dest,
  dep_delay,
  taxi_out,
  distance
FROM
  `cloud-training-demos.flights.tzcorr`
WHERE
  EXTRACT(YEAR FROM fl_date) = 2015
ORDER BY fl_date ASC
LIMIT 300000
"""

# Execute the query and store the result in a DataFrame
df = client.query(query).to_dataframe()

# Display the result
df.head()

Unnamed: 0,arr_delay,carrier,origin,dest,dep_delay,taxi_out,distance
0,-16.0,EV,MGM,ATL,-6.0,10.0,147.0
1,14.0,MQ,LGA,DTW,-8.0,17.0,502.0
2,111.0,WN,MCO,BWI,109.0,19.0,787.0
3,-3.0,WN,ATL,MSY,1.0,13.0,425.0
4,-15.0,AA,MIA,BWI,-8.0,21.0,946.0


In [9]:
df = df.dropna()
df = shuffle(df, random_state=2)

In [10]:
df.head()

Unnamed: 0,arr_delay,carrier,origin,dest,dep_delay,taxi_out,distance
229226,10.0,MQ,BWI,ORD,-2.0,16.0,622.0
117560,8.0,US,CLT,PIT,-5.0,33.0,366.0
29070,-7.0,UA,ORD,LAS,11.0,13.0,1514.0
208975,-16.0,DL,BWI,ATL,-13.0,20.0,577.0
48051,34.0,AA,DFW,ORD,20.0,16.0,802.0


In [11]:
# Only include origins and destinations that occur frequently in the dataset
df = df[df['origin'].map(df['origin'].value_counts()) > 500]

df = df[df['dest'].map(df['dest'].value_counts()) > 500]

In [13]:
# Create the arr_delay_bool column
df['arr_delay_bool'] = df['arr_delay'] > 0

# Display value counts for arr_delay_bool
arr_delay_bool_counts = df['arr_delay_bool'].value_counts()

# Display the result
arr_delay_bool_counts

arr_delay_bool
False    130050
True     100201
Name: count, dtype: int64

In [14]:
df['arr_delay_bool'].value_counts()

arr_delay_bool
False    130050
True     100201
Name: count, dtype: int64

In [15]:
classify_model_labels = df['arr_delay_bool']

classify_model_data = df.drop(columns=['arr_delay', 'arr_delay_bool'])

In [21]:
# x,y = classify_model_data,classify_model_labels

# x_train,x_test,y_train,y_test = train_test_split(x,y)

In [22]:
# One-hot encode categorical columns
classify_model_data = pd.get_dummies(classify_model_data, columns=['carrier', 'origin', 'dest'])

# Split the data into training and test sets
x_train, x_test, y_train, y_test = train_test_split(classify_model_data, classify_model_labels, test_size=0.2, random_state=42)

# Setting the model
model = xgb.XGBRegressor(objective='reg:logistic')

# Fitting the model
model.fit(x_train, y_train)

In [23]:
y_pred = model.predict(x_test)

In [24]:
acc = accuracy_score(y_test, np.round(y_pred))
print(acc)

0.8572669431717009


In [25]:
# Save the model
model.save_model('model.bst')



### **Deploying Classification model to AI Platform**

In [26]:
# Set your cloud project
PROJECT = 'core-catalyst-425922-v9'
!gcloud config set project $PROJECT

Updated property [core/project].


In [27]:
BUCKET = PROJECT + '_flight_model_bucket'

In [28]:
# Create a bucket if you don't have one
# You only need to run this once
!gsutil mb gs://$BUCKET

Creating gs://core-catalyst-425922-v9_flight_model_bucket/...


In [29]:
!gsutil cp 'model.bst' gs://$BUCKET

Copying file://model.bst [Content-Type=application/octet-stream]...
-
Operation completed over 1 objects/340.2 KiB.                                    


In [None]:
# Create the model resource
!gcloud ai-platform models create flight_delay_prediction --region us-central1

Using endpoint [https://us-central1-ml.googleapis.com/]
API [ml.googleapis.com] not enabled on project [core-catalyst-425922-v9]. Would 
you like to enable and retry (this will take a few minutes)? (y/N)?  

In [31]:
# Create the version
!gcloud ai-platform versions create 'v1' \
  --model 'flight_delay_prediction' \
  --origin gs://$BUCKET \
  --runtime-version=1.15 \
  --framework 'XGBOOST' \
  --python-version=3.7

Please specify a region:
(For the global endpoint the region needs to be specified as 'global'.)
 [1] global
 [2] asia-east1
 [3] asia-northeast1
 [4] asia-southeast1
 [5] australia-southeast1
 [6] europe-west1
 [7] europe-west2
 [8] europe-west3
 [9] europe-west4
 [10] northamerica-northeast1
 [11] us-central1
 [12] us-east1
 [13] us-east4
 [14] us-west1
 [15] cancel
Please enter your numeric choice:  

Command killed by keyboard interrupt

^C


In [32]:
# Get a prediction on the first example from our test set
!rm input.json
num_examples = 10
with open('input.json', 'a') as f:
  for i in range(num_examples):
    f.write(str(x_test.iloc[i].values.tolist()))
    f.write('\n')

rm: cannot remove 'input.json': No such file or directory


In [33]:
!cat input.json

[102.0, 13.0, 1744.0, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, True, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, False, 

In [None]:
# Make a prediction to the deployed model
!gcloud ai-platform predict --model 'flight_delay_prediction' --version \
  'v1' --json-instances 'input.json'

In [None]:
# Compare this with actual values
print(y_test.iloc[:5])

## **Model Version (2): Replacing XGBoost with TensorFlow**

In [None]:
tf_model = tf.keras.Sequential([
    tf.keras.layers.Dense(32, activation='relu', input_shape=[len(x_train.iloc[0])]),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

tf_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

In [None]:
tf_model.fit(x_train, y_train, epochs=10, validation_split=0.1)

In [None]:
metrics = tf_model.evaluate(x_test, y_test)
print(metrics)

In [None]:
tf_model_path = 'gs://' + BUCKET + '/tf'

In [None]:
tf_model.save(tf_model_path, save_format='tf')

In [None]:
!gcloud ai-platform versions create 'v2' \
  --model 'flight_delay_prediction' \
  --origin $tf_model_path \
  --runtime-version=2.1 \
  --framework 'TENSORFLOW' \
  --python-version=3.7

In [None]:
# Make a prediction to the new version
!gcloud ai-platform predict --model 'flight_delay_prediction' --version \
  'v2' --json-instances 'input.json'

## **Alternative: Reframing as a Regression Problem**

In [None]:
regression_model_labels = df['arr_delay']

regression_model_data = df.drop(columns=['arr_delay', 'arr_delay_bool'])

In [None]:
x,y = regression_model_data,regression_model_labels

x_train,x_test,y_train,y_test = train_test_split(x,y)

In [None]:
# This will take 1-2 minutes to run
model.fit(x_train, y_train)

In [None]:
y_pred = model.predict(x_test)

In [None]:
for i,val in enumerate(y_pred[:10]):
  print(val)
  print(y_test.iloc[i])
  print()

In [None]:
model.save_model('model.bst')

In [None]:
!gsutil cp model.bst gs://$BUCKET/regression/

In [None]:
!gcloud ai-platform models create 'flights_regression'

In [None]:
# Setting The Version
!gcloud ai-platform versions create 'v1' \
  --model 'flights_regression' \
  --origin gs://$BUCKET/regression \
  --runtime-version=1.15 \
  --framework 'XGBOOST' \
  --python-version=3.7

In [None]:
!gcloud ai-platform predict --model 'flighs_regression' --version \
  'v1' --json-instances 'input.json'