In [1]:
import numpy as np
import pandas as pd
import pathlib
import os
import tensorflow as tf
import argparse
from tensorflow import keras
from tensorflow.keras import layers



2022-08-09 14:44:56.933764: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2022-08-09 14:44:56.952935: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-08-09 14:44:56.952951: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


In [2]:
dataset_path = keras.utils.get_file("auto-mpg.data", "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")
dataset_path



'/home/jesusarguelles/.keras/datasets/auto-mpg.data'

In [4]:
column_names = ['MPG','Cylinders','Displacement','Horsepower','Weight',
                'Acceleration', 'Model Year', 'Origin']
dataset = pd.read_csv(dataset_path, names=column_names,
                      na_values = "?", comment='\t',
                      sep=" ", skipinitialspace=True)

In [5]:
dataset

Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Origin
0,18.0,8,307.0,130.0,3504.0,12.0,70,1
1,15.0,8,350.0,165.0,3693.0,11.5,70,1
2,18.0,8,318.0,150.0,3436.0,11.0,70,1
3,16.0,8,304.0,150.0,3433.0,12.0,70,1
4,17.0,8,302.0,140.0,3449.0,10.5,70,1
...,...,...,...,...,...,...,...,...
393,27.0,4,140.0,86.0,2790.0,15.6,82,1
394,44.0,4,97.0,52.0,2130.0,24.6,82,2
395,32.0,4,135.0,84.0,2295.0,11.6,82,1
396,28.0,4,120.0,79.0,2625.0,18.6,82,1


In [6]:
!rm -fr custom_draft
!mkdir custom_draft
!touch custom_draft/__init__.py

In [1]:
%%writefile custom_draft/ctx.py

MODEL_URI='gs://vtx-models'

Overwriting custom_draft/ctx.py


In [7]:
%%writefile custom_draft/preprocess.py

from custom_draft import ctx

## Data Cleaning and Normalizating, exporting statistics.

def train_pre_process(dataset):
    import pandas as pd

    dataset = dataset.dropna()
    dataset['Origin'] = dataset['Origin'].map({1: 'USA', 2: 'Europe', 3: 'Japan'})
    dataset = pd.get_dummies(dataset, prefix='', prefix_sep='')
    
    train_dataset = dataset.sample(frac=0.8, random_state=0)
    test_dataset = dataset.drop(train_dataset.index)
    
    train_stats = train_dataset.describe()
    train_stats.pop('MPG')
    train_stats = train_stats.transpose()
    train_stats.to_csv(f'{ctx.MODEL_URI}/mpg/stats.csv')
    train_stats.to_csv('stats_2.csv')
    train_labels = train_dataset.pop('MPG')
    test_labels = test_dataset.pop('MPG')
    
    def norm(x):
        return (x - train_stats['mean'])/train_stats['std']
    normed_train_data = norm(train_dataset)
    normed_test_data = norm(test_dataset)

    return normed_train_data, train_labels, normed_test_data, test_labels

## Using training statistics to equals normalization.

def pred_data_process(data: list):
    import pandas as pd
    
    column_names = ['Cylinders', 'Displacement', 'Horsepower', 'Weight', 'Acceleration', 'Model Year', 'Origin']
    dataset = pd.DataFrame([data], columns=column_names)

    dataset = dataset.dropna()

    if (dataset['Origin'] == 1).any():
        dataset = dataset.drop(columns=['Origin'])
        dataset['Europe'] = 0
        dataset['Japan'] = 0
        dataset['USA'] = 1

    elif (dataset['Origin'].any == 2).any():
        dataset = dataset.drop(columns=['Origin'])
        dataset['Europe'] = 1
        dataset['Japan'] = 0
        dataset['USA'] = 0

    elif (dataset['Origin'] == 3).any():
        dataset = dataset.drop(columns=['Origin'])
        dataset['Europe'] = 0
        dataset['Japan'] = 1
        dataset['USA'] = 0

    ## Train stats

    train_stats = pd.read_csv('stats.csv', index_col=[0])
    
    def norm(x):
        return (x - train_stats['mean'])/train_stats['std']
    normed_data = norm(dataset)

    return normed_data

Overwriting custom_draft/preprocess.py


In [8]:
%%writefile custom_draft/train.py

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from custom_draft import ctx

def build_model(train_dataset):
    model = keras.Sequential([
        layers.Dense(64, activation='relu', input_shape=[len(train_dataset.keys())]),
        layers.Dense(64, activation='relu'),
        layers.Dense(1)
    ])

    optimizer = tf.keras.optimizers.RMSprop(0.001)
    
    model.compile(loss='mse',
        optimizer=optimizer,
        metrics=['mae', 'mse'])
    
    return model

def train_model(train_data, train_labels, epochs: int = 1000):
    model = build_model(train_data)
    epochs = epochs
    
    # The patience parameter is the amount of epochs to check for improvement
    early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
    
    early_history = model.fit(train_data, train_labels, 
        epochs=epochs, validation_split = 0.2, 
        callbacks=[early_stop])
    
    model.save(f'{ctx.MODEL_URI}/mpg/model')

    return model

Overwriting custom_draft/train.py


In [9]:
from tensorflow import keras
import pandas as pd
from custom_draft import preprocess
from custom_draft import train

dataset_path = keras.utils.get_file("auto-mpg.data", "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")
column_names = ['MPG','Cylinders','Displacement','Horsepower','Weight','Acceleration', 'Model Year', 'Origin']
dataset = pd.read_csv(dataset_path, names=column_names, na_values = "?", comment='\t',sep=" ", skipinitialspace=True)

train_data, train_labels, test_data, test_labels = preprocess.train_pre_process(dataset)
model = train.train_model(train_data, train_labels)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataset['Origin'] = dataset['Origin'].map({1: 'USA', 2: 'Europe', 3: 'Japan'})


              count         mean         std     min      25%     50%  \
Cylinders     314.0     5.477707    1.699788     3.0     4.00     4.0   
Displacement  314.0   195.318471  104.331589    68.0   105.50   151.0   
Horsepower    314.0   104.869427   38.096214    46.0    76.25    94.5   
Weight        314.0  2990.251592  843.898596  1649.0  2256.50  2822.5   
Acceleration  314.0    15.559236    2.789230     8.0    13.80    15.5   
Model Year    314.0    75.898089    3.675642    70.0    73.00    76.0   
Europe        314.0     0.178344    0.383413     0.0     0.00     0.0   
Japan         314.0     0.197452    0.398712     0.0     0.00     0.0   
USA           314.0     0.624204    0.485101     0.0     0.00     1.0   

                  75%     max  
Cylinders        8.00     8.0  
Displacement   265.75   455.0  
Horsepower     128.00   225.0  
Weight        3608.00  5140.0  
Acceleration    17.20    24.8  
Model Year      79.00    82.0  
Europe           0.00     1.0  
Japan        

In [6]:
train_labels

146    28.0
282    22.3
69     12.0
378    38.0
331    33.8
       ... 
281    19.8
229    16.0
150    26.0
145    32.0
182    28.0
Name: MPG, Length: 314, dtype: float64

In [11]:
%%writefile custom_draft/main.py

from fastapi import Request, FastAPI
import tensorflow as tf
import json
import os
from custom_draft import preprocess

app = FastAPI()

model_uri=os.environ['AIP_STORAGE_URI']
print(f'[INFO] ------ {model_uri}', file=sys.stderr)
model = tf.keras.models.load_model(f'{model_uri}/mpg/model')

@app.get('/')
def get_root():
    return {'message': 'Welcome mpg API: miles per gallon prediction'}

@app.get('/health_check')
def health():
    return 200

if os.environ.get('AIP_PREDICT_ROUTE') is not None:
    method = os.environ['AIP_PREDICT_ROUTE']
else:
    method = '/predict'

@app.post(method)
async def predict(request: Request):
    print("----------------- PREDICTING -----------------")
    body = await request.json()
    instances = body["instances"]

    norm_data = preprocess.pred_data_process(instances)
    
    outputs = model.predict(norm_data)
    response = outputs.tolist()
    print("----------------- OUTPUTS -----------------")
    return {"predictions": response}


Writing custom_draft/main.py


In [15]:
%%writefile custom_draft/Dockerfile

FROM tiangolo/uvicorn-gunicorn-fastapi:python3.7

COPY / /app
WORKDIR /app
RUN pip install google-cloud-storage tensorflow
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8080"]

EXPOSE 8080

Overwriting custom_draft/Dockerfile


In [16]:
PRED_IMAGE_URI = "gcr.io/jchavezar-demo/fast_pred:v1"

In [17]:
!gcloud builds submit -t $PRED_IMAGE_URI custom_draft/.

Creating temporary tarball archive of 6 file(s) totalling 4.1 KiB before compression.
Uploading tarball of [custom_draft/.] to [gs://jchavezar-demo_cloudbuild/source/1662493898.857458-6201bc78a73242788b03a6860e0042eb.tgz]
Created [https://cloudbuild.googleapis.com/v1/projects/jchavezar-demo/locations/global/builds/b858dea8-3bca-4206-adad-a9dbaa5b2ad1].
Logs are available at [ https://console.cloud.google.com/cloud-build/builds/b858dea8-3bca-4206-adad-a9dbaa5b2ad1?project=569083142710 ].
----------------------------- REMOTE BUILD OUTPUT ------------------------------
starting build "b858dea8-3bca-4206-adad-a9dbaa5b2ad1"

FETCHSOURCE
Fetching storage object: gs://jchavezar-demo_cloudbuild/source/1662493898.857458-6201bc78a73242788b03a6860e0042eb.tgz#1662493899371351
Copying gs://jchavezar-demo_cloudbuild/source/1662493898.857458-6201bc78a73242788b03a6860e0042eb.tgz#1662493899371351...
/ [1 files][  1.8 KiB/  1.8 KiB]                                                
Operation completed ove

In [3]:
import tensorflow as tf

model = tf.keras.models.load_model('gs://vtx-models/mpg/model')
model

<keras.engine.sequential.Sequential at 0x7f891a263dd0>

In [5]:
model.predict(preprocess.pred_data_process([8.0, 350.0, 165.0, 3693.0, 11.5, 70.0, 1.0]))



array([[15.102618]], dtype=float32)

In [1]:
from tensorflow import keras
import pandas as pd
from custom_draft import preprocess
from custom_draft import train

dataset_path = keras.utils.get_file("auto-mpg.data", "http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data")
column_names = ['MPG','Cylinders','Displacement','Horsepower','Weight','Acceleration', 'Model Year', 'Origin']
dataset = pd.read_csv(dataset_path, names=column_names, na_values = "?", comment='\t',sep=" ", skipinitialspace=True)

dataset.head()

2022-09-07 12:26:58.059857: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2022-09-07 12:26:58.064552: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-09-07 12:26:58.064573: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


Unnamed: 0,MPG,Cylinders,Displacement,Horsepower,Weight,Acceleration,Model Year,Origin
0,18.0,8,307.0,130.0,3504.0,12.0,70,1
1,15.0,8,350.0,165.0,3693.0,11.5,70,1
2,18.0,8,318.0,150.0,3436.0,11.0,70,1
3,16.0,8,304.0,150.0,3433.0,12.0,70,1
4,17.0,8,302.0,140.0,3449.0,10.5,70,1
