# **Tabular GAN**


## **1. Conda**

In [None]:
#fdf_conda used

In [28]:
#1. Establish connection between notebook session and your newly created bucket.
!odsc conda init -b conda_environment_yolov5 -n frqap2zhtzbe -a resource_principal

In [31]:
#2. Publish the 'object_detection_apex_conda' to the bucket. Make sure the bucket has no other custom conda environment, this will partly be overwritten
#!odsc conda publish -s fdf_conda

## **2. Import**

In [24]:
from ads.model.framework.tensorflow_model import TensorFlowModel
from ads.common.model_metadata import UseCaseType
from ads.common.model_artifact import ModelArtifact
from ads.common.model_export_util import prepare_generic_model
import os

In [25]:
#path to artifacts and conda slug
path_to_artifacts = '/home/datascience/model_artifacts'
conda_env = 'oci://conda_environment_yolov5@frqap2zhtzbe/conda_environments/cpu/fdf_conda/1.0/fdf_conda'   #this refers to the published conda location (bucket name, namespace)

#create default artifacts
artifact = prepare_generic_model(path_to_artifacts, fn_artifact_files_included=False, force_overwrite=True, inference_conda_env=conda_env)

loop1:   0%|          | 0/4 [00:00<?, ?it/s]



## **3. Change score.py**

In [26]:
%%writefile "{path_to_artifacts}/score.py"

import ocifs
import os
from ocifs import OCIFileSystem
import pandas as pd
from sdv.demo import load_tabular_demo
from sdv.tabular import CTGAN

########################### ########################### ########################### ########################### 
########################### 
########################### Load a dummy model
########################### 
########################### ########################### ########################### ########################### 


def load_model():
    class DummyModel:
        def __init__(self):
            pass
    return DummyModel()


#######################################################
####################################################### predict
#######################################################

def predict(data, model=load_model()):
    
    prim_key = data["prim_key"]
    csv_name = data["csv_name"]
    number_new_rows = data["number_new_rows"]
    
    #get the csv file from bucket
    input_location = "oci://West_BP@frqap2zhtzbe/"
    input_csv = pd.read_csv(input_location + csv_name)

    #get max 200 rows to train on
    data_short = input_csv.head(200)
    
    #load ctgan model
    model = CTGAN(primary_key=prim_key)
    
    #fit model on short data
    model.fit(data_short)
    
    #output file name
    output_file_name = "/home/datascience/synthetic_" + csv_name
    
    #delete file

    if os.path.exists(output_file_name):
        os.remove(output_file_name)
    else:
        print("The file does not exist yet, but that's fine")
    
    #c
    
    #create new synthetic rows
    new_data = model.sample(num_rows = number_new_rows, output_file_path = output_file_name)  #defaults to ./ to save
    
            
    fs = OCIFileSystem()
    fs.invalidate_cache()
    
    new_csv_local_path = os.path.join("/home/datascience/", output_file_name)
    print(new_csv_local_path)
       
    with open(new_csv_local_path, 'rb') as f:
        with fs.open(input_location + os.path.basename(new_csv_local_path), 'wb') as file_out:
            file_out.write(f.read())
            
    done = print("Synthetic .csv file is available")
    
    return done

Overwriting /home/datascience/model_artifacts/score.py


In [26]:
data = {"prim_key":"date_time", "csv_name":"Metro_Interstate_Traffic_Volume_Edit.csv", "number_new_rows":250}
predict(data)

/home/datascience/synthetic_Metro_Interstate_Traffic_Volume_Edit.csv


Unnamed: 0,holiday,temp,rain_1h,snow_1h,clouds_all,weather_main,weather_description,date_time,traffic_volume
0,,298.17,0.0,0.0,0,Rain,scattered clouds,a,651
1,,278.78,0.0,0.0,11,Clear,few clouds,b,2711
2,,271.22,0.0,0.0,0,Clouds,broken clouds,c,2318
3,,274.68,0.0,0.0,85,Clear,sky is clear,d,251
4,,280.88,0.0,0.0,24,Clear,scattered clouds,e,2365
...,...,...,...,...,...,...,...,...,...
245,,288.29,0.0,0.0,70,Clouds,scattered clouds,dL,251
246,,282.29,0.0,0.0,83,Clouds,few clouds,dM,4623
247,,272.86,0.0,0.0,0,Clouds,sky is clear,dN,992
248,,296.81,0.0,0.0,80,Clouds,overcast clouds,dO,2671


## **4. Store in model catalog**

In [27]:
#all should be passed
artifact.introspect()

['test_json_output.json', '.ipynb_checkpoints', 'runtime.yaml', 'score.py']


Unnamed: 0,Test key,Test name,Result,Message
0,runtime_env_path,Check that field MODEL_DEPLOYMENT.INFERENCE_ENV_PATH is set,Passed,
1,runtime_env_python,Check that field MODEL_DEPLOYMENT.INFERENCE_PYTHON_VERSION is set to a value of 3.6 or higher,Passed,
2,runtime_path_exist,Check that the file path in MODEL_DEPLOYMENT.INFERENCE_ENV_PATH is correct.,Passed,
3,runtime_version,Check that field MODEL_ARTIFACT_VERSION is set to 3.0,Passed,
4,runtime_yaml,"Check that the file ""runtime.yaml"" exists and is in the top level directory of the artifact directory",Passed,
5,score_load_model,Check that load_model() is defined,Passed,
6,score_predict,Check that predict() is defined,Passed,
7,score_predict_arg,Check that all other arguments in predict() are optional and have default values,Passed,
8,score_predict_data,"Check that the only required argument for predict() is named ""data""",Passed,
9,score_py,"Check that the file ""score.py"" exists and is in the top level directory of the artifact directory",Passed,


In [28]:
# Saving the model artifact to the model catalog. 
catalog_entry = artifact.save(display_name='synthetic_gan_v3', description='synthetic_gan_v3', timeout=600)

catalog_entry.id

loop1:   0%|          | 0/5 [00:00<?, ?it/s]

artifact:/tmp/saved_model_e0755524-de4b-4a59-b11e-afa6add3daf8.zip


'ocid1.datasciencemodel.oc1.eu-frankfurt-1.amaaaaaangencdyamolhca4nilqxjnz6bgq6xrjjau647lhtfwhm6npgkstq'

## **Deploy in UI**

In [29]:
import requests
import oci
from oci.signer import Signer

In [30]:
uri = f"https://modeldeployment.eu-frankfurt-1.oci.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.eu-frankfurt-1.amaaaaaangencdyackrqrerar5yzqfi4rzcdtzp7xwhoiuc3rd6an22lwiya/predict"
print(uri)

https://modeldeployment.eu-frankfurt-1.oci.customer-oci.com/ocid1.datasciencemodeldeployment.oc1.eu-frankfurt-1.amaaaaaangencdyackrqrerar5yzqfi4rzcdtzp7xwhoiuc3rd6an22lwiya/predict


In [31]:
%%time
auth = oci.auth.signers.get_resource_principals_signer()

data = {"prim_key":"date_time", "csv_name":"Metro_Interstate_Traffic_Volume_Edit.csv", "number_new_rows":250}

response = requests.post(uri, json=data, auth=auth)
print(response)

<Response [200]>
CPU times: user 87.3 ms, sys: 5.64 ms, total: 92.9 ms
Wall time: 29.7 s


In [32]:
import json
print(json.loads(response.content))

None
