In [1]:
import pandas as pd
import numpy as np
import pickle

from ads.dataset.factory import DatasetFactory
from ads.dataset.dataset_browser import DatasetBrowser
from time import process_time

import ads
import oci
import cx_Oracle
import os

#### Parametros

In [2]:
# [Parameter:oci_adw] Autonomous Database
par_oci_adw_config = {
    "user_name"       : "unsername",
    "password"        : "*********",
    "service_name"    : "dbadwprodfarma_low",
    "wallet_location" : "/home/datascience/oci/adw/instantclient_19_5/network/admin/Wallet.zip",
}

In [3]:
# [Parameter:oci_obj] OCI Object Storage
par_oci_obj_bucket_name            = 'Bucket_DATA_SCIENCE'
# The profile parameter (ociProfileName) 'LOCAL' or 'DATAFLOW' in OCI
par_oci_obj_ociConfigFilePath      = '~/oci/config'
par_oci_obj_ociProfileName         = 'DEFAULT'
par_oci_obj_config                 = oci.config.from_file(par_oci_obj_ociConfigFilePath, par_oci_obj_ociProfileName)
par_oci_obj_object_storage_client  = oci.object_storage.ObjectStorageClient(par_oci_obj_config)
par_oci_obj_namespace_name         = par_oci_obj_object_storage_client.get_namespace().data

#### Funciones

In [4]:
# fn_load_data_from_object_storage_to_pandas
# Descripción : Cargar datos de OCI Object Storage y convierte de Dataset a Dataframe (Pandas)
# Referencia  : https://docs.oracle.com/en-us/iaas/tools/ads-sdk/latest/user_guide/loading_data/connect.html?highlight=cket_name%20namespace%20file_name
def fn_load_data_from_object_storage_to_pandas(object_name):
    try:
        ts = process_time()
        ads.set_auth(auth='resource_principal')
        storage_options = {
           "config"  : par_oci_obj_ociConfigFilePath,
           "profile" : par_oci_obj_ociProfileName
        }
        ds = ads.dataset.factory.DatasetFactory.open(f"oci://{par_oci_obj_bucket_name}@{par_oci_obj_namespace_name}/{object_name}", storage_options=storage_options)
        df = ds.to_pandas_dataframe()

        te = process_time()
        print('# Loaded data to pandas (' + object_name + ')...[seg: '+ str(te-ts) +']')
        
        return df
    
    except Exception as e:
        print(e)

In [10]:
# [Exanple] fn_load_data_from_object_storage_to_pandas
df = fn_load_data_from_object_storage_to_pandas('Datasets/SucGanadora.xlsx')
df

loop1:   0%|          | 0/4 [00:00<?, ?it/s]

# Loaded data to pandas (Datasets/SucGanadora.xlsx)...[seg: 0.23423578899999953]


Unnamed: 0,DNI
0,21851757
1,30041328
2,32897326
3,31380842
4,25121604
...,...
258,28211058
259,29042554
260,43087693
261,44158264


In [5]:
# fn_read_sql_from_adw
# Descripción : Lee datos de Autonomous Database y retorna un Dataframe
# Referencia  : https://docs.oracle.com/en-us/iaas/tools/ads-sdk/latest/ads.dataset.html?highlight=read_sql#ads.dataset.factory.CustomFormatReaders.read_sql
def fn_read_sql_from_adw(sql):
    try:
        ts = process_time()
        df = pd.DataFrame.ads.read_sql(
            "SELECT * FROM DUAL",
            connection_parameters=par_oci_adw_config,
        )        

        te = process_time()
        print('# Read data fron adw (' + sql + ')...[seg: '+ str(te-ts) +']')
        print('# Number of rows (' + str(len(df)) + ')')
        
        return df
    
    except Exception as e:
        print(e)

In [11]:
# [Exanple] fn_read_sql_from_adw
df = fn_read_sql_from_adw('SELECT * FROM DUAL')
df

# Read data fron adw (SELECT * FROM DUAL)...[seg: 0.07938560299999864]
# Number of rows (1)


Unnamed: 0,DUMMY
0,X


In [12]:
# fn_load_data_from_dataframe_to_object_storage
# Descripción : Carga datos de un Dataframe a OCI Object Storage for Datalake
# Referencia  : https://docs.oracle.com/en-us/iaas/tools/ads-sdk/latest/ads.dataset.html?highlight=to_csv#ads.dataset.dataset.ADSDataset.to_csv
def fn_load_data_from_dataframe_to_object_storage(object_name,df):
    try:
        ts = process_time()
        ads.set_auth(auth='resource_principal')
        storage_options = {
           "config"  : par_oci_obj_ociConfigFilePath,
           "profile" : par_oci_obj_ociProfileName
        }
        df.to_csv(f"oci://{par_oci_obj_bucket_name}@{par_oci_obj_namespace_name}/{object_name}", storage_options=storage_options,index=False)

        te = process_time()
        print('# Load data from dataframe to object storage (' + object_name + ')...[seg: '+ str(te-ts) +']')        
    
    except Exception as e:
        print(e)

In [13]:
# [Exanple] fn_load_data_from_dataframe_to_object_storage
fn_load_data_from_dataframe_to_object_storage('Datasets/obj_example_fn_load_data_from_dataframe_to_object_storage.csv',df)
df

# Load data from dataframe to object storage (Datasets/dts_demo.csv)...[seg: 0.01332073200000039]


Unnamed: 0,DUMMY
0,X


In [15]:
# fn_upload_object_to_object_storage
# Descripción : Subir objecto a OCI Object Storage y convertir de Dataset a Bytes (CSV)
# Referencia  : https://github.com/jganggini/oci-data-flow/blob/main/upload-unstructured-data-to-autonomous-database/src/oci_object_storage.py
def fn_upload_object_to_object_storage(bucket_name, object_name, obj_bytes):
    try:
        ts = process_time()
        par_oci_obj_object_storage_client.put_object(namespace_name=par_oci_obj_namespace_name,bucket_name=bucket_name, object_name=object_name, put_object_body=obj_bytes)
        te = process_time()
        
        print('# Upload object (' + object_name + ')...[seg: '+ str(te-ts) +']')

    except Exception as e:
        print(e)

In [17]:
# [Exanple] fn_upload_object_to_object_storage
obj_bytes = bytes(df.to_csv(line_terminator='\r\n', index=False), encoding='utf-8')
fn_upload_object_to_object_storage(par_oci_obj_bucket_name, 'Datasets/obj_fn_upload_object_to_object_storage.csv', obj_bytes)

# Upload object (Datasets/obj_fn_upload_object_to_object_storage.csv)...[seg: 0.026141036000000284]


In [19]:
# fn_download_object_to_object_storage
# Descripción : Descarga objeto de OCI Object Storage (Bytes)
# Referencia  : https://github.com/jganggini/oci-data-flow/blob/main/upload-unstructured-data-to-autonomous-database/src/oci_object_storage.py
def fn_download_object_to_object_storage(bucket_name, object_name):
    try:
        ts = process_time()
        get_object_response = par_oci_obj_object_storage_client.get_object(namespace_name=par_oci_obj_namespace_name,bucket_name=bucket_name, object_name=object_name)
        te = process_time()
        print('# Download object (' + object_name + ')...[seg: '+ str(te-ts) +']')
        
        return get_object_response

    except Exception as e:
        print('  Download object (' + object_name + ') not exists...[Warning]')

        return e

In [22]:
# [Exanple] fn_download_object_to_object_storage
obj_response = fn_download_object_to_object_storage(par_oci_obj_bucket_name, 'Datasets/obj_fn_upload_object_to_object_storage.csv')

# Decode Object
ts = process_time()
obj_bytes = obj_response.data.content
obj_str = obj_bytes.decode('latin-1')
te = process_time()
print('# Decode object...[seg: '+ str(te-ts) +']')

# Filter Object
ts = process_time()
lis_old = obj_str.split("\r\n")
lis_new = list(filter(lambda item : len(item)>1 , lis_old))
te = process_time()
print('# Filter object...[seg: '+ str(te-ts) +']')

# Dataframe
df = pd.DataFrame(lis_new)
df

# Download object (Datasets/obj_fn_upload_object_to_object_storage.csv)...[seg: 0.0286947780000002]
# Decode object...[seg: 0.0003030650000006574]
# Filter object...[seg: 0.00019912600000004943]


Unnamed: 0,0
0,DUMMY


In [23]:
# [Exanple] Dataframe Split column
df = df[0].str.split('|',expand=True)
te = process_time()
print('# Split column...[seg: '+ str(te-ts) +']')
df

# Split column...[seg: 0.02872312500000085]


Unnamed: 0,0
0,DUMMY


In [25]:
# [Exanple] Pickle
obj_response = fn_download_object_to_object_storage(par_oci_obj_bucket_name, 'Datasets/model.sav')

# Decode Object
ts = process_time()
obj_bytes = obj_response.data.content

# Pickle
import pickle

mod_pickle = pickle.loads(obj_bytes)
mod_pickle
te = process_time()
print('# Read pickle object...[seg: '+ str(te-ts) +']')



AttributeError: 'ServiceError' object has no attribute 'data'