In [None]:
# Install the Azure Cosmos DB SDK, which provides the azure.cosmos package imported below.
!pip install azure-cosmos

In [None]:
from os import getenv
import json
import pandas as pd
import uuid
import hashlib

from azure.cosmos import CosmosClient, PartitionKey
import azure.cosmos.errors as errors
import azure.cosmos.documents as documents
import azure.cosmos.http_constants as http_constants

In [None]:
# Connection settings for the Cosmos DB account.
# The account keys are secrets and must never be committed with the notebook:
# they are read from the COSMOS_MASTER_KEY / COSMOS_READ_KEY environment
# variables and are None when the variable is not set.
# NOTE(review): keys that were previously hard-coded here should be rotated.
CONFIG = {
    "endpoint": "https://db-nosql.documents.azure.com:443/",
    "database": "meubndes",
    "container": "operacoes",
    "masterkey": getenv("COSMOS_MASTER_KEY"),
    "readkey": getenv("COSMOS_READ_KEY"),
}

# Resource link of the target container, built from the names above.
CONTAINER_LINK = f"dbs/{CONFIG['database']}/colls/{CONFIG['container']}"

# Feed options that allow a query to span every partition.
# There is also a partitionKey feed option, but I was unable to figure out how to use it.
FEEDOPTIONS = {"enableCrossPartitionQuery": True}

In [None]:
#Generate hash key
def get_hash_from_string(s):
    """Return the hexadecimal MD5 digest of the string ``s``.

    The string is encoded with ``str.encode`` (UTF-8 by default) before hashing.
    """
    return hashlib.md5(str.encode(s)).hexdigest()

In [None]:
# Connect to the Cosmos DB account with the master key from CONFIG.
client = CosmosClient(CONFIG["endpoint"], {"masterKey": CONFIG["masterkey"]})

# Get the database, creating it first when it does not exist yet.
DATABASE = client.create_database_if_not_exists(id=CONFIG['database'])

# Get the container, creating it first when it does not exist yet.
# Items are partitioned by their own 'id'; the throughput offer is 400.
CONTAINER = DATABASE.create_container_if_not_exists(
    offer_throughput=400,
    partition_key=PartitionKey(path='/id'),
    id=CONFIG['container'],
)

In [None]:
def get_items(item_id=None):
    """Query items from the container.

    Args:
        item_id: Id of the item to look up. When ``None`` (the default) every
            item in the container is returned. Any other value, including
            falsy ones such as ``0`` or ``""``, is converted to ``str`` and
            used as an exact-match filter on ``c.id``.

    Returns:
        list: The matching items (possibly empty).
    """
    query_kwargs = {
        'query': 'SELECT * FROM c',
        'enable_cross_partition_query': True,
    }
    # Compare against None explicitly: a truthiness test would treat ids such
    # as 0 or "" as "no filter" and silently return the whole container.
    if item_id is not None:
        query_kwargs['query'] += ' WHERE c.id = @item_id'
        # Only send the parameter when the query actually references it.
        query_kwargs['parameters'] = [{"name": "@item_id", "value": str(item_id)}]
    return list(CONTAINER.query_items(**query_kwargs))

def create_item(item):
    """Store ``item`` in the container under a freshly generated id.

    The ``'id'`` entry of ``item`` is set (or overwritten) in place with a
    random UUID4 string before the item is created.

    Returns:
        The same ``item`` object, now carrying its new ``'id'``.
    """
    generated_id = str(uuid.uuid4())
    item['id'] = generated_id
    CONTAINER.create_item(body=item)
    return item

def delete_item(item_id):
    """Delete the item whose id is ``item_id``.

    Returns:
        bool: ``False`` when no item with that id was found, ``True`` after
        the first matching item has been deleted.
    """
    matches = get_items(item_id)
    if not matches:
        return False
    item = matches[0]
    # The container is partitioned on '/id', so the partition key value of an
    # item is its own id (not a 'category' field).
    CONTAINER.delete_item(item, partition_key=item['id'])
    return True

def complete_item(item_id, is_complete=True):
    """Set the ``isComplete`` flag on the item whose id is ``item_id``.

    Args:
        item_id: Id of the item to update.
        is_complete: Value stored under ``'isComplete'`` (default ``True``).

    Returns:
        bool: ``False`` when no item with that id was found, ``True`` after
        the first matching item has been replaced in the container.
    """
    found = get_items(item_id)
    if found:
        document = found[0]
        document['isComplete'] = is_complete
        # The updated document serves both as the item reference and as the new body.
        CONTAINER.replace_item(document, document)
        return True
    return False

In [None]:
# Mount Google Drive at /gdrive (requires the google.colab package) and
# make that directory the current working directory.
from google.colab import drive
drive.mount('/gdrive')
%cd /gdrive

Mounted at /gdrive
/gdrive


In [None]:
# Load the Parquet file from the mounted Google Drive into a DataFrame.
df = pd.read_parquet('/gdrive/MyDrive/BNDES/OPERACOES/operacoes-automaticas.parquet')

In [None]:
# Cosmos DB needs one column named 'id', and this is a document database,
# so the id has to be a string:
#   1. reset_index() turns the current index into a column called 'index',
#   2. that column is renamed to 'id',
#   3. its values are converted to str.
df = (
    df.reset_index()
      .rename(columns={'index': 'id'})
      .assign(id=lambda frame: frame['id'].astype(str))
)
#df['oid'] = get_hash_from_string(str(df['cliente']+"-"+df['data_da_contratacao']))

In [None]:
# Replace every missing value (NaN) in the DataFrame with 0.
df = df.fillna(0)

In [None]:
# Preview the first rows of the prepared DataFrame.
df.head()

In [None]:
#df_BH = df[df['municipio'] == 'BELO HORIZONTE']
#df_BH.info()

In [None]:
import numpy as np

class NpEncoder(json.JSONEncoder):
    """JSON encoder that understands NumPy scalars and arrays.

    Pass it as ``cls=NpEncoder`` to ``json.dumps``. NumPy integers, floats and
    booleans are converted to the matching built-in Python type, and arrays
    to (nested) lists. Anything else is delegated to ``json.JSONEncoder``,
    which raises ``TypeError`` for unsupported objects.
    """

    def default(self, obj):
        """Return a JSON-serializable equivalent of ``obj``."""
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        # np.bool_ is neither np.integer nor a Python bool, so without this
        # branch boolean columns would raise TypeError.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)

In [None]:
# Write every row of the DataFrame to the Cosmos DB container as one item.
for _, row in df.iterrows():
    # Round-trip the row through JSON with NpEncoder so the values handed to
    # create_item are plain JSON-compatible Python objects.
    payload = json.loads(json.dumps(dict(row), cls=NpEncoder))
    insert_data = create_item(payload)
print('Records inserted successfully.')

Records inserted successfully.
