In [None]:
# Use the Azure CLI to create the resource group and the Cosmos DB account in Azure
# az login
# az group create --location eastus --name performance-benchmark-group --subscription "Azure subscription 1"
# az cosmosdb create --name ucusqlcosmos \
#                    --resource-group performance-benchmark-group \
#                    --locations regionName=eastus \
#                    --subscription "Azure subscription 1"

In [None]:
pip install azure-cosmos
pip install pandas

In [2]:
import pandas as pd
import json
import azure.cosmos.cosmos_client as cosmos_client
import azure.cosmos.errors as errors
import azure.cosmos.documents as documents
import azure.cosmos.http_constants as http_constants
import settings as s
from azure.cosmos import CosmosClient, PartitionKey, exceptions

# Reaching this line means every import above resolved.
print('Imported packages successfully.')

# Connection settings for the Cosmos DB account; both values are read from
# the local `settings` module (imported as `s`).
config = dict(
    endpoint=f"{s.ENDPOINT}",
    primarykey=f"{s.PRIMARY_KEY}",
)

# Account-level client: first argument is the endpoint URL, second the key.
client = CosmosClient(config["endpoint"], config["primarykey"])


Imported packages successfully.


In [3]:

database_name = 'ucudatabase'
# Create the database on the first run and reuse it on later runs.
# Only the "already exists" conflict is treated as recoverable; any other
# HTTP failure (bad key, network problem, throttling, ...) propagates instead
# of being hidden behind a client handle for a database that may not exist.
try:
    database = client.create_database(database_name)
except exceptions.CosmosResourceExistsError:
    database = client.get_database_client(database_name)


# Create a collection of items in a Container
# Items are the individual rows/records of your dataset
# https://docs.microsoft.com/en-us/python/api/azure-cosmos/azure.cosmos.cosmos_client.cosmosclient?view=azure-python#createcontainer-database-link--collection--options-none-


In [27]:
# Link string for the database, built from the name used to create it.
database_link = 'dbs/' + database_name
# Container settings. The partition key path declared here is the single
# source of truth: it is read back when the container is created below.
container_definition = {'id': 'UCUcontainer',
                        'partitionKey':
                        {
                            'paths': ['/country'],
                            'kind': documents.PartitionKind.Hash
                        }
                        }
# Create the container on the first run; if it already exists, fetch a client
# for it instead. Any other Cosmos error propagates unchanged.
try:
    container = database.create_container(
        id=container_definition['id'],
        partition_key=PartitionKey(
            path=container_definition['partitionKey']['paths'][0]))
except exceptions.CosmosResourceExistsError:
    container = database.get_container_client(container_definition['id'])


In [28]:
# Download the CSV file and read every column as text.
# The codec name is spelled with plain ASCII hyphens: a typographic dash
# (easily picked up when pasting from formatted text) is not part of any
# registered codec name.
df = pd.read_csv('https://globaldatalab.org/assets/2019/09/SHDI%20Complete%203.0.csv',
                 encoding='ISO-8859-1', dtype='str')
# Offline alternative: df = pd.read_csv("SHDI_data.csv")

# Cosmos DB needs one column named 'id', and it must be a string:
#   1. reset_index() materialises the row number as a column called 'index'
#   2. that column is renamed to 'id'
#   3. 'id' is converted to str
df = (
    df.reset_index()
      .rename(columns={'index': 'id'})
      .astype({'id': str})
)
df.head(2)

48439


In [None]:
collection_link = database_link + '/colls/' + 'UCUcontainer'

# Upsert every row of the DataFrame as one Cosmos DB item.
# to_dict(orient='records') builds all row dictionaries (column name -> value)
# in a single pass, which avoids a positional .iloc lookup per row; the
# dictionaries are passed straight to upsert_item without a JSON round-trip.
for record in df.to_dict(orient='records'):
    container.upsert_item(record)
# NOTE: one request is sent per row, so loading is slow -- the full
# globaldatalab.org dataset took about 15 minutes.
print('Records inserted successfully.')


In [None]:
collection_link = database_link + '/colls/' + 'UCUcontainer'
query = 'SELECT * FROM c where c.country="Afghanistan" and c.level="National"'

# Retrieve the individual JSON records from Cosmos DB that satisfy the query
# and keep each one as a plain dictionary.
dflist = [
    dict(item)
    for item in container.query_items(query,
                                      enable_cross_partition_query=True)
]

# Convert list to pandas DataFrame
df = pd.DataFrame(dflist)
# Report how many records matched.
print(len(df.index))
# The preview is the cell's last expression so that the notebook renders it;
# an expression in the middle of a cell is evaluated but never displayed.
df.head()