In [5]:
import os, uuid
import pandas as pd
from azure.identity import DefaultAzureCredential
from azure.storage.blob import BlobServiceClient, BlobClient, ContainerClient

### Establishing connection with Azure

In [23]:
# Retrieve the connection string for use with the application. The storage
# connection string is stored in an environment variable on the machine
# running the application called AZURE_STORAGE_CONNECTION_STRING. If the environment variable is
# created after the application is launched in a console or with Visual Studio,
# the shell or application needs to be closed and reloaded to take the
# environment variable into account.
connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
# Create the BlobServiceClient object
blob_service_client = BlobServiceClient.from_connection_string(connect_str)

### Utilizing DefaultAzureCredential

In [3]:
account_url = "https://inappjson.blob.core.windows.net"
default_credential = DefaultAzureCredential()

# Create the BlobServiceClient object
blob_service_client = BlobServiceClient(account_url, credential=default_credential)

Sample blob download - one file into one location

In [4]:
# Download the blob to a local file
local_path = "assets/jsons/"
# Add 'AFS' before the .json extension to see both files in the data directory
download_file_path = os.path.join(local_path, str.replace('err_indata.json','.json','AFS.json')) 
container_client = blob_service_client.get_container_client(container='jsons') 
print("\nDownloading blob to \n\t" + download_file_path)

with open(file=download_file_path, mode="wb") as download_file:
 download_file.write(container_client.download_blob('test1.json').readall())


Downloading blob to 
	assets/jsons/err_indataAFS.json


HttpResponseError: This request is not authorized to perform this operation using this permission.
RequestId:6ddd4100-c01e-0056-199a-1834ce000000
Time:2023-11-16T14:36:09.6040874Z
ErrorCode:AuthorizationPermissionMismatch
Content: <?xml version="1.0" encoding="utf-8"?><Error><Code>AuthorizationPermissionMismatch</Code><Message>This request is not authorized to perform this operation using this permission.
RequestId:6ddd4100-c01e-0056-199a-1834ce000000
Time:2023-11-16T14:36:09.6040874Z</Message></Error>

### More advanced download:

1. Creating container_client 
2. Listing blobs inside the container `jsons`
3. Iterating over downlading from container into `local_path`

In [11]:
# Download the blob to a local file
local_path = "assets/jsons/"
try:
    os.mkdir(local_path)
except:
    print("Folder already exists. Proceeding to downlad")
# Read the container for filenames:
container_client = blob_service_client.get_container_client(container='jsons')
filenames = container_client.list_blobs()
# Add 'AFS' before the .json extension to see both files in the data directory
for blob in filenames:
    with open('{}{}'.format(local_path, blob.name), 'w') as fp: 
        pass
    download_file_path = os.path.join(local_path, str.replace(blob.name ,'.json', 'AFS.json'))
    print("\nDownloading blob to \n\t" + download_file_path)
    with open(file=download_file_path, mode="wb") as download_file:
        download_file.write(container_client.download_blob(blob.name).readall())

Folder already exists. Proceeding to downlad

Downloading blob to 
	assets/jsons/err_indataAFS.json

Downloading blob to 
	assets/jsons/indataAFS.json

Downloading blob to 
	assets/jsons/test1AFS.json


### Deleting files in the container

In [9]:
filenames = container_client.list_blobs()

for blob in filenames:
    container_client.delete_blob(blob.name)
    print("Deleted blob:{}".format(blob.name))

Deleted blob:err_indata.json
Deleted blob:indata.json
Deleted blob:test1.json


### Uploading files into the container

In [19]:
import glob, json
uploads = glob.glob('*.json')
print(uploads)
for js in uploads:
    with open(js) as jsf:
        #blob = jsf.read()
        dict = json.load(jsf)
        username = dict['username']
        date = dict['operations'][0]['date']
        date = str.replace(date, ":", "-")
        print(date, username)
    print(blob)
    #blob_client = blob_service_client.get_blob_client(container='jsons', blob=js)
    #blob_client.upload_blob(blob, blob_type="BlockBlob")

['err_indata.json', 'indata.json', 'test1.json']
22-10-1_25/10/2023 Test_user
{
    "username": "Test_user",
    "operations": [
    {   "type": "switch",
        "date": "22:10:1_25/10/2023"
    },
    {   "type": "ope",
        "date": "08:10:12_26/10/2023"
    },
    {   "type": "open",
        "date": "12:10:12_26/10/2023"
    },
    {   "type": "open",
        "date": "14:10:12_26/10/2023"
    },
    {   "type": "open",
        "date": "16:10:12_26/10/2023"
    },
    {   "type": "open",
        "date": "18:10:12_26/10/2023"
    },
    {   "type": "opn",
        "date": "22:10:12_26/10/2023"
    }
    ]
}
22-10-12_25/10/2023 Test_user
{
    "username": "Test_user",
    "operations": [
    {   "type": "switch",
        "date": "22:10:1_25/10/2023"
    },
    {   "type": "ope",
        "date": "08:10:12_26/10/2023"
    },
    {   "type": "open",
        "date": "12:10:12_26/10/2023"
    },
    {   "type": "open",
        "date": "14:10:12_26/10/2023"
    },
    {   "type": "open",
 

Advanced Uploading testing:

In [31]:
import glob, json

js = 'test1.json'

connect_str = os.getenv('AZURE_STORAGE_CONNECTION_STRING')
# Create the BlobServiceClient object
blob_service_client = BlobServiceClient.from_connection_string(connect_str)
# Retriving username and first datetime from the file
with open(js) as file:
    dict = json.load(file)
    username = dict["username"]
    dt = dict["operations"][0]["date"]
    dt = str.replace(dt, "/", "-")
    data = file.read()
    name = "{}_{}".format(username, dt)
    print(name)
# Download a list of files in the 'jsons' container
container_client = blob_service_client.get_container_client(container='jsons')
bl = container_client.list_blobs()
filenames = []
for b in bl:
    filenames.append(b.name)
print(filenames)
# Check if the file already exists in the cloud: and upload it if not
if name in filenames:
    print("File already exists")
else:
    blob_client = blob_service_client.get_blob_client(container='jsons', blob=name)
    blob_client.upload_blob(data, blob_type="BlockBlob")

filenames = container_client.list_blobs()
# Download the blob to a local file
try:
    local_path = "assets/test2/"
    os.mkdir(local_path)
except:
    print("Folder already exists. Proceeding to downlad")
# Add 'AFS' before the .json extension to see both files in the data directory
for blob in filenames:
    with open('{}{}'.format(local_path, blob.name), 'w') as fp: 
        pass
    download_file_path = os.path.join(local_path, str.replace(blob.name ,'.json', 'AFS.json'))
    print("\nDownloading blob to \n\t" + download_file_path)
    with open(file=download_file_path, mode="wb") as download_file:
        download_file.write(container_client.download_blob(blob.name).readall())

# Delete
for blob in filenames:
    container_client.delete_blob(blob.name)
    print("Deleted blob:{}".format(blob.name))


Tester_manual_22:10:12_25-10-2023
['Tester_manual_22:10:12_25-10-2023', 'err_indata.json', 'indata.json', 'test1.json']
File already exists
Folder already exists. Proceeding to downlad


OSError: [Errno 22] Invalid argument: 'assets/test2/Tester_manual_22:10:12_25-10-2023'