# Create containers in Azure Blob Storage

In [None]:
from azure.storage.blob import BlobServiceClient
import configparser

In [None]:
# Local, non-versioned settings file holding the Azure credentials.
config_path = 'config/prj_cfg_local.txt'

config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed; fail loudly here instead of letting the lookup
# below raise a misleading NoSectionError.
if not config.read(config_path):
    raise FileNotFoundError(f"Configuration file {config_path!r} could not be read")

# Connection string of the storage account, from the [AZURE] section.
storage_con_str = config.get("AZURE", "STORAGE_ACCOUNT_CONN_STR")
# Name of the Azure storage account.
storage_account = "jdata01"

# Names of the blob containers created and used by this notebook.
(
    raw_data_container,
    dim_model_container,
    dim_model_container_new,
    preprocessed_container,
) = (
    "raw",
    "dim-model-azdb",
    "dim-model-azdb-new",
    "preprocessed",
)

In [None]:
# Account-level client; container and blob clients used below are derived from it.
blob_service_client = BlobServiceClient.from_connection_string(conn_str=storage_con_str)

In [None]:
# Create every container this project needs. Creation is best-effort: a failure
# for one container (typically because it already exists) must not prevent the
# remaining ones from being created.
container_names = (
    raw_data_container,
    dim_model_container,
    dim_model_container_new,
    preprocessed_container,
)
for container_name in container_names:
    try:
        blob_service_client.create_container(container_name)
    except Exception as exc:
        # Catch Exception rather than using a bare except so Ctrl-C still
        # interrupts the cell, and show the real cause so that e.g. an
        # authentication error is not mistaken for "already exists".
        print(
            f"Container {container_name} was not created, it possibly already exists "
            f"({type(exc).__name__}: {exc})"
        )
    else:
        print(f"Container {container_name} was created")

# Upload local data to Azure Blob Storage

In [None]:
from azure.storage.blob import BlobClient
import glob
import pathlib

In [None]:
# Local directory that contains all data files to upload.
data_parent = "data"

# "/"-separated glob patterns, relative to data_parent, selecting the files of
# each data set.
path_global_listings = "airbnb-listings.csv"
path_city_listings = "cities/*/*/listings.csv"
path_city_reviews = "cities/*/*/reviews.csv"
path_city_temperature = "weather/ECA_blend_tg/*.txt"
path_city_rain = "weather/ECA_blend_rr/*.txt"

In [None]:
def upload_to_azure(path):
    """Upload every local file matching a glob pattern to the raw-data container.

    Args:
        path: "/"-separated glob pattern, interpreted relative to
            ``data_parent`` (e.g. ``"cities/*/*/listings.csv"``).

    Each matching file is uploaded to ``raw_data_container`` under a blob name
    equal to its local path with the first path component removed, joined with
    "/" regardless of the local OS path separator. The blob name is printed
    before each upload.
    """
    pattern = str(pathlib.Path(data_parent, *path.split("/")))
    for filepath in glob.glob(pattern):
        # glob returns paths with OS-native separators, so split them with
        # pathlib instead of on "/" (which yields an empty name on Windows).
        blob_name = "/".join(pathlib.PurePath(filepath).parts[1:])
        print(blob_name)

        blob_client = blob_service_client.get_blob_client(
            container=raw_data_container, blob=blob_name
        )

        # NOTE(review): upload_blob is called with its defaults; presumably it
        # raises if the blob already exists - confirm before re-running.
        with open(filepath, "rb") as data:
            blob_client.upload_blob(data)

In [None]:
# Upload the file(s) matching the global listings pattern.
upload_to_azure(path=path_global_listings)

In [None]:
# Upload the files matching the per-city listings pattern.
upload_to_azure(path=path_city_listings)

In [None]:
# Upload the files matching the per-city reviews pattern.
upload_to_azure(path=path_city_reviews)

In [None]:
# Upload the files matching the temperature pattern (weather/ECA_blend_tg).
upload_to_azure(path=path_city_temperature)

In [None]:
# Upload the files matching the rain pattern (weather/ECA_blend_rr).
upload_to_azure(path=path_city_rain)

# Move files between containers

In [None]:
import time

# Move every blob from the source container to the target container:
#   1. abort if the source is empty, so the target is never wiped for nothing,
#   2. delete everything currently in the target,
#   3. copy each source blob to the target and delete the source blob once the
#      copy has verifiably succeeded.
source_container = dim_model_container_new
target_container = dim_model_container

source_container_client = blob_service_client.get_container_client(source_container)
target_container_client = blob_service_client.get_container_client(target_container)

# Peek at the first listed blob only; the listing is requested again below for
# the actual move so that this probe does not consume an item.
if next(iter(source_container_client.list_blobs()), None) is None:
    raise ValueError("Source container is empty. Aborted.")

print("Deleting target container blobs if existing ...")
for blob in target_container_client.list_blobs():
    blob_target = blob_service_client.get_blob_client(target_container, blob.name)
    blob_target.delete_blob()
    print(f"{blob_target.url} deleted")

for blob in source_container_client.list_blobs():
    blob_source = blob_service_client.get_blob_client(source_container, blob.name)
    blob_target = blob_service_client.get_blob_client(target_container, blob.name)
    # Use the client's own URL instead of assembling one by hand: it carries
    # the endpoint of the configured account and a properly encoded blob name.
    blob_source_url = blob_source.url

    # start_copy_from_url only *starts* a server-side copy, which may still be
    # pending when the call returns. Wait for it to finish before deleting the
    # source, otherwise the data could be lost.
    copy_status = blob_target.start_copy_from_url(blob_source_url)["copy_status"]
    while copy_status == "pending":
        time.sleep(1)
        copy_status = blob_target.get_blob_properties().copy.status
    if copy_status != "success":
        raise RuntimeError(
            f"Copy of {blob_source_url} to {blob_target.url} ended with status "
            f"{copy_status!r}; the source blob was kept."
        )
    print(f"Copied {blob_source_url} to {blob_target.url}")

    blob_source.delete_blob()
    print(f"Deleted {blob_source_url}")