In [1]:
import os
from dotenv import load_dotenv

import pandas as pd
from azure.storage.blob import BlobServiceClient

# Pull settings from a local .env file (if one exists) into the process
# environment so the os.getenv lookups below can see them.
load_dotenv()

# Strip surrounding whitespace: a value that is only blanks/newlines would
# otherwise pass the emptiness check below and fail later inside the SDK with
# a much less specific "blank or malformed" error.
CONNECTION_STRING = (os.getenv("AZURE_STORAGE_CONNECTION_STRING") or "").strip()

# `or` instead of a getenv default: the fallback also applies when the
# variable is defined but empty, not only when it is missing.
CONTAINER_NAME = (os.getenv("AZURE_STORAGE_CONTAINER") or "").strip() or "ml-data"

# Fail fast, before any client is built, when no usable connection string exists.
if not CONNECTION_STRING:
    raise ValueError("AZURE_STORAGE_CONNECTION_STRING not set in environment")

# Only the container name is echoed; the connection string is a secret.
print("Using container:", CONTAINER_NAME)

Using container: ml-data


In [2]:
# Account-level client built from the connection string, then a client scoped
# to the single container this notebook works with.
blob_service_client = BlobServiceClient.from_connection_string(CONNECTION_STRING)
container_client = blob_service_client.get_container_client(CONTAINER_NAME)

# Make sure the container is there, creating it on first use.
# Any failure is reported and then re-raised so the cell still stops.
try:
    container_missing = not container_client.exists()
    if container_missing:
        container_client.create_container()
    status_message = (
        f"Created container: {CONTAINER_NAME}"
        if container_missing
        else f"Container already exists: {CONTAINER_NAME}"
    )
    print(status_message)
except Exception as err:
    print("Failed to check/create container:", err)
    raise

ValueError: Connection string is either blank or malformed.

Day 2 


In [None]:
from sklearn.datasets import load_breast_cancer

# Where the local CSV copy of the dataset is written (relative to the
# notebook's working directory).
local_csv_path = "day1_breast_cancer.csv"

# as_frame=True so the loaded dataset exposes a pandas DataFrame as `.frame`.
data = load_breast_cancer(as_frame=True)
df = data.frame

# index=False keeps the DataFrame's row index out of the file.
df.to_csv(local_csv_path, index=False)

# Bare final expression: the notebook displays the path that was written.
local_csv_path

In [None]:
# The "raw/" prefix gives the blob a folder-like path inside the container.
blob_name = "raw/day1_breast_cancer.csv"
blob_client = container_client.get_blob_client(blob_name)

# Read the CSV in binary mode and hand the open file to the SDK.
# overwrite=True lets the cell be re-run when the blob already exists.
with open(local_csv_path, "rb") as csv_file:
    blob_client.upload_blob(csv_file, overwrite=True)

print(f"Uploaded {local_csv_path} to blob '{blob_name}'")

In [None]:
# Print the name of every blob currently in the container, one per line,
# as a quick check that the upload landed where expected.
print("Blobs in container:")
blob_listing = container_client.list_blobs()
for blob in blob_listing:
    print("-", blob.name)

In [None]:
# Local destination for the round-trip copy of the uploaded blob.
download_path = "downloaded_day1_breast_cancer.csv"

# Fetch the whole blob BEFORE opening the destination. Opening with "wb"
# truncates the file immediately, so downloading first means a failed
# download (or a missing blob_client) cannot leave an empty file behind.
download_stream = blob_client.download_blob()
blob_bytes = download_stream.readall()

with open(download_path, "wb") as out_file:
    out_file.write(blob_bytes)

print(f"Downloaded blob to {download_path}")

# Read the downloaded copy back and show the first rows as a sanity check.
df_downloaded = pd.read_csv(download_path)
df_downloaded.head()