In [None]:
import pandas as pd
from azure.storage.blob import BlobServiceClient
import io

# Configuration
import os
from getpass import getpass

account_name = "retailstorage11"
container_name = "retail"
# The storage key is a secret and must not be saved in the notebook. Read it
# from the AZURE_STORAGE_ACCOUNT_KEY environment variable; if that is unset,
# ask for it interactively (getpass does not echo the input).
account_key = os.environ.get("AZURE_STORAGE_ACCOUNT_KEY") or getpass("Azure Storage account key: ")
# Maximum number of blobs listed in the preview below.
preview_limit = 10

print("🔄 Attempting to connect to Azure Blob Storage...")

# Connectivity smoke test: build the client, list the container and preview
# its contents. Failures are reported (message and exception type) instead of
# raised, because the only purpose of this cell is to diagnose the connection.
try:
    # Create blob service client
    account_url = f"https://{account_name}.blob.core.windows.net"
    print(f"📍 Account URL: {account_url}")

    blob_service_client = BlobServiceClient(account_url=account_url, credential=account_key)

    # Creating the container client does not prove connectivity by itself;
    # the list_blobs() call below is what exercises the credentials.
    container_client = blob_service_client.get_container_client(container_name)
    print(f"📂 Container: {container_name}")

    # List blobs in container (materialized so the blobs can be counted)
    print("🔍 Listing blobs...")
    blob_list = list(container_client.list_blobs())

    print(f"✅ Successfully connected! Found {len(blob_list)} files:")

    if not blob_list:
        print("📭 No files found in the container")
    else:
        # Show only the first few entries to keep the cell output short
        for i, blob in enumerate(blob_list[:preview_limit], 1):
            print(f"  {i}. {blob.name} ({blob.size} bytes)")

        if len(blob_list) > preview_limit:
            print(f"  ... and {len(blob_list) - preview_limit} more files")

except Exception as e:
    print(f"❌ Error: {e}")
    print(f"Error type: {type(e).__name__}")


In [None]:
import pandas as pd
from azure.storage.blob import BlobServiceClient
import io
import pyarrow.parquet as pq

import os
from getpass import getpass

# Storage account and container that hold the bronze-layer parquet files.
account_name = "retailstorage11"
# The storage key is a secret and must not be saved in the notebook. Read it
# from the AZURE_STORAGE_ACCOUNT_KEY environment variable; if that is unset,
# ask for it interactively (getpass does not echo the input).
account_key = os.environ.get("AZURE_STORAGE_ACCOUNT_KEY") or getpass("Azure Storage account key: ")
container_name = "retail"

# Clients shared by the rest of this cell: the service client authenticates
# against the account, the container client is what the readers below use.
account_url = f"https://{account_name}.blob.core.windows.net"
blob_service_client = BlobServiceClient(account_url=account_url, credential=account_key)
container_client = blob_service_client.get_container_client(container_name)

# Function to read parquet files from blob storage
def read_parquet_from_blob(blob_name, container=None):
    """Download one parquet blob and load it into a pandas DataFrame.

    Args:
        blob_name: Path of the blob inside the container.
        container: Optional container client to read from. Any object that
            exposes ``get_blob_client(name)`` works. When omitted, the
            module-level ``container_client`` is used.

    Returns:
        The loaded ``DataFrame``, or ``None`` if the download or the parquet
        decoding fails. The error is printed rather than raised so that the
        remaining reads in the notebook still run; callers must check for
        ``None``.
    """
    try:
        print(f"📥 Reading {blob_name}...")
        # Resolved inside the try so that a missing global client is reported
        # through the same "print and return None" path as any other failure.
        source = container_client if container is None else container
        blob_client = source.get_blob_client(blob_name)

        # Download the whole blob into memory
        blob_data = blob_client.download_blob().readall()

        # pandas needs a file-like object, so wrap the raw bytes
        df = pd.read_parquet(io.BytesIO(blob_data))
        print(f"✅ Successfully read {blob_name}")
        print(f"   Shape: {df.shape}")
        return df

    except Exception as e:
        print(f"❌ Error reading {blob_name}: {e}")
        return None

def _print_frame_overview(title, frame):
    """Print shape, columns, a five-row preview and dtypes of a loaded frame.

    Does nothing when ``frame`` is None, i.e. when the read failed.
    """
    if frame is None:
        return
    print(title)
    print("=" * 50)
    print(f"Shape: {frame.shape}")
    print(f"Columns: {list(frame.columns)}")
    print("\nFirst 5 rows:")
    print(frame.head())
    print(f"\nData types:\n{frame.dtypes}")


# Read the parquet files
print("🔍 Reading parquet files from Azure Blob Storage...\n")

# 1. Read customers data
customers_blob = "bronze/customer/manish040596/azure-data-engineer---multi-source/refs/heads/main/customers.parquet"
customers_df = read_parquet_from_blob(customers_blob)
_print_frame_overview("\n📊 CUSTOMERS DATA:", customers_df)

# 2. Read products data
print("\n" + "=" * 70)
products_blob = "bronze/product/dbo.products.parquet"
products_df = read_parquet_from_blob(products_blob)
_print_frame_overview("\n📦 PRODUCTS DATA:", products_df)

# 3. Read store data
print("\n" + "=" * 70)
store_blob = "bronze/store/dbo.stores.parquet"
df_store = read_parquet_from_blob(store_blob)
_print_frame_overview("\n📋 Store DATA:", df_store)

# 4. Read transaction data
print("\n" + "=" * 70)
transaction_blob = "bronze/transaction/dbo.transactions.parquet"
df_transaction = read_parquet_from_blob(transaction_blob)
_print_frame_overview("\n Transaction DATA:", df_transaction)

# 5. List the parquet files that exist in the container
print("\n" + "=" * 70)
print("📂 ALL FILES IN CONTAINER:")
print("=" * 50)

blob_list = list(container_client.list_blobs())
# Keep the blob objects so each size is read directly instead of rescanning
# the whole listing once per file.
parquet_blobs = [blob for blob in blob_list if blob.name.endswith(".parquet")]
parquet_files = [blob.name for blob in parquet_blobs]

print(f"Found {len(parquet_files)} parquet files:")
for i, blob in enumerate(parquet_blobs, 1):
    print(f"  {i}. {blob.name} ({blob.size / 1024:.1f} KB)")

# Summary: (label, variable name, frame, description). The variable names are
# the ones actually assigned above, so the hints printed below can be
# copy-pasted into later cells.
frame_catalog = [
    ("Customers", "customers_df", customers_df, "Customer data"),
    ("Products", "products_df", products_df, "Product data"),
    ("Store", "df_store", df_store, "Store data"),
    ("Transactions", "df_transaction", df_transaction, "Transaction data"),
]
# A frame is None when its read failed; only report the ones that loaded.
loaded_frames = [entry for entry in frame_catalog if entry[2] is not None]

print("\n🎯 SUMMARY:")
print("=" * 50)
for label, _, frame, _ in loaded_frames:
    print(f"✅ {label}: {frame.shape[0]} rows, {frame.shape[1]} columns")

if loaded_frames:
    print("\n💡 Your DataFrames are ready to use:")
    for _, variable_name, _, description in loaded_frames:
        print(f"   - {variable_name}: {description}")
else:
    print("\n⚠️ No DataFrames were loaded; check the errors above.")

In [None]:
# Render the transactions frame assigned in the loading cell above. The value
# is None if that read failed, since read_parquet_from_blob returns None on error.
display(df_transaction)


In [None]:
# Render the customers frame assigned in the loading cell above. The value
# is None if that read failed, since read_parquet_from_blob returns None on error.
display(customers_df)
