# In [0]:
import os
import io
import sys
import pandas as pd
from azure.identity import DefaultAzureCredential
from azure.storage.filedatalake import DataLakeServiceClient

# ---------------------------------------------------------------------------
# 1) Configuration
# ---------------------------------------------------------------------------
# Update these to match your new file storage details.
# STORAGE_ACCOUNT_NAME is interpolated into the ADLS endpoint URL, NEW_CONTAINER_NAME
# selects the file system (container), and NEW_PREFIX is the path listed for CSV files.
# NOTE(review): Azure container names accept only lowercase letters, digits and
# hyphens, so a renamed container would look like "silver-v2", not "silver_v2".
STORAGE_ACCOUNT_NAME = "mailchimpspnetwork"  # or a new account if changed
NEW_CONTAINER_NAME = "silver"            # e.g. "silver-v2"
NEW_PREFIX = "mailchimp_clean"  # path prefix inside the container that is searched for CSVs

# ---------------------------------------------------------------------------
# 2) Initialize ADLS Client
# ---------------------------------------------------------------------------
# Authenticate through DefaultAzureCredential; no keys or secrets are set in this file.
credential = DefaultAzureCredential()
# Account-level client pointed at the storage account's DFS endpoint.
service_client = DataLakeServiceClient(
    account_url=f"https://{STORAGE_ACCOUNT_NAME}.dfs.core.windows.net",
    credential=credential
)
# Container-scoped client; the list/load helper functions in this file go through it.
new_fs = service_client.get_file_system_client(file_system=NEW_CONTAINER_NAME)

# ---------------------------------------------------------------------------
# 3) Function to list all CSV files in the new path
# ---------------------------------------------------------------------------
def list_csvs_in_new_storage() -> list:
    """
    List every CSV file found under NEW_PREFIX in the configured container.

    Returns:
        The path names (e.g. "mailchimp_clean/SomeList.csv") of all
        non-directory entries whose name ends in ".csv", compared
        case-insensitively. An empty list is returned when nothing matches
        or when the listing fails; on failure an [ERROR] line is printed
        before returning.
    """
    try:
        # get_paths() hands back a lazy pager: the service is only contacted
        # once iteration starts. The iteration therefore has to live inside
        # the try block, otherwise auth / missing-path / network errors would
        # escape this handler and crash the caller.
        return [
            entry.name
            for entry in new_fs.get_paths(path=NEW_PREFIX)
            if not entry.is_directory and entry.name.lower().endswith(".csv")
        ]
    except Exception as e:
        print(f"[ERROR] Could not list paths under '{NEW_PREFIX}' in container '{NEW_CONTAINER_NAME}': {e}")
        return []

# ---------------------------------------------------------------------------
# 4) Function to load a chosen CSV into a Pandas DataFrame
# ---------------------------------------------------------------------------
def load_csv_from_new_storage(csv_path: str) -> pd.DataFrame:
    """
    Download one CSV file from the container and parse it with pandas.

    Args:
        csv_path: Path of the file inside the container, as returned by the
            listing helper (e.g. 'mailchimp_clean/SomeList.csv').

    Returns:
        The file contents as a DataFrame.
    """
    # Fetch the whole file into memory in one go.
    raw_bytes = new_fs.get_file_client(csv_path).download_file().readall()
    # The file is decoded as UTF-8 before pandas sees it.
    text_buffer = io.StringIO(raw_bytes.decode("utf-8"))
    return pd.read_csv(text_buffer)

# ---------------------------------------------------------------------------
# 5) Main: List files, pick one, load into DataFrame, show basic info
# ---------------------------------------------------------------------------

# Step 1 - discover which CSV files are available; stop early when there are none.
csv_files = list_csvs_in_new_storage()
if not csv_files:
    print(f"[WARNING] No CSV files found under '{NEW_PREFIX}' in container '{NEW_CONTAINER_NAME}'.")
    sys.exit(0)

# Show a 1-based menu so the number the user types maps directly onto an entry.
print("[INFO] Available CSV files in new storage:")
for menu_number, csv_name in enumerate(csv_files, 1):
    print(f" {menu_number}. {csv_name}")

# Step 2 - read the user's choice and turn it into a 0-based list index.
selection = input(f"\nSelect a file number (1-{len(csv_files)}): ")
try:
    selection_idx = int(selection) - 1
    selection_ok = 0 <= selection_idx < len(csv_files)
except ValueError:
    # Non-numeric input is rejected exactly like an out-of-range number.
    selection_ok = False

if not selection_ok:
    print("[ERROR] Invalid selection. Exiting.")
    sys.exit(1)

chosen_file = csv_files[selection_idx]
print(f"[INFO] You chose: {chosen_file}")

# Step 3 - download the chosen file and parse it into a DataFrame.
df = load_csv_from_new_storage(chosen_file)
print(f"[INFO] Loaded DataFrame with shape: {df.shape}")

# Prefer the notebook's display() helper (Databricks, Jupyter, etc.); when that
# name does not exist, fall back to printing the first rows.
try:
    display(df)
except NameError:
    print("[INFO] 'display()' not available. Printing top 5 rows instead.")
    print(df.head())
