# Supabase Data Migration

Use this notebook to upload your local model and data artifacts to **Supabase Storage** so your API can download them.

In [1]:
# import os
# from pathlib import Path
# from dotenv import load_dotenv
# from supabase import create_client, Client

# # 1. Load Environment Variables (SUPABASE_URL, SUPABASE_KEY)
# load_dotenv(override=True)

# url = os.environ.get("SUPABASE_URL")
# key = os.environ.get("SUPABASE_KEY")

# if not url or not key:
#     raise ValueError("‚ùå Missing SUPABASE_URL or SUPABASE_KEY in .env file")

# print(f"‚úÖ Credentials found for: {url}")
import os
from pathlib import Path
from dotenv import load_dotenv
from supabase import create_client, Client

# 1. Load Environment Variables
# override=True: values from the .env file replace variables already set in the process.
load_dotenv(override=True)

# Use SECRET_KEY for administrative tasks like uploading.
# Surrounding whitespace and any single/double quote characters are removed from both values.
url = os.environ.get("SUPABASE_URL", "").strip().replace('"', '').replace("'", "")
key = os.environ.get("SUPABASE_SECRET_KEY", "").strip().replace('"', '').replace("'", "")

# Validate BEFORE adding the trailing slash: an unset SUPABASE_URL would otherwise
# become "/" (a truthy string) and slip past the missing-credentials check.
base_url = url.rstrip("/")
if not base_url or not key:
    raise ValueError("‚ùå Missing credentials in .env file. Ensure SUPABASE_URL and SUPABASE_SECRET_KEY are set.")

# Normalise to exactly one trailing slash (only trailing slashes are affected).
url = base_url + "/"

print(f"‚úÖ Client configured for: {url}")

‚úÖ Client configured for: https://nbjyhxzkwsxprivrteaw.supabase.co/


In [2]:
# 2. Initialize Supabase Client
bucket_name = "housing-data"
supabase: Client = create_client(url, key)

# Report whether the target bucket is present. This cell never creates the
# bucket; when it is missing, the user is pointed to the Supabase Dashboard.
# Any error raised while listing buckets is printed rather than propagated.
try:
    buckets = supabase.storage.list_buckets()
    bucket_names = [bucket.name for bucket in buckets]
    if bucket_name in bucket_names:
        print(f"‚úÖ Bucket '{bucket_name}' exists.")
    else:
        print(f"‚ö†Ô∏è Bucket '{bucket_name}' not found. Please create it in the Supabase Dashboard -> Storage -> New Bucket.")
        print("   Make sure to set it as PUBLIC if you want easy public access, or PRIVATE for authenticated download (our API uses authenticated).")
except Exception as exc:
    print(f"‚ùå Error connecting to Supabase: {exc}")

Storage endpoint URL should have a trailing slash.
‚úÖ Bucket 'housing-data' exists.


In [3]:
# 3. Upload Functions

def upload_file(local_path: str, remote_path: str):
    """Upload a single local file to the Supabase Storage bucket.

    Relies on the notebook-level ``supabase`` client and ``bucket_name``.
    Problems are reported with ``print`` instead of being raised, so the
    function always returns ``None``.

    Args:
        local_path: Path of the file on disk to send.
        remote_path: Object path inside the bucket to write to.
    """
    # Guard clause: nothing to do when the source file is absent.
    if not Path(local_path).exists():
        print(f"‚ùå File not found: {local_path}")
        return

    print(f"üì§ Uploading {local_path} -> {bucket_name}/{remote_path} ...")

    # Options forwarded to the storage client with every upload.
    upload_options = {"cache-control": "3600", "upsert": "true"}

    try:
        with open(local_path, "rb") as source:
            target_bucket = supabase.storage.from_(bucket_name)
            target_bucket.upload(
                file=source,
                path=remote_path,
                file_options=upload_options,
            )
        print("   ‚úÖ Upload successful!")
    except Exception as err:
        # Any failure (network, permissions, size limits, ...) is printed, not raised.
        print(f"   ‚ùå Upload failed: {err}")


In [4]:
# 4. Upload Code

# (local file, destination path inside the bucket) for each artifact, in upload order:
#   1. the model
#   2. the training data (for features check)
artifact_uploads = [
    ("../models/xgb_best_model.pkl", "models/xgb_best_model.pkl"),
    (
        "../data/processed/feature_engineered_train.csv",
        "data/processed/feature_engineered_train.csv",
    ),
]

for artifact_local, artifact_remote in artifact_uploads:
    upload_file(local_path=artifact_local, remote_path=artifact_remote)

üì§ Uploading ../models/xgb_best_model.pkl -> housing-data/models/xgb_best_model.pkl ...
   ‚úÖ Upload successful!
üì§ Uploading ../data/processed/feature_engineered_train.csv -> housing-data/data/processed/feature_engineered_train.csv ...
   ‚ùå Upload failed: {'statusCode': 413, 'error': Payload too large, 'message': The object exceeded the maximum allowed size}


In [5]:
# 5. Create and Upload a "Tiny" version of the training data (just for column names)
# The full CSV (noted as 181MB) is rejected with a 413 error, so only a
# 5-row sample is written locally and uploaded under the original remote name.

import pandas as pd

local_full_path = "../data/processed/feature_engineered_train.csv"
local_tiny_path = "../data/processed/train_schema_only.csv"
remote_path = "data/processed/feature_engineered_train.csv"

if not os.path.exists(local_full_path):
    # Without the full training file there is nothing to sample from.
    print("‚ùå Error: Original training file not found to create schema.")
else:
    print("‚úÇÔ∏è Creating tiny schema file...")
    # Keep the header plus the first 5 data rows only.
    df_tiny = pd.read_csv(local_full_path, nrows=5)
    # Persist the sample next to the full file, without the index column.
    df_tiny.to_csv(local_tiny_path, index=False)

    # Send the sample to the same remote location the API expects.
    upload_file(local_tiny_path, remote_path)

‚úÇÔ∏è Creating tiny schema file...
üì§ Uploading ../data/processed/train_schema_only.csv -> housing-data/data/processed/feature_engineered_train.csv ...
   ‚úÖ Upload successful!
