In [None]:
# Step 0: Turn on case-sensitive identifier resolution for this Spark session
spark.conf.set("spark.sql.caseSensitive", True)

# Step 1: SharePoint source and Lakehouse destination
# SharePoint host name of the tenant
TENANT_HOST = "overdax.sharepoint.com"
# Site path, interpolated after the host when the site is looked up
SITE_PATH = "Sandbox"
# Display name of the SharePoint list to read
LIST_TITLE = "ListTest"
# Delta table in the Lakehouse that receives the rows
TARGET_TABLE = "ListTest"

In [None]:
# Step 2: Retrieve secrets from Azure Key Vault
KEY_VAULT = "https://pezzott.vault.azure.net/"

# One getSecret call per secret name, in the same order as the targets on the left.
CLIENT_ID, TENANT_ID, CLIENT_SECRET = (
    notebookutils.credentials.getSecret(KEY_VAULT, secret_name)
    for secret_name in ("graph-client-id", "graph-tenant-id", "graph-secret")
)

In [None]:
# Step 3: Acquire access token for Microsoft Graph API
import requests

# Client-credentials request against the tenant's v2.0 token endpoint.
token_url = f"https://login.microsoftonline.com/{TENANT_ID}/oauth2/v2.0/token"
token_resp = requests.post(
    token_url,
    data={
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "grant_type": "client_credentials",
        "scope": "https://graph.microsoft.com/.default",  # Graph app perms
    },
    # requests applies no timeout by default; without one a stalled
    # connection would block this cell indefinitely.
    timeout=30,
)
# Raise on a 4xx/5xx answer instead of failing later on a missing key.
token_resp.raise_for_status()
access_token = token_resp.json()["access_token"]

# Headers reused by every Graph call in the following cells.
headers = {"Authorization": f"Bearer {access_token}", "Accept": "application/json"}

In [None]:
# Step 4: Locate siteId and listId

# NOTE(review): Graph documents path-based site addressing as
# sites/{hostname}:/{server-relative-path}; SITE_PATH is interpolated verbatim
# here, so confirm its value produces a valid address.
site_resp = requests.get(
    f"https://graph.microsoft.com/v1.0/sites/{TENANT_HOST}:{SITE_PATH}",
    headers=headers,
    timeout=30,  # requests has no default timeout
)
# Surface HTTP errors directly instead of a KeyError on site["id"] below.
site_resp.raise_for_status()
site = site_resp.json()
site_id = site["id"]

# Search the site's lists page by page until one whose displayName equals
# LIST_TITLE is found or there are no more pages.
list_id = None
lists_url = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists?$select=id,name,displayName"
while lists_url and list_id is None:
    lists_resp = requests.get(lists_url, headers=headers, timeout=30)
    lists_resp.raise_for_status()
    lists = lists_resp.json()
    list_id = next(
        (
            sp_list["id"]
            for sp_list in lists.get("value", [])
            if sp_list.get("displayName") == LIST_TITLE
        ),
        None,
    )
    lists_url = lists.get("@odata.nextLink")

# Stop here with a clear message rather than a bare StopIteration.
if list_id is None:
    raise LookupError(f"SharePoint list {LIST_TITLE!r} not found in site {site_id!r}")

In [None]:
# Step 5: Read all items with pagination
base = f"https://graph.microsoft.com/v1.0/sites/{site_id}/lists/{list_id}/items"
params = {
    "$expand": "fields",  # include each item's column values in the response
    "$top": "200",        # requested page size
}

items = []
next_url = base
while next_url:
    # The query parameters are sent only with the first request; the
    # @odata.nextLink URLs returned by Graph are followed as-is.
    resp = requests.get(
        next_url,
        headers=headers,
        params=params if next_url == base else None,
        timeout=30,  # requests has no default timeout
    )
    # Fail loudly on HTTP errors: an error payload has neither "value" nor
    # "@odata.nextLink", so it would otherwise look like an empty last page
    # and silently truncate the load.
    resp.raise_for_status()
    page = resp.json()
    items.extend(page.get("value", []))
    next_url = page.get("@odata.nextLink")

In [None]:
# Step 6: Normalize records and save to Delta
# One flat record per item: a shallow copy of its "fields" dict, or an empty
# dict when the item carries no "fields" key.
records = [it.get("fields", {}).copy() for it in items]

if records:
    # Build a DataFrame from the collected dicts.
    df = spark.createDataFrame(records)

    # Columns removed before writing to the table.
    cols_to_drop = [
        "@odata.etag",
        "id",
        "ContentType",
        "Modified",
        "Created",
        "AuthorLookupId",
        "EditorLookupId",
        "_UIVersionString",
        "Attachments",
        "Edit",
        "ItemChildCount",
        "FolderChildCount",
        "_ComplianceFlags",
        "_ComplianceTag",
        "_ComplianceTagWrittenTime",
        "_ComplianceTagUserId",
    ]
    df_selected = df.drop(*cols_to_drop)

    # Append the rows to the Delta table.
    # NOTE(review): every run reads the whole list and appends it, so re-running
    # the notebook presumably duplicates rows; confirm "append" is intended.
    df_selected.write.format("delta").mode("append").saveAsTable(TARGET_TABLE)

    # Preview the first rows of the stored table.
    display(spark.table(TARGET_TABLE).limit(10))
else:
    # Nothing was read from the list (the message is in Portuguese).
    print("Nenhum item retornardo.")