In [None]:
import os
import pandas as pd
import requests
from datetime import datetime

# Root folder of the local data lake; everything below is created under it.
data_lake_dir = 'data_lake'

# Ingestion log lives directly at the data lake root.
log_file = os.path.join(data_lake_dir, 'ingestion.log')

# Raw landing zone for the CSV source.
csv_raw_dir = os.path.join(data_lake_dir, 'raw/transactions')
os.makedirs(csv_raw_dir, exist_ok=True)

# Raw landing zone for the API source.
api_raw_dir = os.path.join(data_lake_dir, 'raw/customers')
os.makedirs(api_raw_dir, exist_ok=True)

# Date stamp (YYYYMMDD) captured once and reused in every output file name.
today = datetime.now().strftime('%Y%m%d')

def write_log(message, level='INFO'):
    """Append a single timestamped entry to the ingestion log file.

    Args:
        message: Text of the log entry.
        level: Severity label written before the message (default 'INFO').
    """
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    # Append mode keeps entries from earlier runs and creates the file on
    # first use.
    with open(log_file, 'a') as log_handle:
        entry = f"{stamp} {level}: {message}\n"
        log_handle.write(entry)

def ingest_csv(csv_url):
    """Download a CSV and store a date-stamped copy in the raw CSV folder.

    The file is parsed with pandas and written back out without the index
    column. Any failure is recorded in the ingestion log at ERROR level and
    printed; it is not re-raised, so the caller keeps running.

    Args:
        csv_url: Location of the CSV, in any form ``pd.read_csv`` accepts.
    """
    destination = os.path.join(csv_raw_dir, f'transactions_{today}.csv')
    try:
        pd.read_csv(csv_url).to_csv(destination, index=False)
        write_log(f'Ingested CSV: {csv_url} -> {destination}', 'INFO')
        print(f"Successfully saved CSV data to {destination}")
    except Exception as exc:
        # Best-effort step: report the problem and let the script continue.
        write_log(f'Failed to ingest CSV {csv_url}: {exc}', 'ERROR')
        print(f"Error in ingesting CSV: {exc}")

def ingest_api(api_url, timeout=30):
    """Fetch JSON records from an API and save them as a date-stamped CSV.

    The response body must be a JSON object with a top-level ``'results'``
    list; its entries are flattened with ``pd.json_normalize`` and written
    to the raw API folder without the index column. Any failure (network
    error, timeout, HTTP error status, missing ``'results'`` key, write
    error) is recorded in the ingestion log at ERROR level and printed; it
    is not re-raised, so the caller keeps running.

    Args:
        api_url: Endpoint to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Defaults to 30.
    """
    output_path = os.path.join(api_raw_dir, f'random_users_{today}.csv')
    try:
        # requests waits forever unless a timeout is given, so an
        # unresponsive server would otherwise hang the whole ingestion run.
        response = requests.get(api_url, timeout=timeout)
        # Turn 4xx/5xx responses into exceptions instead of parsing them.
        response.raise_for_status()
        users = response.json()['results']
        # Flatten nested objects into dotted column names for CSV output.
        df = pd.json_normalize(users)
        df.to_csv(output_path, index=False)
        write_log(f'Ingested API: {api_url} -> {output_path}', 'INFO')
        print(f"Successfully saved API data to {output_path}")
    except Exception as e:
        # Best-effort step: report the problem and let the script continue.
        write_log(f'Failed to ingest API {api_url}: {e}', 'ERROR')
        print(f"Error in ingesting API: {e}")

# Source 1: CSV file hosted on GitHub.
github_csv_url = 'https://raw.githubusercontent.com/datawithadi/ACI_2/refs/heads/main/HR_Employee_Attrition.csv'
ingest_csv(github_csv_url)

# Source 2: JSON API returning a batch of generated user records.
api_endpoint = 'https://randomuser.me/api/?results=500'
ingest_api(api_endpoint)

# Print the full log (including entries from earlier runs) so the outcome of
# both ingestions is visible in the cell output.
with open(log_file) as log_handle:
    print("\n--- Ingestion Log ---")
    log_text = log_handle.read()
    print(log_text)


Successfully saved CSV data to data_lake/raw/transactions/transactions_20250824.csv
Successfully saved API data to data_lake/raw/customers/random_users_20250824.csv

--- Ingestion Log ---
2025-08-24 10:01:05 INFO: Ingested CSV: https://raw.githubusercontent.com/datawithadi/ACI_2/refs/heads/main/HR_Employee_Attrition.csv -> data_lake/raw/transactions/transactions_20250824.csv
2025-08-24 10:01:05 INFO: Ingested API: https://randomuser.me/api/?results=100 -> data_lake/raw/customers/random_users_20250824.csv

