### Codebasics Data Engineering Tutorial: Data Ingestion for Logs

In [1]:
import os
import boto3
from datetime import datetime, timedelta

# ---------- CONFIG ----------
# Local folder that holds the per-day log files (support_logs_<YYYY-MM-DD>.log).
LOGS_FOLDER = "day-wise-logs-data/"
# Text file that stores the date (YYYY-MM-DD) of the last log that was uploaded.
DATE_TRACKER_FILE = "log_date_tracker.txt"

S3_BUCKET = "careplus-data-5173" # this should match the exact bucket name you have set up in AWS S3
# Key prefix prepended to every uploaded log object's name.
S3_PREFIX = "Support-logs/raw/"

# python-dotenv: load variables from a local .env file into the process environment.
from dotenv import load_dotenv
load_dotenv()

# Echo the region as the cell output to confirm the environment variables are available.
os.getenv("REGION")

'ap-south-1'

In [2]:
import os

# Keyword arguments passed to boto3.client(); each value comes from the
# environment and is None when the corresponding variable is not set.
AWS_CONFIG = dict(
    aws_access_key_id=os.getenv("AWS_ACCESS_KEY"),
    aws_secret_access_key=os.getenv("SECRET_KEY"),
    region_name=os.getenv("REGION"),
)

In [3]:
# ---------- UTILITY FUNCTIONS ----------
def read_last_date(file_path, default_date="2025-06-30"):
    """Return the last ingested date recorded in the tracker file.

    Args:
        file_path: Path to the text file expected to hold one ``YYYY-MM-DD``
            date. The content is not validated here.
        default_date: Value returned when no usable date is stored, i.e. the
            file does not exist or contains only whitespace.

    Returns:
        The file's contents with surrounding whitespace stripped, or
        ``default_date``.
    """
    if os.path.exists(file_path):
        with open(file_path, 'r') as f:
            stored_date = f.read().strip()
        # An empty tracker carries no date. Returning "" would only fail later
        # when the value is parsed, so treat it the same as a missing file.
        if stored_date:
            return stored_date
    return default_date

def update_last_date(file_path, new_date):
    """Store ``new_date`` as the entire contents of the tracker file.

    The value is first written to a sibling ``<file_path>.tmp`` file and then
    moved over ``file_path`` with ``os.replace``. Opening the tracker itself
    in ``'w'`` mode would truncate it before the write, so a failure at that
    point would leave an empty tracker; with the swap, the previous contents
    stay in place until the new file is completely written.

    Args:
        file_path: Path of the tracker file to create or overwrite.
        new_date: Date string to store (written as-is, no trailing newline).

    Raises:
        TypeError: If ``new_date`` is not a string (raised by ``write``).
        OSError: If the temporary file cannot be written or moved into place.
    """
    tmp_path = f"{file_path}.tmp"
    try:
        with open(tmp_path, 'w') as f:
            f.write(new_date)
        os.replace(tmp_path, file_path)
    finally:
        # After a successful replace the temp file no longer exists; this only
        # cleans up when the write or the replace failed part-way.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

def get_next_date(last_date_str):
    """Return the calendar day after ``last_date_str``.

    Both the input and the result are ``YYYY-MM-DD`` strings. A value that
    does not match that format makes ``datetime.strptime`` raise ``ValueError``.
    """
    date_format = "%Y-%m-%d"
    following_day = datetime.strptime(last_date_str, date_format) + timedelta(days=1)
    return following_day.strftime(date_format)


def upload_log_file_to_s3(file_path, bucket, key):
    """Upload a local log file to S3 as an exact byte-for-byte copy.

    Args:
        file_path: Path of the local file to upload.
        bucket: Name of the destination S3 bucket.
        key: Object key to store the file under.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        Exception: Any error raised by boto3 while creating the client or
            sending the object is propagated unchanged.
    """
    s3 = boto3.client('s3', **AWS_CONFIG)
    # Binary mode keeps the upload byte-exact: text mode would translate
    # "\r\n"/"\r" to "\n" and reject bytes that are not valid UTF-8. Passing
    # the open handle also avoids holding the whole file in memory as a string.
    with open(file_path, 'rb') as f:
        s3.put_object(Bucket=bucket, Key=key, Body=f)
    print(f"✅ Uploaded log file to s3://{bucket}/{key}")


# ---------- MAIN INGESTION LOGIC ----------
def run_log_ingestion():
    """Upload the next day's support log to S3 and advance the date tracker.

    Steps:
        1. Read the last ingested date from ``DATE_TRACKER_FILE``.
        2. Compute the following day and locate
           ``LOGS_FOLDER/support_logs_<date>.log``.
        3. Upload that file to ``S3_BUCKET`` under ``S3_PREFIX``.
        4. Write the new date back to the tracker.

    The tracker is updated only after the upload call returns. If the upload
    raises (for example because the day's log file does not exist yet), the
    exception propagates and the tracker keeps its previous value, so the same
    day is attempted again on the next run.
    """
    last_date = read_last_date(DATE_TRACKER_FILE)
    next_date = get_next_date(last_date)
    print(last_date, next_date)

    # The local file and the S3 object share the same name; build it once so
    # the path and the key cannot drift apart.
    file_name = f"support_logs_{next_date}.log"
    log_file_full_path = os.path.join(LOGS_FOLDER, file_name)
    print(log_file_full_path)

    s3_key = f"{S3_PREFIX}{file_name}"
    upload_log_file_to_s3(log_file_full_path, S3_BUCKET, s3_key)
    update_last_date(DATE_TRACKER_FILE, next_date)
    print(f"📅 Updated tracker to {next_date}")

In [4]:
# ---------- RUN ----------
# Each execution ingests exactly one day's log: the day after the tracked date.
if __name__ == "__main__":
    run_log_ingestion()

2025-07-02 2025-07-03
day-wise-logs-data/support_logs_2025-07-03.log
✅ Uploaded log file to s3://careplus-data-5173/Support-logs/raw/support_logs_2025-07-03.log
📅 Updated tracker to 2025-07-03
