In [4]:
import os
import pandas as pd
from datetime import datetime, timedelta

# Report which calendar days from 2018-01-01 through 2024-12-31 have no rows
# in the checkpoint CSV, and write them (one date per line, YYYY-MM-DD) to
# missing_dates.txt in the current working directory.

# Step 1: Get current directory
current_dir = os.getcwd()

# Step 2: Load the CSV
csv_path = os.path.join(current_dir, '..', 'data', 'checkpoints', 'checkpoint9_no_2025.csv')
# low_memory=False makes pandas infer each column's dtype from the whole file
# instead of chunk by chunk.
# NOTE(review): the recorded run of this cell emitted a warning pointing at
# this call -- presumably pandas' mixed-type DtypeWarning; confirm it is gone.
df = pd.read_csv(csv_path, low_memory=False)

# Step 3: Parse the date column
print("[INFO] Parsing date column...")
df['Date'] = pd.to_datetime(df['Date'], errors='coerce')
# errors='coerce' turns unparseable values into NaT. Surface how many rows are
# about to be discarded, because their dates may otherwise be reported as
# "missing" below without any explanation.
unparsed_rows = int(df['Date'].isna().sum())
if unparsed_rows:
    print(f"[WARN] Dropping {unparsed_rows} rows with missing or unparseable dates.")
df = df.dropna(subset=['Date'])  # Drop rows where the date couldn't be parsed
# Keep only the calendar day so values compare equal to datetime.date objects.
df['Date'] = df['Date'].dt.date

# Step 4: Generate all expected dates from 2018 to 2024
start_date = datetime(2018, 1, 1).date()
end_date = datetime(2024, 12, 31).date()
# +1 so that end_date itself is included in the range.
expected_dates = {
    start_date + timedelta(days=offset)
    for offset in range((end_date - start_date).days + 1)
}

# Step 5: Get actual dates in the dataset
actual_dates = set(df['Date'])

# Step 6: Find missing dates
print("[INFO] Finding missing dates...")
missing_dates = sorted(expected_dates - actual_dates)

# Step 7: Save missing dates to a text file
output_file = os.path.join(current_dir, 'missing_dates.txt')
print(f"[INFO] Writing missing dates to: {output_file}")
with open(output_file, 'w', encoding='utf-8') as f:
    f.writelines(f"{day}\n" for day in missing_dates)

print(f"[DONE] Found {len(missing_dates)} missing dates.")


  df = pd.read_csv(csv_path)


[INFO] Parsing date column...
[INFO] Finding missing dates...
[INFO] Writing missing dates to: c:\Users\Indel\Documents\gatewayinitiative-lawrencepd\scripts\missing_dates.txt
[DONE] Found 96 missing dates.
