In [6]:
import pandas as pd
import numpy as np
import os

# Input CSV locations (adjust these to wherever the files live on your machine)
file1_path = "/Users/Grace/Desktop/2025-studio/project3/mega_millions_jackpot_history.csv"
file2_path = "/Users/Grace/Desktop/2025-studio/project3/scraped_lottery_jackpot_history.csv"

# Read both CSVs into DataFrames, in the same order as the paths above
df1, df2 = (pd.read_csv(csv_path) for csv_path in (file1_path, file2_path))

# Strip the leading three-uppercase-letter prefix plus space from 'Draw Date'
# (presumably a weekday abbreviation -- verify against the CSV), then parse
# what remains as month/day/two-digit-year.
df1['Draw Date'] = (
    df1['Draw Date']
    .str.replace(r'^[A-Z]{3} ', '', regex=True)
    .pipe(pd.to_datetime, format="%m/%d/%y")
)

# Parse df2's 'Date' strings as day-abbreviated month-two-digit-year
df2['Date'] = df2['Date'].pipe(pd.to_datetime, format="%d-%b-%y")

def convert_jackpot(value):
    """Convert a jackpot label such as "$1.5 Billion" to a number of millions.

    Dollar signs, thousands separators, letter case and whitespace around the
    number are ignored, so "$810 Million", "$810million" and "810 MILLION"
    all yield 810.0.

    Args:
        value: Jackpot text containing a number followed by "million" or
            "billion". Non-string values (e.g. NaN from an empty CSV cell)
            are accepted and treated as missing.

    Returns:
        The amount in millions as a float, or np.nan when the value is not a
        string, names no recognised unit, or its numeric part cannot be
        parsed.
    """
    # Missing cells arrive as float NaN / None, which have no .replace().
    if not isinstance(value, str):
        return np.nan

    cleaned = value.replace('$', '').replace(',', '').lower()

    # 'billion' is tested first, mirroring the original branch order.
    for unit, millions_per_unit in (('billion', 1000), ('million', 1)):
        if unit in cleaned:
            try:
                # float() ignores surrounding whitespace, so the space before
                # the unit is optional.
                return float(cleaned.replace(unit, '')) * millions_per_unit
            except ValueError:
                return np.nan  # Unit present but the number is malformed

    return np.nan  # No recognised unit

# Convert df2's 'Jackpot' strings to numeric values via convert_jackpot
df2['Jackpot'] = df2['Jackpot'].apply(convert_jackpot)

# As-of merge on the date keys (both sides must be sorted on their key).
# direction='backward' is the pandas default, spelled out here: each df1 draw
# is paired with the most recent df2 row dated on or before it -- which is not
# necessarily the closest date overall.
# NOTE(review): if the nearest date in either direction (or the next one) is
# what is wanted, switch to direction='nearest' / 'forward' -- confirm intent.
merged_df = pd.merge_asof(
    df1.sort_values('Draw Date'),
    df2.sort_values('Date'),
    left_on='Draw Date',
    right_on='Date',
    direction='backward',
)

# Keep only the comparison columns and give them descriptive names.
# rename() without inplace returns a new frame, so the column subset is never
# mutated in place (avoids chained-assignment warnings on the selection).
# NOTE(review): 'Jackpot' has been converted to millions above, while
# 'Jackpot_his' is passed through unchanged -- confirm both columns use the
# same unit before comparing them.
merged_df = merged_df[['Draw Date', 'Jackpot_his', 'Jackpot']].rename(
    columns={'Jackpot_his': 'Advertised Jackpot', 'Jackpot': 'Actual Winning Jackpot'}
)

# Define save path for the merged CSV
save_path = "/Users/Grace/Desktop/2025-studio/project3/merged_jackpot_data.csv"

# Ensure the directory exists before saving
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# Save merged data to CSV (without the positional index column)
merged_df.to_csv(save_path, index=False)

# Print confirmation
print(f"Merged dataset saved successfully at: {save_path}")

# Display first few rows
print(merged_df.head())


Merged dataset saved successfully at: /Users/Grace/Desktop/2025-studio/project3/merged_jackpot_data.csv
   Draw Date  Advertised Jackpot  Actual Winning Jackpot
0 2024-12-10           619000000                   810.0
1 2024-12-13           695000000                   810.0
2 2024-12-17           760000000                   810.0
3 2024-12-20           862000000                   810.0
4 2024-12-24          1000000000                   810.0
