In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

def scrape_mega_millions_history():
    """
    Return a fixed snapshot of Mega Millions jackpot history.

    No network request is made: the rows below were transcribed by hand
    from a screenshot because live scraping was not working.

    Returns:
        pandas.DataFrame with one row per draw, newest first, and three
        string columns: 'Draw Date', 'Jackpot' and 'Change'.
    """
    column_names = ['Draw Date', 'Jackpot', 'Change']

    # (draw date, advertised jackpot, change versus the previous draw)
    rows = [
        ('FRI 03/07/25', '$233,000,000', '+$18,000,000'),
        ('TUE 03/04/25', '$215,000,000', '+$18,000,000'),
        ('FRI 02/28/25', '$197,000,000', '+$16,000,000'),
        ('TUE 02/25/25', '$181,000,000', '+$16,000,000'),
        ('FRI 02/21/25', '$165,000,000', '+$20,000,000'),
        ('TUE 02/18/25', '$145,000,000', '+$16,000,000'),
        ('FRI 02/14/25', '$129,000,000', '+$19,000,000'),
        ('TUE 02/11/25', '$110,000,000', '+$16,000,000'),
        ('FRI 02/07/25', '$94,000,000', '+$17,000,000'),
        ('TUE 02/04/25', '$77,000,000', '+$18,000,000'),
        ('FRI 01/31/25', '$59,000,000', '+$15,000,000'),
        ('TUE 01/28/25', '$44,000,000', '+$16,000,000'),
        ('FRI 01/24/25', '$28,000,000', '+$8,000,000'),
        ('TUE 01/21/25', '$20,000,000', '-$93,000,000'),
        ('FRI 01/17/25', '$113,000,000', '+$18,000,000'),
        ('TUE 01/14/25', '$95,000,000', '+$18,000,000'),
        ('FRI 01/10/25', '$77,000,000', '+$15,000,000'),
        ('TUE 01/07/25', '$62,000,000', '+$20,000,000'),
        ('FRI 01/03/25', '$42,000,000', '+$22,000,000'),
        ('TUE 12/31/24', '$20,000,000', '-$1,200,000,000'),
        ('FRI 12/27/24', '$1,220,000,000', '+$220,000,000'),
        ('TUE 12/24/24', '$1,000,000,000', '+$138,000,000'),
        ('FRI 12/20/24', '$862,000,000', '+$102,000,000'),
        ('TUE 12/17/24', '$760,000,000', '+$65,000,000'),
        ('FRI 12/13/24', '$695,000,000', '+$76,000,000'),
        ('TUE 12/10/24', '$619,000,000', '+$40,000,000'),
    ]

    history = pd.DataFrame(rows, columns=column_names)
    return history


def process_data(df):
    """Add numeric versions of the 'Jackpot' and 'Change' columns (no regex).

    The frame is modified in place and the same object is returned.

    Columns added:
        Jackpot_Clean: 'Jackpot' as a float, with '$' and ',' removed.
        Change_Clean: 'Change' as a signed float.
        Change_Direction: 'increase', 'decrease' or 'unchanged', derived
            from the sign of Change_Clean.

    Args:
        df: DataFrame with string columns 'Jackpot' (e.g. '$233,000,000')
            and 'Change' (e.g. '+$18,000,000', '-$93,000,000').

    Returns:
        The same DataFrame, with the three columns above added.

    Raises:
        ValueError: if a value still contains non-numeric text once the
            currency symbols and sign have been stripped.
    """
    df['Jackpot_Clean'] = [
        float(jackpot.replace('$', '').replace(',', ''))
        for jackpot in df['Jackpot']
    ]

    clean_changes = []
    change_directions = []

    for change in df['Change']:
        # A '+' anywhere wins over '-', matching the order in which the
        # signs have always been checked here.
        is_negative = '-' in change and '+' not in change

        digits = change
        for symbol in '+-$,':
            digits = digits.replace(symbol, '')
        digits = digits.strip()

        # An empty remainder (e.g. '' or a bare '-') means "no change given".
        magnitude = float(digits) if digits else 0.0

        # Classify by the parsed value, not by the sign character, so an
        # unsigned amount such as '$5,000,000' is kept as an increase
        # instead of being silently recorded as 0.0.
        if magnitude == 0:
            clean_changes.append(0.0)
            change_directions.append('unchanged')
        elif is_negative:
            clean_changes.append(-magnitude)
            change_directions.append('decrease')
        else:
            clean_changes.append(magnitude)
            change_directions.append('increase')

    df['Change_Clean'] = clean_changes
    df['Change_Direction'] = change_directions

    return df


def _extreme_row(df, values, largest):
    """Return the row of ``df`` at the largest or smallest entry of ``values``.

    ``values`` is a Series whose index labels come from ``df``. Missing
    values are ignored. Returns None when no value is left to compare.
    """
    candidates = values.dropna()
    if candidates.empty:
        return None
    label = candidates.idxmax() if largest else candidates.idxmin()
    return df.loc[label]


def analyze_data(df):
    """Summarise a frame that already has the cleaned numeric columns.

    Args:
        df: DataFrame containing 'Jackpot_Clean', 'Change_Clean' and
            'Change_Direction' columns.

    Returns:
        dict with these keys:
            total_draws: number of rows.
            largest_jackpot / smallest_jackpot: the row holding the
                extreme 'Jackpot_Clean' value, or None if there is none
                (for example, an empty frame).
            largest_increase: the row with the greatest positive
                'Change_Clean', or None if no change is positive.
            largest_decrease: the row with the most negative
                'Change_Clean', or None if no change is negative.
            average_jackpot / median_jackpot: mean and median of
                'Jackpot_Clean' (NaN for an empty frame).
            increases_count / decreases_count / unchanged_count: number
                of rows with each 'Change_Direction' label.
    """
    jackpots = df['Jackpot_Clean']
    changes = df['Change_Clean']
    directions = df['Change_Direction']

    return {
        'total_draws': len(df),
        'largest_jackpot': _extreme_row(df, jackpots, largest=True),
        'smallest_jackpot': _extreme_row(df, jackpots, largest=False),
        # Restrict to strictly positive / negative changes so that a frame
        # with only increases does not report its smallest increase as a
        # "decrease" (and vice versa).
        'largest_increase': _extreme_row(df, changes[changes > 0], largest=True),
        'largest_decrease': _extreme_row(df, changes[changes < 0], largest=False),
        'average_jackpot': jackpots.mean(),
        'median_jackpot': jackpots.median(),
        'increases_count': (directions == 'increase').sum(),
        'decreases_count': (directions == 'decrease').sum(),
        'unchanged_count': (directions == 'unchanged').sum(),
    }


def main():
    """Build the jackpot table, save it as CSV and print a short report.

    Writes 'mega_millions_jackpot_history.csv' to the current working
    directory and prints the first rows plus summary statistics to stdout.
    """
    output_path = 'mega_millions_jackpot_history.csv'

    print("Scraping Mega Millions jackpot history...")

    # Load the raw table and add the numeric columns in one step.
    history = process_data(scrape_mega_millions_history())

    # Persist the cleaned table.
    history.to_csv(output_path, index=False)
    print("Data saved to mega_millions_jackpot_history.csv")

    # Preview.
    print("\nFirst few rows of data:")
    print(history.head())

    # Summary statistics.
    analysis = analyze_data(history)
    biggest = analysis['largest_jackpot']
    smallest = analysis['smallest_jackpot']
    top_rise = analysis['largest_increase']
    top_drop = analysis['largest_decrease']

    print("\nAnalysis of jackpot history:")
    print(f"Total number of draws: {analysis['total_draws']}")
    print(f"Largest jackpot: ${biggest['Jackpot_Clean']:,.2f} on {biggest['Draw Date']}")
    print(f"Smallest jackpot: ${smallest['Jackpot_Clean']:,.2f} on {smallest['Draw Date']}")

    # These two rows are None when the data has no increase / no decrease.
    if top_rise is not None:
        print(f"Largest increase: ${top_rise['Change_Clean']:,.2f} on {top_rise['Draw Date']}")

    if top_drop is not None:
        print(f"Largest decrease: ${top_drop['Change_Clean']:,.2f} on {top_drop['Draw Date']}")

    print(f"Average jackpot: ${analysis['average_jackpot']:,.2f}")
    print(f"Median jackpot: ${analysis['median_jackpot']:,.2f}")
    print(f"Number of increases: {analysis['increases_count']}")
    print(f"Number of decreases: {analysis['decreases_count']}")
    print(f"Number of unchanged jackpots: {analysis['unchanged_count']}")


# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

Scraping Mega Millions jackpot history...
Data saved to mega_millions_jackpot_history.csv

First few rows of data:
      Draw Date       Jackpot        Change  Jackpot_Clean  Change_Clean  \
0  FRI 03/07/25  $233,000,000  +$18,000,000    233000000.0    18000000.0   
1  TUE 03/04/25  $215,000,000  +$18,000,000    215000000.0    18000000.0   
2  FRI 02/28/25  $197,000,000  +$16,000,000    197000000.0    16000000.0   
3  TUE 02/25/25  $181,000,000  +$16,000,000    181000000.0    16000000.0   
4  FRI 02/21/25  $165,000,000  +$20,000,000    165000000.0    20000000.0   

  Change_Direction  
0         increase  
1         increase  
2         increase  
3         increase  
4         increase  

Analysis of jackpot history:
Total number of draws: 26
Largest jackpot: $1,220,000,000.00 on FRI 12/27/24
Smallest jackpot: $20,000,000.00 on TUE 01/21/25
Largest increase: $220,000,000.00 on FRI 12/27/24
Largest decrease: $-1,200,000,000.00 on TUE 12/31/24
Average jackpot: $279,307,692.31
Median jac