In [None]:
import os
import json
import requests
import pandas as pd
from bs4 import BeautifulSoup
from pathlib import Path
from astropy.table import Table
from datetime import datetime


In [None]:
# Directory that receives every CSV written by this notebook. The path is
# relative to the current working directory; it is created on execution,
# together with any missing parent directories, and left alone if it exists.
DATA_DIR = Path("..") / "data" / "raw"
DATA_DIR.mkdir(exist_ok=True, parents=True)

In [None]:
# HTTP request headers carrying a browser-style User-Agent string.
# NOTE(review): no function visible in this part of the notebook reads
# HEADERS -- confirm whether it is still needed.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36'
    ),
}

In [None]:
def get_planck_cosmology():
    """Write the Planck 2018 cosmological parameter table to disk.

    The numbers are hard-coded in this function; no network request is made.
    The table is saved as ``planck_cosmology.csv`` inside ``DATA_DIR``.

    Returns:
        pandas.DataFrame with the columns ``parameter``, ``value``,
        ``error``, ``unit`` and ``source``, or ``None`` if building or
        saving the table raised an exception (the error is printed).
    """
    print("Fetching Planck cosmology data...")
    column_names = ['parameter', 'value', 'error', 'unit', 'source']
    # One tuple per row, in column_names order.
    rows = [
        ('H0', 67.36, 0.54, 'km/s/Mpc', 'Planck 2018'),
        ('Omega_m', 0.3153, 0.0073, '1', 'Planck 2018'),
        ('Omega_Lambda', 0.6847, 0.0073, '1', 'Planck 2018'),
        ('Omega_b', 0.0493, 0.0006, '1', 'Planck 2018'),
        ('sigma_8', 0.8111, 0.0060, '1', 'Planck 2018'),
        ('n_s', 0.9649, 0.0042, '1', 'Planck 2018'),
    ]
    try:
        planck_df = pd.DataFrame(rows, columns=column_names)
        planck_df.to_csv(DATA_DIR / 'planck_cosmology.csv', index=False)
        print("✓ Planck data saved")
    except Exception as exc:
        print(f"Error fetching Planck data: {exc}")
        return None
    return planck_df

In [None]:
def get_sh0es_hubble():
    """Write the SH0ES Hubble-constant measurement to disk.

    The single H0 row is hard-coded in this function; no network request is
    made. The table is saved as ``sh0es_hubble.csv`` inside ``DATA_DIR``.

    Returns:
        One-row pandas.DataFrame with the columns ``parameter``, ``value``,
        ``error``, ``unit`` and ``source``, or ``None`` if building or
        saving the table raised an exception (the error is printed).
    """
    print("Fetching SH0ES H0 measurement...")
    measurement = {
        'parameter': 'H0',
        'value': 73.04,
        'error': 1.04,
        'unit': 'km/s/Mpc',
        'source': 'SH0ES 2023',
    }
    try:
        # A one-element list of records yields a single-row frame whose
        # columns follow the key order of the record.
        sh0es_df = pd.DataFrame([measurement])
        sh0es_df.to_csv(DATA_DIR / 'sh0es_hubble.csv', index=False)
        print("✓ SH0ES data saved")
    except Exception as exc:
        print(f"Error fetching SH0ES data: {exc}")
        return None
    return sh0es_df

In [None]:
def get_des_cosmology():
    """Write the Dark Energy Survey Year 3 parameter table to disk.

    The numbers are hard-coded in this function; no network request is made.
    The table is saved as ``des_cosmology.csv`` inside ``DATA_DIR``.

    Returns:
        pandas.DataFrame with the columns ``parameter``, ``value``,
        ``error``, ``unit`` and ``source``, or ``None`` if building or
        saving the table raised an exception (the error is printed).
    """
    print("Fetching DES Y3 cosmology...")
    survey = 'DES Y3'
    column_names = ['parameter', 'value', 'error', 'unit', 'source']
    # NOTE(review): the 'sigma_8' row carries exactly the same value and error
    # as the 'S8' row (0.776 / 0.017). Confirm against the DES Y3 publication
    # that this is intended and not a copy-paste of the S8 numbers.
    rows = [
        ('S8', 0.776, 0.017, '1', survey),
        ('Omega_m', 0.279, 0.01, '1', survey),
        ('sigma_8', 0.776, 0.017, '1', survey),
    ]
    try:
        des_df = pd.DataFrame(rows, columns=column_names)
        des_df.to_csv(DATA_DIR / 'des_cosmology.csv', index=False)
        print("✓ DES data saved")
    except Exception as exc:
        print(f"Error fetching DES data: {exc}")
        return None
    return des_df

In [None]:
def get_act_cmb():
    """Write the Atacama Cosmology Telescope (ACT DR4) parameter table to disk.

    The numbers are hard-coded in this function; no network request is made.
    The table is saved as ``act_cmb.csv`` inside ``DATA_DIR``.

    Returns:
        pandas.DataFrame with the columns ``parameter``, ``value``,
        ``error``, ``unit`` and ``source``, or ``None`` if building or
        saving the table raised an exception (the error is printed).
    """
    print("Fetching ACT CMB data...")
    names = ['H0', 'Omega_m', 'sigma_8', 'S8']
    n_rows = len(names)
    table = {
        'parameter': names,
        'value': [67.9, 0.308, 0.811, 0.840],
        'error': [1.5, 0.012, 0.023, 0.018],
        # Only H0 (first row) has a physical unit; the rest are marked '1'.
        'unit': ['km/s/Mpc'] + ['1'] * (n_rows - 1),
        'source': ['ACT DR4'] * n_rows,
    }
    try:
        act_df = pd.DataFrame(table)
        act_df.to_csv(DATA_DIR / 'act_cmb.csv', index=False)
        print("✓ ACT data saved")
    except Exception as exc:
        print(f"Error fetching ACT data: {exc}")
        return None
    return act_df

In [None]:
def combine_all_data(data_dir=None):
    """Combine every per-source CSV in the data directory into a single file.

    All ``*.csv`` files in the directory except ``combined_cosmology.csv``
    itself are read in sorted filename order, concatenated, stamped with a
    ``timestamp`` column holding the current local time, and written back as
    ``combined_cosmology.csv`` in the same directory.

    Args:
        data_dir: Directory to read from and write to (str or path-like).
            Defaults to the notebook-level ``DATA_DIR`` when ``None``.

    Returns:
        The combined pandas.DataFrame, or ``None`` when the directory holds
        no source CSV files or when reading/writing raised an exception
        (a message is printed in both cases).
    """
    print("Combining all datasets...")
    try:
        target_dir = DATA_DIR if data_dir is None else Path(data_dir)
        output_path = target_dir / 'combined_cosmology.csv'

        # glob() yields entries in filesystem order, which differs between
        # machines; sorting keeps the row order of the output reproducible.
        # The previous combined file is skipped so it is never merged into
        # itself on a re-run.
        source_files = sorted(
            path for path in target_dir.glob('*.csv')
            if path.name != output_path.name
        )
        if not source_files:
            print("No source CSV files found to combine")
            return None

        frames = [pd.read_csv(path) for path in source_files]
        combined_df = pd.concat(frames, ignore_index=True)
        combined_df['timestamp'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        combined_df.to_csv(output_path, index=False)
        print("✓ Combined data saved")
        return combined_df
    except Exception as e:
        print(f"Error combining data: {e}")
        return None

In [None]:
def main():
    """Run every data-collection step, combine the results and print a summary.

    Calls the four per-source functions in order, then ``combine_all_data``.
    When a combined table is returned, its ``parameter``, ``value``,
    ``error`` and ``source`` columns are printed as plain text.
    """
    print("Starting astronomy data collection...")

    # Each collector writes its own CSV; the return values are not needed here.
    collectors = (
        get_planck_cosmology,
        get_sh0es_hubble,
        get_des_cosmology,
        get_act_cmb,
    )
    for collect in collectors:
        collect()

    combined = combine_all_data()

    if combined is not None:
        summary_columns = ['parameter', 'value', 'error', 'source']
        print("\nSummary of collected data:")
        print(combined[summary_columns].to_string(index=False))

    print("\nData collection complete!")


if __name__ == "__main__":
    main()
