In [None]:
import os
import sys
import logging
from pathlib import Path

# Project root configuration: the directory that contains this file.
# It is inserted at the front of sys.path so that it is searched first
# when the `src.*` modules are imported in main().
PROJECT_ROOT = Path(__file__).parent
sys.path.insert(0, str(PROJECT_ROOT))

# Set up logging: records at INFO and above are written both to
# wmap_project.log under PROJECT_ROOT and to a StreamHandler.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler(PROJECT_ROOT / 'wmap_project.log'),
        logging.StreamHandler()
    ]
)
# Module-level logger shared by ProjectConfig and main().
logger = logging.getLogger(__name__)

# Configuration management
class ProjectConfig:
    """
    Centralized configuration for the WMAP project.

    Attributes:
        DATABASE_CONFIG (dict): Connection parameters ('host', 'database',
            'user', 'password') handed to the database manager. Each value
            is read from an environment variable when it is set
            (WMAP_DB_HOST, WMAP_DB_NAME, WMAP_DB_USER, WMAP_DB_PASSWORD);
            the literals are fallbacks only, so real credentials never have
            to be committed to source control. The environment is read once,
            when this class body is executed.
        DATA_SOURCES (dict): Raw and processed data directories located
            under PROJECT_ROOT.
    """
    # Database configuration (environment overrides, development fallbacks)
    DATABASE_CONFIG = {
        'host': os.environ.get('WMAP_DB_HOST', 'localhost'),
        'database': os.environ.get('WMAP_DB_NAME', 'wmap_database'),
        'user': os.environ.get('WMAP_DB_USER', 'astronomical_user'),
        'password': os.environ.get('WMAP_DB_PASSWORD', 'secure_password'),
    }

    # Data source paths
    DATA_SOURCES = {
        'wmap_raw': PROJECT_ROOT / 'data' / 'raw',
        'wmap_processed': PROJECT_ROOT / 'data' / 'processed',
    }

    @classmethod
    def setup_directories(cls):
        """
        Create every directory listed in DATA_SOURCES.

        Missing parent directories are created as well, and directories
        that already exist are left untouched.
        """
        for path in cls.DATA_SOURCES.values():
            path.mkdir(parents=True, exist_ok=True)
        logger.info("Data directories initialized")

def main():
    """
    Run the WMAP pipeline from start to finish.

    Steps, in order: create the data directories, download the raw data,
    build the database schema and load the data, then run the analysis.
    If any step raises, the error is logged together with its traceback
    and the process exits with status 1.
    """
    try:
        ProjectConfig.setup_directories()

        # The pipeline components are imported here, inside the try block,
        # so a failing import is reported through the same handler below.
        from src.data_acquisition import download_wmap_data
        from src.database_manager import DatabaseManager
        from src.data_analysis import perform_analysis

        # Stage 1: fetch the raw files
        download_wmap_data()

        # Stage 2: schema creation followed by the data load
        database = DatabaseManager(ProjectConfig.DATABASE_CONFIG)
        database.create_schema()
        database.import_data()

        # Stage 3: scientific analysis
        perform_analysis()

        logger.info("WMAP Project completed successfully")

    except Exception as exc:
        # logger.exception logs at ERROR level and attaches the traceback.
        logger.exception(f"Project execution failed: {exc}")
        sys.exit(1)

# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

In [None]:
import requests
import logging
import pandas as pd
from pathlib import Path

logger = logging.getLogger(__name__)

def download_wmap_data(output_dir='data/raw', timeout=30):
    """
    Download WMAP dataset from NASA's repository

    Each source is streamed to ``<name>.part`` inside ``output_dir`` and
    renamed to its final name only after the whole body has been written,
    so an interrupted transfer never leaves a truncated file under the
    final name. A failed download is logged and the remaining sources are
    still attempted.

    Args:
        output_dir (str): Directory to save downloaded data
        timeout (float or tuple): Timeout in seconds passed to
            ``requests.get``; without one a stalled server would block the
            call indefinitely.
    """
    # WMAP data sources (example URLs - replace with actual NASA links)
    wmap_data_sources = {
        'temperature_map': 'https://lambda.gsfc.nasa.gov/data/map/dr5/sky_maps/temperature_map.fits',
        'polarization_map': 'https://lambda.gsfc.nasa.gov/data/map/dr5/sky_maps/polarization_map.fits',
        'cosmological_parameters': 'https://lambda.gsfc.nasa.gov/data/map/dr5/parameters/cosmological_parameters.csv'
    }

    # Ensure output directory exists
    output_path = Path(output_dir)
    output_path.mkdir(parents=True, exist_ok=True)

    # Download each data source
    for name, url in wmap_data_sources.items():
        # The parameter table is saved as CSV, everything else as FITS
        suffix = '.csv' if name == 'cosmological_parameters' else '.fits'
        file_path = output_path / f'{name}{suffix}'
        partial_path = file_path.with_name(file_path.name + '.part')

        try:
            # The context manager releases the streamed connection even
            # when the transfer or the file write fails part-way.
            with requests.get(url, stream=True, timeout=timeout) as response:
                response.raise_for_status()

                with open(partial_path, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:  # skip empty keep-alive chunks
                            f.write(chunk)

            # Publish the file only once it is complete
            partial_path.replace(file_path)
            logger.info(f'Successfully downloaded {name}')

        except requests.RequestException as e:
            logger.error(f'Error downloading {name}: {e}')
        finally:
            # After a successful rename the partial file no longer exists;
            # anything still present here is an incomplete download.
            if partial_path.exists():
                partial_path.unlink()

def preprocess_wmap_data(input_dir='data/raw', output_dir='data/processed'):
    """
    Convert the raw temperature map FITS file into a CSV file.

    Reads ``temperature_map.fits`` from ``input_dir``, takes the data of
    the HDU at index 1, and writes it without an index column to
    ``processed_temperature.csv`` in ``output_dir``. Any error during
    reading or writing is logged and not re-raised.

    Args:
        input_dir (str): Directory with raw data
        output_dir (str): Directory to save processed data
    """
    from astropy.io import fits

    # Make sure the destination directory is there before writing
    processed_dir = Path(output_dir)
    processed_dir.mkdir(parents=True, exist_ok=True)
    csv_target = processed_dir / 'processed_temperature.csv'

    try:
        fits_source = Path(input_dir) / 'temperature_map.fits'
        with fits.open(fits_source) as hdu_list:
            # Data of the second HDU (index 1) is turned into a DataFrame
            frame = pd.DataFrame(hdu_list[1].data)
            frame.to_csv(csv_target, index=False)

            logger.info('Processed temperature map')

    except Exception as err:
        logger.error(f'Error processing temperature map: {err}')

# Standalone execution for testing: configure basic INFO logging, then
# download the raw files and preprocess the temperature map using the
# default directories.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)
    download_wmap_data()
    preprocess_wmap_data()

In [None]:
import logging
import pandas as pd
import sqlalchemy as sa
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from geoalchemy2 import Geometry

logger = logging.getLogger(__name__)
Base = declarative_base()

class WMAPObservation(Base):
    """
    SQLAlchemy declarative model for WMAP observations.

    Mapped to the ``wmap_observations`` table. All columns other than the
    primary key and ``polarization`` are declared with ``nullable=False``.
    """
    __tablename__ = 'wmap_observations'

    # Integer primary key.
    id = sa.Column(sa.Integer, primary_key=True)
    # Frequency band as a float; the unit is not stated here (TODO confirm).
    frequency_band = sa.Column(sa.Float, nullable=False)
    # Temperature as a float; presumably kelvin, verify against the data source.
    temperature = sa.Column(sa.Float, nullable=False)
    # GeoAlchemy2 POINT geometry; DatabaseManager.import_data fills it with
    # a 'POINT(<longitude> <latitude>)' string.
    sky_coordinate = sa.Column(Geometry('POINT'), nullable=False)
    # Timestamp of the observation.
    observation_time = sa.Column(sa.DateTime, nullable=False)
    # Optional polarization value (nullable).
    polarization = sa.Column(sa.Float)

class DatabaseManager:
    """
    Wrapper around a SQLAlchemy engine and session factory for WMAP data.

    Provides schema creation, CSV import into ``WMAPObservation`` rows and
    an example filtered query. Each method logs failures instead of
    raising them.
    """

    def __init__(self, db_config):
        """
        Initialize database connection

        Args:
            db_config (dict): Database configuration parameters; the keys
                'user', 'password', 'host' and 'database' are read.
        """
        # Function-scope import: only this constructor needs it.
        from urllib.parse import quote

        # Percent-encode the credentials so characters such as '@', ':' or
        # '/' in a user name or password cannot corrupt the connection URL.
        user = quote(str(db_config['user']), safe='')
        password = quote(str(db_config['password']), safe='')

        self.engine = sa.create_engine(
            f"postgresql://{user}:{password}@{db_config['host']}/{db_config['database']}"
        )
        self.Session = sessionmaker(bind=self.engine)

    def create_schema(self):
        """
        Create database schema

        Creates every table registered on ``Base.metadata``. Errors are
        logged and not re-raised.
        """
        try:
            Base.metadata.create_all(self.engine)
            logger.info("Database schema created successfully")
        except Exception as e:
            logger.error(f"Error creating schema: {e}")

    @staticmethod
    def _build_observation(row):
        """Map one CSV row (a pandas Series) to a WMAPObservation."""
        return WMAPObservation(
            frequency_band=row.get('frequency', 0),
            temperature=row.get('temperature', 0),
            sky_coordinate=f'POINT({row.get("longitude", 0)} {row.get("latitude", 0)})',
            observation_time=pd.to_datetime(row.get('timestamp', pd.Timestamp.now())),
            polarization=row.get('polarization', None)
        )

    def import_data(self, data_path='data/processed/processed_temperature.csv'):
        """
        Import processed WMAP data into database

        Columns missing from the CSV fall back to defaults: 0 for
        frequency, temperature, longitude and latitude, the current time
        for timestamp, and None for polarization. Errors are logged and
        not re-raised.

        Args:
            data_path (str): Path to processed CSV file
        """
        # Read the file before opening a session: when the CSV is missing
        # or unreadable there is nothing to clean up, and the error is
        # reported without touching the database.
        try:
            df = pd.read_csv(data_path)
        except Exception as e:
            logger.error(f"Error importing data: {e}")
            return

        # session stays None until the factory call succeeds, so the
        # finally clause never references an unbound name.
        session = None
        try:
            session = self.Session()

            # Convert DataFrame to database objects
            observations = [self._build_observation(row) for _, row in df.iterrows()]

            # Bulk insert
            session.add_all(observations)
            session.commit()

            logger.info(f"Imported {len(observations)} observations")

        except Exception as e:
            logger.error(f"Error importing data: {e}")
        finally:
            if session is not None:
                session.close()

    def perform_advanced_query(self):
        """
        Perform advanced SQL queries on WMAP data

        Selects observations whose frequency_band lies between 20 and 100
        and whose temperature is greater than 2.7.

        Returns:
            list: Matching WMAPObservation objects, or None when the query
            fails (the error is logged).
        """
        # Guard value so the finally clause is safe when Session() fails.
        session = None
        try:
            session = self.Session()

            # Example query: Find observations in specific frequency range
            query = session.query(WMAPObservation).filter(
                sa.and_(
                    WMAPObservation.frequency_band.between(20, 100),
                    WMAPObservation.temperature > 2.7
                )
            )

            results = query.all()
            logger.info(f"Advanced query returned {len(results)} results")

            return results

        except Exception as e:
            logger.error(f"Error in advanced query: {e}")
            return None
        finally:
            if session is not None:
                session.close()

# Standalone execution for testing
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    # Example connection settings, passed straight to the manager
    manager = DatabaseManager(dict(
        host='localhost',
        database='wmap_database',
        user='astronomical_user',
        password='secure_password',
    ))

    # Same sequence as the pipeline: schema, import, then a sample query
    manager.create_schema()
    manager.import_data()
    manager.perform_advanced_query()

In [None]:
import logging
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from astropy.coordinates import SkyCoord
from astropy import units as u
from pyspark.sql import SparkSession
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.clustering import KMeans

logger = logging.getLogger(__name__)

def perform_analysis(data_path='data/processed/processed_temperature.csv', output_dir='results'):
    """
    Perform comprehensive analysis of WMAP data

    Loads the processed CSV, logs mean/median/standard deviation of the
    'temperature' column, saves a histogram of it as
    ``temperature_distribution.png`` in ``output_dir`` and then runs the
    Spark clustering step. Errors are logged and not re-raised.

    Args:
        data_path (str): Path to processed WMAP data
        output_dir (str): Directory for the histogram image. It is created
            when missing; savefig does not create directories itself.

    Returns:
        dict: Keys 'mean', 'median' and 'std_dev' for the temperature
        column, or None when the analysis fails.
    """
    # Function-scope import: pathlib is not in this module's import block.
    from pathlib import Path

    try:
        # Load data
        df = pd.read_csv(data_path)

        # Coordinate conversion
        # NOTE(review): the result is not used below; it is kept so that a
        # failure while building the coordinates is still reported as before.
        sky_coords = SkyCoord(
            ra=df['longitude'] * u.degree,
            dec=df['latitude'] * u.degree,
            frame='icrs'
        )

        # Temperature analysis
        temp_stats = {
            'mean': df['temperature'].mean(),
            'median': df['temperature'].median(),
            'std_dev': df['temperature'].std()
        }
        logger.info(f"Temperature Statistics: {temp_stats}")

        # The output directory must exist before the figure is saved
        plot_dir = Path(output_dir)
        plot_dir.mkdir(parents=True, exist_ok=True)

        # Visualization: Temperature Distribution
        plt.figure(figsize=(10, 6))
        try:
            plt.hist(df['temperature'], bins=50, edgecolor='black')
            plt.title('WMAP Temperature Distribution')
            plt.xlabel('Temperature (K)')
            plt.ylabel('Frequency')
            plt.savefig(plot_dir / 'temperature_distribution.png')
        finally:
            # Close the figure even when plotting or saving fails
            plt.close()

        # Distributed Processing with Spark
        spark_analysis(df)

        return temp_stats

    except Exception as e:
        logger.error(f"Analysis error: {e}")
        return None

def spark_analysis(df):
    """
    Perform distributed data processing using PySpark

    Converts the pandas DataFrame to a Spark DataFrame, assembles the
    'longitude', 'latitude' and 'temperature' columns into a feature
    vector, fits K-means with k=5 and seed 42, and logs the cluster
    centers. Errors are logged and not re-raised.

    Args:
        df (pandas.DataFrame): Input DataFrame
    """
    try:
        # Initialize Spark Session
        spark = SparkSession.builder \
            .appName("WMAP Distributed Analysis") \
            .getOrCreate()

        # From here on the session exists, so it is stopped in a finally
        # clause; a failure in any later step no longer leaves it running.
        try:
            # Convert Pandas DataFrame to Spark DataFrame
            spark_df = spark.createDataFrame(df)

            # Prepare data for clustering
            feature_cols = ['longitude', 'latitude', 'temperature']
            assembler = VectorAssembler(inputCols=feature_cols, outputCol="features")
            assembled_df = assembler.transform(spark_df)

            # K-means clustering
            kmeans = KMeans().setK(5).setSeed(42)
            model = kmeans.fit(assembled_df)

            # Get cluster centers
            centers = model.clusterCenters()
            logger.info("Cluster Centers:")
            for i, center in enumerate(centers):
                logger.info(f"Cluster {i}: {center}")
        finally:
            spark.stop()

    except Exception as e:
        logger.error(f"Spark analysis error: {e}")

def advanced_astropy_analysis(data_path='data/processed/processed_temperature.csv'):
    """
    Advanced astrophysical analysis using Astropy

    Args:
        data_path (str): Path to processed WMAP data
    """
    try:
        # Load data
        df = pd.read_csv(data_path)

        # Create SkyCoord object
        coords = SkyCoord(
            ra=df['longitude'] * u.degree,
            dec=df['latitude'] * u.degree,
            distance=df['temperature'] * u.kelvin
        )

        # Coordinate transformations
        galactic_coords = coords.galactic

        # Distance calculations
        distance_stats = {
            'mean_distance': np.mean(coords.distance),
            'max_distance': np.max(coords.distance),
            'min_distance': np.min(coords.distance)
        }
        logger.info(f"Distance Statistics: {distance_stats}")

        # Visualization of Galactic Coordinates
        plt.figure(figsize=(12, 8))
        plt.scatter(
            galactic_coords.l.degree,
            galactic_coords.b.degree,
            c=df['