# Earth's Atmospheric Characterization - Data Exploration

This notebook demonstrates how to explore and visualize the atmospheric datasets used in the Earth's Atmospheric Characterization project for Solar Power Satellite (SPS) development.

In [None]:
# Import necessary libraries
import os
import sys
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
from pathlib import Path

# Add the src directory to the Python path
sys.path.insert(0, str(Path().resolve().parent))

# Import project modules
from src.data_preprocessing.standardize import DataStandardizer
from src.data_preprocessing.resampling import SpatialResampler
from src.data_preprocessing.normalization import DataNormalizer

# Set up plotting
%matplotlib inline
plt.style.use('seaborn-whitegrid')
plt.rcParams['figure.figsize'] = (12, 8)

## 1. Exploring Raw Data

Let's first explore the raw data files downloaded from various sources.

In [None]:
# Define data directories
# Both paths are relative, so they resolve against the current working
# directory of the kernel.
# raw_dir: scanned below one sub-directory at a time (e.g. 'MOD06_L2').
raw_dir = Path('../data/raw')
# processed_dir: handed to DataStandardizer as its `processed_dir` argument.
processed_dir = Path('../data/processed')

# List available datasets
def list_datasets(directory):
    """Summarize every dataset sub-directory found directly under ``directory``.

    Args:
        directory: ``pathlib.Path`` (or path string) whose immediate
            sub-directories each hold the files of one dataset.

    Returns:
        dict mapping each sub-directory name to a dict with the keys
        ``'count'`` (number of regular files directly inside it),
        ``'size_mb'`` (their combined size in units of 1024 * 1024 bytes) and
        ``'files'`` (names of the first five files, sorted by name).
        An empty dict is returned when ``directory`` is not a directory.
    """
    directory = Path(directory)
    if not directory.is_dir():
        return {}

    datasets = {}
    # Sort so the report (and the "first five" sample) is reproducible; the
    # order produced by glob depends on the filesystem.
    for path in sorted(directory.glob('*')):
        if not path.is_dir():
            continue
        # Keep regular files only: nested directories are not data files, and
        # is_file() is False for dangling symlinks, on which stat() would raise.
        files = sorted(f for f in path.glob('*') if f.is_file())
        datasets[path.name] = {
            'count': len(files),
            'size_mb': sum(f.stat().st_size for f in files) / (1024 * 1024),
            'files': [f.name for f in files[:5]]  # Show first 5 files
        }
    return datasets

# Summarize everything found under the raw data directory
raw_datasets = list_datasets(raw_dir)

# Print one short report per dataset, followed by a blank separator line
for name, info in raw_datasets.items():
    sample_names = ', '.join(info['files'][:3])  # at most three file names
    report_lines = [
        f"Dataset: {name}",
        f"  Count: {info['count']} files",
        f"  Size: {info['size_mb']:.2f} MB",
        f"  Sample files: {sample_names}...",
        "",
    ]
    print(*report_lines, sep="\n")

## 2. Examining MODIS Data

Let's examine a MODIS dataset to understand its structure and content.

In [None]:
# Find a MODIS file
# Sort and keep regular files only so the same granule is examined on every
# run (glob order depends on the filesystem).
modis_files = sorted(p for p in raw_dir.glob('MOD06_L2/*') if p.is_file())
if modis_files:
    modis_file = modis_files[0]
    print(f"Examining MODIS file: {modis_file.name}")

    # Open the file using h5py
    # NOTE(review): MOD06_L2 granules are commonly distributed as HDF4 (.hdf),
    # which h5py cannot read - confirm the format of the downloaded files. The
    # OSError handler below reports such files instead of aborting the cell.
    import h5py
    try:
        with h5py.File(modis_file, 'r') as f:
            # Full paths of every dataset in the file, filled while walking it
            dataset_names = []

            # Print file structure
            def print_structure(name, obj):
                """Record and print each dataset visited by ``visititems``."""
                if isinstance(obj, h5py.Dataset):
                    dataset_names.append(name)
                    print(f"{name}: {obj.shape}, {obj.dtype}")

            print("\nFile structure:")
            f.visititems(print_structure)

            # Print global attributes
            print("\nGlobal attributes:")
            for attr in f.attrs:
                print(f"  {attr}: {f.attrs[attr]}")

            # Extract and plot a sample dataset if available.
            # Only real 2-D datasets qualify: a root key may be a group (which
            # cannot be read with [()]) and imshow needs an image-like array.
            cloud_datasets = [
                name for name in dataset_names
                if 'Cloud' in name and f[name].ndim == 2
            ]
            if cloud_datasets:
                sample_dataset = cloud_datasets[0]
                print(f"\nExtracting sample dataset: {sample_dataset}")
                data = f[sample_dataset][()]

                plt.figure(figsize=(10, 8))
                plt.imshow(data, cmap='viridis')
                plt.colorbar(label=sample_dataset)
                plt.title(f"MODIS {sample_dataset}")
                plt.show()
    except OSError as e:
        # Same best-effort reporting as the Sentinel-5P cell
        print(f"Error opening MODIS file: {e}")
else:
    print("No MODIS files found. Please download data first.")

## 3. Examining Sentinel-5P Data

Let's examine a Sentinel-5P dataset to understand its structure and content.

In [None]:
# Find a Sentinel-5P file
# Sort and keep regular files only so the same file is examined on every run.
sentinel_files = sorted(p for p in raw_dir.glob('S5P_L2_AER_AI/*') if p.is_file())
if sentinel_files:
    sentinel_file = sentinel_files[0]
    print(f"Examining Sentinel-5P file: {sentinel_file.name}")

    # Open the file using xarray
    # NOTE(review): Sentinel-5P L2 products usually keep their science
    # variables in a 'PRODUCT' group, so the root group opened here may show
    # few or no variables - confirm and pass group='PRODUCT' if needed.
    try:
        # The context manager closes the file even if inspection or plotting
        # fails part-way through.
        with xr.open_dataset(sentinel_file) as ds:
            # Print dataset information
            print("\nDataset information:")
            # info() writes to stdout itself and returns None, so wrapping it
            # in print() would add a stray "None" line.
            ds.info()

            # Print dataset dimensions
            print("\nDimensions:")
            # `sizes` is the name -> length mapping; the mapping behaviour of
            # `dims` is deprecated in recent xarray releases.
            for dim, size in ds.sizes.items():
                print(f"  {dim}: {size}")

            # Print dataset variables
            print("\nVariables:")
            for var in ds.variables:
                print(f"  {var}: {ds[var].shape}, {ds[var].dtype}")

            # Extract and plot a sample variable if available
            aerosol_vars = [var for var in ds.variables if 'aerosol' in var.lower()]
            if aerosol_vars:
                sample_var = aerosol_vars[0]
                print(f"\nExtracting sample variable: {sample_var}")

                plt.figure(figsize=(12, 8))
                ds[sample_var].plot()
                plt.title(f"Sentinel-5P {sample_var}")
                plt.show()
    except Exception as e:
        print(f"Error opening Sentinel-5P file: {e}")
else:
    print("No Sentinel-5P files found. Please download data first.")

## 4. Standardizing Data

Let's standardize a sample dataset to demonstrate the standardization process.

In [None]:
# Create a data standardizer
standardizer = DataStandardizer(raw_dir=str(raw_dir), processed_dir=str(processed_dir))

# Standardize a sample file
# `modis_file` is assigned in the MODIS cell only when `modis_files` is
# non-empty, hence the same guard here.
if modis_files:
    print(f"Standardizing MODIS file: {modis_file.name}")
    try:
        standardized_file = standardizer.standardize_modis(modis_file)
        print(f"Standardized file: {standardized_file}")

        # Open the standardized file; the context manager closes it even if
        # printing the summary fails.
        with xr.open_dataset(standardized_file) as ds:
            # Print dataset information
            print("\nStandardized dataset information:")
            # info() writes to stdout itself and returns None, so wrapping it
            # in print() would add a stray "None" line.
            ds.info()
    except Exception as e:
        print(f"Error standardizing MODIS file: {e}")
else:
    print("No MODIS files found for standardization.")

## 5. Wavelength Analysis for Solar Power Satellite (SPS)

Let's analyze atmospheric transmission at different wavelengths to identify optimal wavelengths for SPS laser power transmission.

In [None]:
# Bring in the project's transmission simulator and instantiate it
from src.wavelength_optimizer.transmission import SimpleTransmissionSimulator

simulator = SimpleTransmissionSimulator()

# 1000 evenly spaced sample points covering 300-2500 nm
wavelengths = np.linspace(300, 2500, 1000)

# Atmospheric profiles to compare
profiles = [
    'tropical',
    'midlatitude_summer',
    'midlatitude_winter',
    'subarctic_summer',
    'subarctic_winter',
]

# One simulation per profile, all at altitude=0 and zenith_angle=0,
# keyed by profile name
transmissions = {
    profile: simulator.simulate(
        wavelengths=wavelengths,
        altitude=0,
        zenith_angle=0,
        atmosphere_profile=profile,
    )
    for profile in profiles
}

# One curve per atmospheric profile on a single set of axes
fig, ax = plt.subplots(figsize=(14, 8))
for profile_name, profile_transmission in transmissions.items():
    ax.plot(wavelengths, profile_transmission, label=profile_name)

# Common laser wavelengths (nm) to mark on the plot
laser_wavelengths = {
    '808 nm': 808,   # Diode laser
    '940 nm': 940,   # Diode laser
    '1064 nm': 1064, # Nd:YAG laser
    '1550 nm': 1550, # Fiber laser
    '2100 nm': 2100  # Ho:YAG laser
}

# Dashed vertical marker plus a rotated label just to the right of each line
for line_label, line_position in laser_wavelengths.items():
    ax.axvline(x=line_position, color='red', linestyle='--', alpha=0.5)
    ax.text(line_position+10, 0.5, line_label, rotation=90, alpha=0.7)

ax.set_xlabel('Wavelength (nm)')
ax.set_ylabel('Transmission')
ax.set_title('Atmospheric Transmission vs. Wavelength for Different Atmospheric Profiles')
ax.legend()
ax.grid(True)
plt.show()

# Conditions shared by the search and by the per-wavelength re-simulation
reference_conditions = dict(
    altitude=0,
    zenith_angle=0,
    atmosphere_profile='midlatitude_summer',
)

# Ask the simulator for five candidate wavelengths within 300-2500,
# using min_spacing=100
optimal_wavelengths = simulator.find_optimal_wavelengths(
    wavelength_range=(300, 2500),
    num_wavelengths=5,
    min_spacing=100,
    **reference_conditions,
)

print("\nOptimal wavelengths for SPS laser power transmission:")
for rank, wavelength in enumerate(optimal_wavelengths, start=1):
    # Re-simulate the single wavelength to report its transmission value
    single_point = simulator.simulate(
        wavelengths=np.array([wavelength]),
        **reference_conditions,
    )
    transmission = single_point[0]
    print(f"  {rank}. {wavelength:.1f} nm (Transmission: {transmission:.4f})")

## 6. Analyzing Transmission vs. Altitude and Zenith Angle

Let's analyze how transmission varies with altitude and zenith angle for a selected optimal wavelength.

In [None]:
# Wavelength (nm) examined in this section; fixed by hand rather than taken
# from the optimizer output above
optimal_wavelength = 1550.0

# Sample axes: 21 altitudes from 0 to 20000 and 17 zenith angles from 0 to 80
# NOTE(review): altitudes are labelled in metres on the plots below - confirm
# that simulate() expects the same unit.
altitudes = np.linspace(0, 20000, 21)
zenith_angles = np.linspace(0, 80, 17)

# 2-D coordinate grids: rows follow zenith angle, columns follow altitude
altitude_grid, zenith_grid = np.meshgrid(altitudes, zenith_angles)

# Fill the grid with one single-wavelength simulation per (zenith, altitude) pair
transmission_grid = np.zeros_like(altitude_grid)
single_wavelength = np.array([optimal_wavelength])

for row, zenith in enumerate(zenith_angles):
    for col, altitude in enumerate(altitudes):
        simulated = simulator.simulate(
            wavelengths=single_wavelength,
            altitude=altitude,
            zenith_angle=zenith,
            atmosphere_profile='midlatitude_summer',
        )
        transmission_grid[row, col] = simulated[0]

# Filled contour map (20 levels) of transmission over the altitude/zenith grid
fig, ax = plt.subplots(figsize=(12, 8))
contour = ax.contourf(altitude_grid, zenith_grid, transmission_grid, 20, cmap='viridis')
fig.colorbar(contour, ax=ax, label='Transmission')
ax.set_xlabel('Altitude (m)')
ax.set_ylabel('Zenith Angle (degrees)')
ax.set_title(f'Atmospheric Transmission at {optimal_wavelength} nm')
ax.grid(True)
plt.show()

# Plot transmission vs. altitude for different zenith angles
plt.figure(figsize=(12, 8))
# Every 3rd zenith angle; indexing the grid row directly avoids recomputing
# the row number from the position in a sliced array.
for row in range(0, len(zenith_angles), 3):
    # The label previously contained a mis-encoded degree sign. Spelling it
    # as a named escape keeps the source ASCII-only so it cannot be garbled
    # again by an encoding round-trip.
    plt.plot(
        altitudes,
        transmission_grid[row, :],
        label=f'{zenith_angles[row]:.0f}\N{DEGREE SIGN}',
    )
plt.xlabel('Altitude (m)')
plt.ylabel('Transmission')
plt.title(f'Atmospheric Transmission at {optimal_wavelength} nm for Different Zenith Angles')
plt.legend()
plt.grid(True)
plt.show()

# One curve per selected altitude, showing transmission against zenith angle
fig, ax = plt.subplots(figsize=(12, 8))
for col in range(0, len(altitudes), 4):  # every 4th altitude column
    ax.plot(
        zenith_angles,
        transmission_grid[:, col],
        label=f'{altitudes[col]/1000:.0f} km',
    )
ax.set_xlabel('Zenith Angle (degrees)')
ax.set_ylabel('Transmission')
ax.set_title(f'Atmospheric Transmission at {optimal_wavelength} nm for Different Altitudes')
ax.legend()
ax.grid(True)
plt.show()

## 7. Conclusion

In this notebook, we've explored the atmospheric datasets used in the Earth's Atmospheric Characterization project and analyzed atmospheric transmission for Solar Power Satellite (SPS) laser power transmission.

Key findings:
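1. A wavelength around 1550 nm is a strong candidate for SPS laser power transmission: it shows high atmospheric transmission across the atmospheric profiles simulated in Section 5. Compare it against the optimal wavelengths printed there, since Section 6 fixes 1550 nm by hand.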
2. Transmission improves significantly with altitude, especially for larger zenith angles.
3. For ground-based receivers, keeping the zenith angle below 60 degrees is crucial for maintaining good transmission.

Next steps:
1. Analyze cloud and aerosol effects on transmission.
2. Incorporate turbulence modeling for beam propagation analysis.
3. Develop a comprehensive model for SPS laser power transmission efficiency.