In [None]:
import sys
import numpy as np
import pandas as pd
import os
from pathlib import Path

# Locate the project root as the parent of the notebook's working directory
# and append it to the import search path, so that the project-local `src`
# package imported below can be found there.
notebook_dir = Path(os.getcwd())
project_root = notebook_dir.parent
sys.path.append(f"{project_root}")
# Echo the working directory as a quick sanity check of where the cell runs.
print("Current directory:", os.getcwd())
from src.data_loading.simple_loader import load_berkeley_earth, load_era5

# Processed files are written under the user's home directory, so the target
# is an absolute path that does not depend on the current working directory.
output_dir = Path.home().joinpath("Documents", "temperature-comparison-data", "processed")
output_dir_BE = output_dir / "BE"
output_dir_ERA5 = output_dir / "ERA5"
# parents=True creates missing ancestors; exist_ok=True keeps re-runs harmless.
for directory in (output_dir, output_dir_BE, output_dir_ERA5):
    directory.mkdir(parents=True, exist_ok=True)
print(f"Output directory: {output_dir}")

# Read both raw datasets through the project's loader helpers.
print("Loading raw data...")
be = load_berkeley_earth()
# Rename the ERA5 variable and time coordinate to `temperature` / `time`,
# the names the later steps use on both datasets.
era5 = load_era5().rename({'t2m': 'temperature', 'valid_time': 'time'})

# 1. Convert Berkeley Earth time to datetime
def decimal_year_to_datetime(decimal_years):
    """Convert fractional-year values (e.g. ``2000.5``) to ``datetime64[ns]``.

    Each value is split into its whole year and the fraction of that year.
    The fraction is scaled by the length of that year (366 days for leap
    years, otherwise 365), truncated to whole days, and added to January 1st
    of the year.

    Parameters
    ----------
    decimal_years : iterable of numbers
        Fractional years; each element must support ``int()`` and subtraction.

    Returns
    -------
    numpy.ndarray
        One ``datetime64[ns]`` entry per input value, in input order.
    """
    def _to_timestamp(value):
        whole_year = int(value)
        jan_first = pd.Timestamp(f"{whole_year}-01-01")
        year_length = 366 if jan_first.is_leap_year else 365
        # int() truncates, so any sub-day part of the offset is discarded.
        elapsed_days = int((value - whole_year) * year_length)
        return jan_first + pd.Timedelta(days=elapsed_days)

    return np.array(
        [_to_timestamp(value) for value in decimal_years],
        dtype='datetime64[ns]',
    )
# Replace Berkeley Earth's decimal-year time values with real datetimes.
be['time'] = decimal_year_to_datetime(be['time'])

# 2. ERA5 monthly anomalies relative to a 1951-1980 baseline
# Average each calendar month over the baseline period to get a climatology
# with one entry per month.
baseline_period = era5['temperature'].sel(time=slice('1951-01-01', '1980-12-31'))
era5['climatology'] = baseline_period.groupby('time.month').mean('time')

# Subtract the matching calendar-month climatology from every time step.
era5['anomalies'] = era5['temperature'].groupby('time.month') - era5['climatology']

# 3. Re-express ERA5 longitudes in the [-180, 180) range
print("Converting ERA5 longitude...")
# Shift by 180, wrap modulo 360, shift back; then sort so that the
# longitude coordinate is ascending again after the wrap.
wrapped_longitude = (era5.longitude + 180) % 360 - 180
era5 = era5.assign_coords(longitude=wrapped_longitude).sortby('longitude')

# 4. Put ERA5 on the Berkeley Earth coordinates
print("Interpolating to common grid...")
# NOTE(review): interp_like works on every coordinate the two datasets share,
# which presumably includes `time` as well as the spatial axes. If the two
# time axes are not identical, ERA5 values would be blended between time
# steps. Confirm this is intended.
era5_interp = era5.interp_like(be)

# 5. Keep ERA5 temperature only where Berkeley Earth has data
print("Applying mask...")
be_has_data = ~np.isnan(be.temperature)
# NOTE(review): only `temperature` is masked here; `anomalies` is left
# unmasked. Confirm that is intended.
era5_interp['temperature'] = era5_interp.temperature.where(be_has_data)


# Write both preprocessed datasets to their own sub-folder as NetCDF files.
print("Saving processed data...")
be_target = output_dir_BE / "berkeley_earth_preprocessed.nc"
era5_target = output_dir_ERA5 / "era5_preprocessed.nc"
# NOTE(review): if a write fails with PermissionError, check whether the
# target file is still held open elsewhere (another kernel or an earlier
# open of the same path). This is a possible cause, not confirmed here.
be.to_netcdf(be_target)
era5_interp.to_netcdf(era5_target)

print("Preprocessing complete!")

Current directory: /main/Users/devin/Documents/temperature-comparison/notebooks
Output directory: /home/devin/Documents/temperature-comparison-data/processed
Loading raw data...
/main/Users/devin/Documents/temperature-comparison/config/config.yaml
Loading Berkeley Earth from: /home/devin/Documents/temperature-comparison-data/Raw/BE/Global_TAVG_Gridded_0p25deg.nc
/main/Users/devin/Documents/temperature-comparison/config/config.yaml
Loading ERA5 from: /home/devin/Documents/temperature-comparison-data/Raw/ERA5/ERA5_TAVG_Monthly_Raw.nc
Converting ERA5 longitude...
Interpolating to common grid...
Applying mask...
Saving processed data...


PermissionError: [Errno 13] Permission denied: b'/main/Users/devin/Documents/temperature-comparison/notebooks/temperature-comparison-data/processed/berkeley_earth_preprocessed.nc'