## Cloud Fraction & Water Vapour Analysis
This section reproduces the analysis from `cloud_fraction.py`. We compute global area-weighted means and standard errors of the following ERA5 monthly-mean fields (2005–2025):

- **TCC** (total cloud cover, as a fraction from 0 to 1)
- **TCLW** (total column cloud liquid water, kg m$^{-2}$)
- **TCIW** (total column cloud ice water, kg m$^{-2}$)
- **TCWV** (total column water vapour, kg m$^{-2}$)

The results will appear inline as tables and plots, and can be saved as CSV.

In [None]:
# Imports for cloud fraction analysis
import glob
import os
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from netCDF4 import Dataset

# Ensure plots appear inline
%matplotlib inline


In [None]:
# Set path to ERA5 NetCDF files (update this to your local folder)
data_path = "/path/to/ERA5/files/*.nc"

# glob does not guarantee any ordering of its matches, so sort them to get a
# reproducible processing order.
files = sorted(glob.glob(data_path))

print(f"Found {len(files)} files")
if not files:
    # An unmatched pattern is not an error for glob; flag it here so the
    # later cells are not silently run on an empty file list.
    print(f"WARNING: no files match {data_path!r}; "
          "update data_path before running the cells below.")

In [None]:
# Function to compute area-weighted mean over latitude/longitude
def area_weighted_mean(var, lats):
    """Return the cos(latitude)-weighted mean of ``var`` over its last two axes.

    Parameters
    ----------
    var : array_like or numpy.ma.MaskedArray
        Field whose last two axes are (latitude, longitude). Any leading
        axes are kept in the result. Masked cells are left out of both the
        weighted sum and the weight total.
    lats : array_like
        1-D latitudes in degrees, one value per row of the latitude axis.

    Returns
    -------
    numpy.float64 or numpy.ndarray
        A scalar for 2-D input, otherwise an array of shape ``var.shape[:-2]``.
        A slice in which every cell is masked yields NaN.

    Raises
    ------
    ValueError
        If ``var`` has fewer than two dimensions, or ``lats`` is not 1-D with
        the same length as the latitude axis of ``var``.
    """
    field = np.ma.asarray(var, dtype=float)
    lat_weights = np.cos(np.deg2rad(np.asarray(lats, dtype=float)))

    if field.ndim < 2:
        raise ValueError("var must have at least two dimensions (lat, lon)")
    if lat_weights.ndim != 1 or lat_weights.shape[0] != field.shape[-2]:
        raise ValueError(
            f"lats must be 1-D with length {field.shape[-2]} to match the "
            f"latitude axis of var; got shape {lat_weights.shape}"
        )

    # The weights vary with latitude only, so add a length-1 longitude axis and
    # let broadcasting repeat them along each row. Masked cells get zero weight.
    valid = ~np.ma.getmaskarray(field)
    weights = np.where(valid, lat_weights[:, np.newaxis], 0.0)

    # Fill masked cells with 0 so they contribute nothing to the weighted sum.
    data = field.filled(0.0)
    weighted_sum = (data * weights).sum(axis=(-2, -1))
    weight_total = weights.sum(axis=(-2, -1))
    return weighted_sum / weight_total

# Loop through files and compute means and standard errors
# Compile the date pattern once instead of importing `re` on every iteration.
date_pattern = re.compile(r"(\d{4})(\d{2})")

results = []
for f in files:
    # Parse year, month from filename (assumes contains YYYYMM).
    # Try the base name first so digits in a directory name are not mistaken
    # for the date; fall back to the full path if the base name has none.
    m = date_pattern.search(os.path.basename(f)) or date_pattern.search(f)
    if m is None:
        raise ValueError(f"Could not find a YYYYMM date in file name: {f!r}")
    year, month = int(m.group(1)), int(m.group(2))
    if not 1 <= month <= 12:
        raise ValueError(f"Parsed invalid month {month} from file name: {f!r}")

    with Dataset(f) as ds:
        # Example assumes variables named 'tcc', 'tclw', 'tciw', 'tcwv'
        tcc = ds.variables['tcc'][:].squeeze()
        tclw = ds.variables['tclw'][:].squeeze()
        tciw = ds.variables['tciw'][:].squeeze()
        tcwv = ds.variables['tcwv'][:].squeeze()
        lats = ds.variables['latitude'][:]

        fields = [tcc, tclw, tciw, tcwv]

        # Compute means
        means = [area_weighted_mean(arr, lats) for arr in fields]

        # NOTE: this standard error is the *unweighted* standard deviation of
        # all values divided by sqrt(N), i.e. it weights every grid cell
        # equally. N counts only unmasked values so that it matches the
        # sample std() is computed from.
        ses = [arr.std() / np.sqrt(np.ma.count(arr)) for arr in fields]

        results.append([year, month] + means + ses)

columns = ["year","month",
           "tcc_mean","tclw_mean","tciw_mean","tcwv_mean",
           "tcc_se","tclw_se","tciw_se","tcwv_se"]

# Sort chronologically so the rows (and any time-series plot built from them)
# do not depend on how the file names happen to sort.
df_cloud = (
    pd.DataFrame(results, columns=columns)
    .sort_values(["year", "month"])
    .reset_index(drop=True)
)

# Display table inline
df_cloud.head()

In [None]:
# Plot timeseries inline
# Build the monthly date index once (first day of each month).
dates = pd.to_datetime(df_cloud[['year','month']].assign(day=1))

# TCC is a 0-1 fraction while TCWV is in kg m^-2, so give each its own
# y-axis; on a shared axis the TCC curve is squashed against zero.
fig, ax_tcc = plt.subplots(figsize=(10,6))
ax_tcwv = ax_tcc.twinx()

line_tcc, = ax_tcc.plot(dates, df_cloud['tcc_mean'],
                        color='tab:blue', label='TCC (cloud fraction)')
line_tcwv, = ax_tcwv.plot(dates, df_cloud['tcwv_mean'],
                          color='tab:orange', label='TCWV (water vapour)')

ax_tcc.set_xlabel('Time')
ax_tcc.set_ylabel('TCC (fraction, 0-1)')
ax_tcwv.set_ylabel('TCWV (kg m$^{-2}$)')
ax_tcc.set_title('ERA5 Global Means')

# The lines live on two different axes, so pass both handles to one legend.
ax_tcc.legend(handles=[line_tcc, line_tcwv], loc='best')
plt.show()

In [None]:
# Optional export: write the per-month table to a CSV file in the current
# working directory, without the DataFrame index column.
output_csv = "cloud_tcwv_globals_ERA5_2005_2025.csv"
df_cloud.to_csv(output_csv, index=False)

# Student Project Tutorial
This notebook combines multiple analysis steps into a single workflow.

## Overall Data Analysis

In [None]:
# Descriptive statistics for df_cloud, transposed so that each original
# column becomes a row. Requires df_cloud to have been built already.
summary_stats = df_cloud.describe().transpose()
summary_stats