In [None]:
# Locations of the NetCDF inputs. Every path sits under one shared project
# root, so the recurring directory prefixes are spelled out once and reused.
_PROJECT_ROOT = "/data3/interns/NRT_CO2_Emission_Map_Project"
_ERA5_DIR = f"{_PROJECT_ROOT}/HaoHu_work/ERA5_resample"
_XCO2_DIR = f"{_PROJECT_ROOT}/HaoHu_work/XCO2_resample"
_GRIDDED_DIR = f"{_PROJECT_ROOT}/MingjuanZhang_work"

# Target files, keyed by variable name.
target_variable = dict(
    xco2=f"{_XCO2_DIR}/global_grid_0.1_2019_2025_xco2.nc",
    emission=f"{_PROJECT_ROOT}/ML_XCO2/CarbonMonitor0Power_emission_201901_202505.nc",
)

# ERA5 files all follow the "<name>_daily_0p1deg.nc" pattern.
_ERA5_NAMES = (
    "t2m", "d2m", "u10", "v10", "msl", "sp", "skt",
    "tp", "e", "ssr", "str", "tcw", "blh",
)

# Files named "<prefix>_global_0.1degree_2019_2025_ns.nc", as (key, prefix).
_GRIDDED_PREFIXES = (
    ("population", "Population"),
    ("elevation", "SRTM_elevation"),
    ("landuse", "Landuse"),
    ("aspect", "SRTM_aspect"),
    ("ndvi", "NDVI"),
    ("gpp", "GPP"),
    ("lai", "LAI"),
    ("ntl", "VIIRS_NTL"),
    ("evi", "EVI"),
    ("slope", "SRTM_slope"),
)

# Feature files, keyed by variable name. Entries are inserted in a fixed
# order so that iterating the dict always yields the same sequence.
feature_variables = {
    name: f"{_ERA5_DIR}/{name}_daily_0p1deg.nc" for name in _ERA5_NAMES
}
feature_variables["NO2"] = f"{_XCO2_DIR}/global_grid_0.1_2019_2025_NO2.nc"
feature_variables["is_weekend"] = (
    f"{_XCO2_DIR}/global_grid_0.1_2019_2025_weekday_weekend.nc"
)
feature_variables.update(
    (key, f"{_GRIDDED_DIR}/{prefix}_global_0.1degree_2019_2025_ns.nc")
    for key, prefix in _GRIDDED_PREFIXES
)
feature_variables["odiac"] = f"{_PROJECT_ROOT}/HaoHu_work/odiac_interp_2019_2025.nc"
feature_variables["CO2_fire"] = (
    f"{_PROJECT_ROOT}/PinyiLu_work/GFAS_resample/GFAS_resample_final.nc"
)


# **1. Load libraries**

In [None]:
import xarray as xr
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tqdm import tqdm
#import cupy as cp
import time
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
from sklearn.model_selection import train_test_split
import os
import matplotlib as mpl

# **2. Plot Correlation Figures**

## **Load Data**

In [None]:
# Read the saved array from the working directory.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python
# objects, which can execute code -- only use it on files you trust.
npy_path = "xco2_nonnan_processed.npy"
data = np.load(npy_path, allow_pickle=True)

# Wrap the loaded array in a DataFrame for the column-based steps below.
df = pd.DataFrame(data=data)

# A bare last expression gives the notebook's rich preview of the frame.
df

In [None]:
# Assemble one datetime per row from the separate year / month / day columns.
date = pd.to_datetime(df[["year", "month", "day"]])

# 1-based day counter relative to 2019-01-01 (that date itself maps to 1).
first_day = pd.Timestamp("2019-01-01")
days_since_first = (date - first_day).dt.days
df["n_day"] = days_since_first + 1

In [None]:
# Short labels for the columns, as (original column name, label) pairs.
# Some entries map a name to itself, so those columns stay unchanged.
colmap = dict((
    ("t2m", "T2"), ("d2m", "DP2"), ("u10", "U10"), ("v10", "V10"),
    ("msl", "MSL"), ("sp", "Psfc"), ("skt", "TS"), ("tp", "Prec"),
    ("e", "E"), ("ssr", "SSR"), ("str", "STR"), ("tcw", "TCW"),
    ("blh", "BLH"),
    ("NO2", "NO2"), ("is_weekend", "WE"), ("population", "POP"),
    ("elevation", "ELE"), ("aspect", "ASP"), ("ndvi", "NDVI"),
    ("gpp", "GPP"), ("lai", "LAI"), ("ntl", "NTL"), ("evi", "EVI"),
    ("slope", "SLO"), ("odiac", "FFE"), ("CO2_fire", "BBE"),
    ("geo_x", "geo_x"), ("geo_y", "geo_y"), ("geo_z", "geo_z"),
    ("month_sin", "mon_sin"), ("month_cos", "mon_cos"),
    ("n_day", "Day"),
))

# Columns without an entry in colmap keep their original name.
df = df.rename(columns=colmap)


In [None]:
# Columns left out of the variable list below.
excluded_columns = {
    "lat", "lon", "time", "split",
    "time_bin", "lat_bin", "lon_bin", "spacetime_block",
    "year", "month", "day",
    "emission",
}

# NOTE(review): `vars` shadows the Python builtin of the same name; it is kept
# because a later cell reads it.
vars = [name for name in df.columns if name not in excluded_columns]

print(", ".join(str(name) for name in vars))


In [None]:
# Per-column tally of missing and present values.
null_count = df.isnull().sum()
non_null_count = df.notnull().sum()
counts = pd.DataFrame({
    "null_count": null_count,
    "non_null_count": non_null_count,
})

# Move the column names out of the index into a regular "variable" column.
summary = counts.reset_index().rename(columns={"index": "variable"})

summary

In [None]:
# Font sizes for the figure. rcParams is global state, so these values stay
# in effect for any figure drawn later in the same kernel.
heatmap_fonts = {
    "font.size": 15,
    "axes.labelsize": 10,
    "xtick.labelsize": 10,
    "ytick.labelsize": 10,
    "legend.fontsize": 15,
    "figure.titlesize": 15,
}
mpl.rcParams.update(heatmap_fonts)

# Keep only the listed variables that are present in df and numeric, since
# the correlation is computed on exactly these columns.
is_numeric = pd.api.types.is_numeric_dtype
numeric_vars = [
    name for name in vars
    if name in df.columns and is_numeric(df[name])
]

# Pairwise Pearson correlation between the selected columns.
corr = df[numeric_vars].corr(method="pearson")

# Draw the matrix on an explicit axes: diverging palette centred on zero,
# square cells, no per-cell annotations.
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(
    data=corr,
    cmap="RdBu_r",
    center=0,
    annot=False,
    square=True,
    cbar_kws={"label": "Correlation"},
    ax=ax,
)
fig.tight_layout()
plt.show()