In [2]:
!pip install codecarbon
!pip install gpflow
!pip install utils

Collecting codecarbon
  Downloading codecarbon-2.2.5-py3-none-any.whl (176 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m176.1/176.1 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting arrow (from codecarbon)
  Downloading arrow-1.2.3-py3-none-any.whl (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.4/66.4 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
Collecting pynvml (from codecarbon)
  Downloading pynvml-11.5.0-py3-none-any.whl (53 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m53.1/53.1 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
Collecting fuzzywuzzy (from codecarbon)
  Downloading fuzzywuzzy-0.18.0-py2.py3-none-any.whl (18 kB)
Installing collected packages: fuzzywuzzy, pynvml, arrow, codecarbon
Successfully installed arrow-1.2.3 codecarbon-2.2.5 fuzzywuzzy-0.18.0 pynvml-11.5.0
Collecting gpflow
  Downloading gpflow-2.8.1-py3-none-any.whl (376 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
# Start CodeCarbon's emissions tracker first, so that everything executed in this cell
# up to the tracker.stop() call at the end is included in the measurement.
from codecarbon import EmissionsTracker
tracker = EmissionsTracker()
tracker.start()



import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import colors
import xarray as xr
from glob import glob

# NOTE(review): tensorflow and gpflow are not referenced by name anywhere in the cells
# visible here (the model below is scikit-learn's SVR) — confirm they are still needed.
import tensorflow as tf
import gpflow
# NOTE(review): wildcard import from the pip-installed `utils` package. The helper functions
# defined later in this cell (make_dir, prepare_predictor, prepare_predictand, ...) are bound
# after this line and therefore shadow any same-named objects imported here.
from utils import *

# Notebook display setup: draw matplotlib figures inline, using the 'retina' figure format.
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
# Load the autoreload extension; mode 2 re-imports modules before each execution.
%load_ext autoreload
%autoreload 2

# Global matplotlib defaults for this notebook: resolution of saved figures,
# base font size, and legends drawn without a frame.
plt.rcParams.update({
    'savefig.dpi': 400,
    'font.size': 13,
    'legend.frameon': False,
})


from google.colab import drive


# Mount Google Drive at /content/gdrive/. force_remount=True asks for a fresh mount even if
# one is already present. The data paths defined further down live under this mount point.
drive.mount('/content/gdrive/', force_remount=True)


import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
from glob import glob


def make_dir(path):
    """Create directory ``path`` (and any missing parents) if nothing exists there yet.

    Args:
        path (str): directory to create.

    A path that already exists is left untouched.
    """
    if not os.path.exists(path):
        # exist_ok=True closes the check-then-create race: if the directory appears
        # between the existence check above and this call (e.g. another process
        # created it), that counts as success instead of raising FileExistsError.
        os.makedirs(path, exist_ok=True)


def prepare_predictor(data_sets, data_path, time_reindex=True):
    """Open the ``inputs_<name>.nc`` files and join them along the time axis.

    Args:
        data_sets (str | list[str]): one dataset name or a list of names; each
            name ``n`` is read from ``<data_path>/inputs_<n>.nc``.
        data_path (str): directory containing the input files.
        time_reindex (bool): when True, replace the time coordinate of the
            concatenated dataset with 0, 1, ..., N-1.

    Returns:
        tuple: ``(X, length_all)`` — the concatenated xarray dataset and a numpy
        array with the number of time steps contributed by each file.
    """
    # Accept a single name as shorthand for a one-element list.
    names = [data_sets] if isinstance(data_sets, str) else data_sets

    opened = [
        xr.open_dataset(os.path.join(data_path, f"inputs_{name}.nc"))
        for name in names
    ]
    steps_per_file = [len(ds.time) for ds in opened]

    combined = xr.concat(opened, dim='time')
    if time_reindex:
        combined = combined.assign_coords(time=np.arange(len(combined.time)))

    return combined, np.array(steps_per_file)

def prepare_predictand(data_sets,data_path,time_reindex=True):
    """Open the ``outputs_<name>.nc`` files, join them along time and average over members.

    Args:
        data_sets (str | list[str]): one dataset name or a list of names; each
            name ``n`` is read from ``<data_path>/outputs_<n>.nc``.
        data_path (str): directory containing the output files.
        time_reindex (bool): when True, replace the time coordinate of the
            result with 0, 1, ..., N-1.

    Returns:
        tuple: ``(Y, length_all)`` — the concatenated dataset, averaged over the
        ``member`` dimension, with ``lon``/``lat`` renamed to
        ``longitude``/``latitude``, dimensions ordered
        ``(time, latitude, longitude)`` and ``quantile`` removed; and a numpy
        array with the number of time steps contributed by each file.
    """
    # Accept a single name as shorthand for a one-element list.
    if isinstance(data_sets, str):
        data_sets = [data_sets]

    Y_all = []
    length_all = []

    for file in data_sets:
        data = xr.open_dataset(os.path.join(data_path, f"outputs_{file}.nc"))
        Y_all.append(data)
        length_all.append(len(data.time))

    length_all = np.array(length_all)
    Y = xr.concat(Y_all, dim='time').mean('member')
    # drop_vars replaces the deprecated Dataset.drop for removing variables/coordinates.
    Y = (
        Y.rename({'lon': 'longitude', 'lat': 'latitude'})
        .transpose('time', 'latitude', 'longitude')
        .drop_vars(['quantile'])
    )
    if time_reindex:
        Y = Y.assign_coords(time=np.arange(len(Y.time)))

    return Y, length_all


def get_rmse(truth, pred):
    """Return the mean of the cos(latitude)-weighted spatial RMSE between two fields.

    Args:
        truth: reference field; must expose a ``latitude`` coordinate (degrees)
            and ``latitude``/``longitude`` dimensions.
        pred: predicted field, subtracted from ``truth``.

    Returns:
        The square root of the weighted mean squared error over latitude and
        longitude, averaged over whatever elements remain.
    """
    lat_weights = np.cos(np.deg2rad(truth.latitude))
    squared_error = (truth - pred) ** 2
    spatial_mse = squared_error.weighted(lat_weights).mean(['latitude', 'longitude'])
    return np.sqrt(spatial_mse).data.mean()

def plot_history(history):
    """Plot training and validation loss against epoch on a new figure.

    Args:
        history: object exposing ``.epoch`` and a ``.history`` mapping with
            ``'loss'`` and ``'val_loss'`` entries (presumably a Keras
            ``History`` — confirm against the caller).
    """
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('Mean squared error')
    # One curve per recorded series, drawn in this order so the legend matches.
    for series_key, legend_text in (('loss', 'Train Loss'), ('val_loss', 'Val loss')):
        plt.plot(history.epoch, np.array(history.history[series_key]), label=legend_text)
    plt.legend()




# Utilities for normalizing the input data
def normalize(data, var, meanstd_dict):
    """Standardize ``data`` with the statistics stored for ``var``.

    Args:
        data: value(s) to standardize.
        var: key into ``meanstd_dict``.
        meanstd_dict: mapping from variable name to a sequence whose first
            element is the mean and whose second element is the standard
            deviation.

    Returns:
        ``(data - mean) / std``.
    """
    stats = meanstd_dict[var]
    return (data - stats[0]) / stats[1]

def mean_std_plot(data,color,label,ax):
    """Draw the spatial mean of ``data`` over time with a +/- one-std band.

    Args:
        data: field with ``latitude``/``longitude`` dimensions and a ``time``
            coordinate.
        color: colour used for both the line and the shaded band.
        label: legend label for the mean line.
        ax: axes object to draw on.

    Returns:
        tuple: ``(yr, mean)`` — the time values and the spatial mean that were plotted.
    """
    spatial_dims = ['latitude', 'longitude']
    center = data.mean(spatial_dims)
    spread = data.std(spatial_dims)
    years = data.time.values

    ax.plot(years, center, color=color, label=label, linewidth=4)

    # Shaded band spanning one standard deviation either side of the mean.
    upper = center + spread
    lower = center - spread
    ax.fill_between(years, upper, lower, facecolor=color, alpha=0.4)

    return years, center




cwd = os.getcwd()

# The data lives on the mounted Google Drive. This location is absolute, so it is not
# joined onto cwd: os.path.join discards every component that precedes an absolute one,
# which made the former `cwd` argument a silent no-op. The resulting paths are unchanged.
data_root = '/content/gdrive/MyDrive/climate_emulator_duncan_parris/'

# NOTE(review): training and test files are both read from the same 'train_val1' folder —
# confirm this is intended.
train_path = os.path.join(data_root, 'train_val1')
test_path  = os.path.join(data_root, 'train_val1')

make_dir(train_path)
make_dir(test_path)


# Training set
# All listed scenarios are concatenated along time; with the default time_reindex=True the
# time coordinate of the result is replaced by 0..N-1.
train_files = ["historical", "ssp585", "ssp126", "ssp370","hist-aer","hist-GHG"]
X_train_xr, X_length  = prepare_predictor(train_files,train_path)
y_train_xr, y_length  = prepare_predictand(train_files,train_path)

# Test set
# Single scenario; time_reindex=False keeps the original time coordinate of the file.
X_test_xr, _ = prepare_predictor('ssp245', data_path=test_path,time_reindex=False)
y_test_xr, _ = prepare_predictand('ssp245',data_path=test_path,time_reindex=False)

# Predictor table: one row per time step, one column per input variable.
# Only CO2 and CH4 are used; the SO2 and BC lines are disabled.
# (assumes CO2 and CH4 are 1-D over time — TODO confirm)
X_train_df = pd.DataFrame({"CO2": X_train_xr["CO2"].data,
                           "CH4": X_train_xr["CH4"].data
                           #"SO2": X_train_xr["SO2"].data
                           #"BC" : X_train_xr["BC"].data
                          }, index=X_train_xr["CO2"].coords['time'].data)

X_test_df  = pd.DataFrame({"CO2": X_test_xr["CO2"].data,
                           "CH4": X_test_xr["CH4"].data
                          }, index=X_test_xr["CO2"].coords['time'].data)


# Flatten the (latitude, longitude) grid of `tas` into one stacked dimension, then convert
# to a DataFrame: one row per time step, one column per grid cell.
y_train_df = y_train_xr["tas"].stack(dim=["latitude", "longitude"])
y_train_df = pd.DataFrame(y_train_df.to_pandas())

# Standardize the predictors
# Per-column statistics are computed on the training set only and reused for the test set.
mean, std = X_train_df.mean(), X_train_df.std()

X_train_df   = (X_train_df - mean)/std
X_test_df    = (X_test_df - mean)/std

X_train = X_train_df.to_numpy()
X_test  = X_test_df.to_numpy()



# Standardize the predictand
# A single scalar mean/std over all time steps and grid cells is used (no axis argument);
# the same two scalars are needed later to map predictions back to physical units.
y_train    = y_train_df.to_numpy().astype(np.float64)
train_tas_mean, train_tas_std = y_train.mean(), y_train.std()
y_train    = (y_train - train_tas_mean) / train_tas_std


# Sanity check of the array shapes: (time steps, features) / (time steps, grid cells).
print(X_train.shape,y_train.shape,X_test.shape)


from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor
# Base estimator: support vector regressor constructed with no arguments,
# i.e. scikit-learn's default hyperparameters.
svr = SVR()

# Create the MultiOutputRegressor with SVR
# (the wrapper fits one regressor per target, i.e. per column of y_train)
model = MultiOutputRegressor(svr)

# Fit the model
model.fit(X_train, y_train)

# predict (standardized units): one row per test time step, one column per grid cell
standard_posterior_mean = model.predict(X_test)

# NOTE(review): SVR is a point predictor and provides no predictive variance. The earlier
# code filled the "variance" with a second, identical model.predict(X_test) call. That
# placeholder is kept so the downstream variable names still exist, but it is NOT an
# uncertainty estimate and should be replaced or removed.
# The values are clipped to >= 0 *before* the square root, so no NaNs reach tas_std.
standard_posterior_var = np.clip(standard_posterior_mean, a_min=0.0, a_max=None)

# Undo the predictand standardization.
posterior_mean = standard_posterior_mean * train_tas_std + train_tas_mean
posterior_std  = np.sqrt(standard_posterior_var) * train_tas_std

# Recover the (time, latitude, longitude) layout. The grid size comes from the training
# target that was flattened to build y_train, rather than from hard-coded numbers.
grid_shape = (posterior_mean.shape[0],
              y_train_xr["tas"].sizes["latitude"],
              y_train_xr["tas"].sizes["longitude"])
y_test_pre_mean = np.reshape(posterior_mean, grid_shape)
y_test_pre_std  = np.reshape(posterior_std, grid_shape)

y_test_pre = xr.Dataset(coords={'time': X_test_xr.time.values,
                                 'latitude': X_test_xr.latitude.values,
                                 'longitude': X_test_xr.longitude.values},
                             data_vars=dict(tas_mean=(['time', 'latitude', 'longitude'], y_test_pre_mean),
                                            tas_std=(['time', 'latitude', 'longitude'], y_test_pre_std)))

# put output back into xr.DataArray format (same dims/coords as the test target)
# for calculating RMSE/plotting
posterior_tas = np.reshape(posterior_mean, grid_shape)
posterior_tas_std = np.reshape(posterior_std, grid_shape)
posterior_tas_data = xr.DataArray(posterior_tas, dims=y_test_xr.tas.dims, coords=y_test_xr.tas.coords)
posterior_tas_std_data = xr.DataArray(posterior_tas_std, dims=y_test_xr.tas.dims, coords=y_test_xr.tas.coords)

# Stop the tracker exactly once and keep the value it returns (the measured emissions).
# A second stop() call would take another measurement after tracking has already ended.
emissions: float = tracker.stop()
print(emissions)

[codecarbon INFO @ 13:15:38] [setup] RAM Tracking...
[codecarbon INFO @ 13:15:38] [setup] GPU Tracking...
[codecarbon INFO @ 13:15:38] No GPU found.
[codecarbon INFO @ 13:15:38] [setup] CPU Tracking...
[codecarbon INFO @ 13:15:39] CPU Model on constant consumption mode: AMD EPYC 7B12
[codecarbon INFO @ 13:15:39] >>> Tracker's metadata:
[codecarbon INFO @ 13:15:39]   Platform system: Linux-5.15.109+-x86_64-with-glibc2.31
[codecarbon INFO @ 13:15:39]   Python version: 3.10.12
[codecarbon INFO @ 13:15:39]   CodeCarbon version: 2.2.5
[codecarbon INFO @ 13:15:39]   Available RAM : 12.678 GB
[codecarbon INFO @ 13:15:39]   CPU count: 2
[codecarbon INFO @ 13:15:39]   CPU model: AMD EPYC 7B12
[codecarbon INFO @ 13:15:39]   GPU count: None
[codecarbon INFO @ 13:15:39]   GPU model: None
[codecarbon INFO @ 13:15:54] Energy consumed for RAM : 0.000020 kWh. RAM Power : 4.754395008087158 W
[codecarbon INFO @ 13:15:54] Energy consumed for all CPUs : 0.000500 kWh. Total CPU Power : 120.0 W
[codecarbon 

Mounted at /content/gdrive/


[codecarbon INFO @ 13:16:54] Energy consumed for RAM : 0.000099 kWh. RAM Power : 4.754395008087158 W
[codecarbon INFO @ 13:16:54] Energy consumed for all CPUs : 0.002499 kWh. Total CPU Power : 120.0 W
[codecarbon INFO @ 13:16:54] 0.002598 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:17:10] Energy consumed for RAM : 0.000119 kWh. RAM Power : 4.754395008087158 W
[codecarbon INFO @ 13:17:10] Energy consumed for all CPUs : 0.003002 kWh. Total CPU Power : 120.0 W
[codecarbon INFO @ 13:17:10] 0.003121 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:17:26] Energy consumed for RAM : 0.000140 kWh. RAM Power : 4.754395008087158 W
[codecarbon INFO @ 13:17:26] Energy consumed for all CPUs : 0.003543 kWh. Total CPU Power : 120.0 W
[codecarbon INFO @ 13:17:26] 0.003684 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:17:41] Energy consumed for RAM : 0.000160 kWh. RAM Power : 4.754395008087158 W
[codecarbon INFO @ 13:17:41] Energy consumed fo

(753, 2) (753, 13824) (86, 2)


[codecarbon INFO @ 13:17:56] Energy consumed for RAM : 0.000180 kWh. RAM Power : 4.754395008087158 W
[codecarbon INFO @ 13:17:56] Energy consumed for all CPUs : 0.004543 kWh. Total CPU Power : 120.0 W
[codecarbon INFO @ 13:17:56] 0.004723 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:18:11] Energy consumed for RAM : 0.000200 kWh. RAM Power : 4.754395008087158 W
[codecarbon INFO @ 13:18:11] Energy consumed for all CPUs : 0.005043 kWh. Total CPU Power : 120.0 W
[codecarbon INFO @ 13:18:11] 0.005243 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:18:26] Energy consumed for RAM : 0.000220 kWh. RAM Power : 4.754395008087158 W
[codecarbon INFO @ 13:18:26] Energy consumed for all CPUs : 0.005543 kWh. Total CPU Power : 120.0 W
[codecarbon INFO @ 13:18:26] 0.005762 kWh of electricity used since the beginning.
[codecarbon INFO @ 13:18:41] Energy consumed for RAM : 0.000239 kWh. RAM Power : 4.754395008087158 W
[codecarbon INFO @ 13:18:41] Energy consumed fo

0.008180470284288779


[codecarbon INFO @ 09:36:00] Energy consumed for RAM : 0.001560 kWh. RAM Power : 4.754392147064209 W
[codecarbon INFO @ 09:36:00] Energy consumed for all CPUs : 0.013948 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 09:36:00] 0.015508 kWh of electricity used since the beginning.


0.0021517204344742155
