In [5]:
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score
import random
import os
from datetime import datetime

# Pickled random-forest models to evaluate. M11 is fed only SSS and SST,
# M12 is fed the full predictor list below.
m11_asal = 'model_random_forest_sss_sst_only.pkl'
m12_asal = 'model_random_forest.pkl'

# Predictor columns handed to each model's predict().
x_cols_m11 = ['SSS', 'SST']
x_cols_m12 = ['LATITUDE', 'SSS', 'SST', 'SSH', 'UO', 'VO', 'MLD']

# Compute my testing dates: one '<YYYYMMDD>.feather' file per date, shuffled
# with a fixed seed; the last 10% of the shuffled list is the test split.
input_data = '../model_collocated_10d_filled'
start_date = '20100101'
end_date = '20221231'

random.seed(56)

# NOTE(review): os.listdir() returns entries in arbitrary order, so the seeded
# shuffle only reproduces the same split while the directory listing order is
# stable. Any change here (e.g. sorting) must be mirrored wherever the training
# split is computed, otherwise test dates may overlap training dates.
files = [
    entry
    for entry in os.listdir(input_data)
    if entry.endswith('.feather') and os.path.isfile(os.path.join(input_data, entry))
]

# File stems are compared as plain strings against the date bounds.
file_names = [
    stem
    for stem in (os.path.splitext(entry)[0] for entry in files)
    if start_date <= stem <= end_date
]

random.shuffle(file_names)
split_point = int(0.9 * len(file_names))

print(len(file_names))
testing_dates = [
    datetime.strptime(stem, '%Y%m%d') for stem in file_names[split_point:]
]
print(len(testing_dates))
# Load my testing dataset: one feather file per held-out date. Files that
# contain no rows are skipped.
test_frames = []
for date in testing_dates:
    date_str = date.strftime('%Y%m%d')
    profiles_data = pd.read_feather(os.path.join(input_data, f'{date_str}.feather'))
    if not profiles_data.empty:
        test_frames.append(profiles_data)

# Everything downstream needs a DataFrame. Fail here with a clear message
# rather than letting a leftover empty list reach DataFrame-only calls.
if not test_frames:
    raise ValueError(
        f'No non-empty test files found in {input_data!r} for the selected testing dates'
    )

# Drop every row that has a missing value in any column.
test_data = pd.concat(test_frames, ignore_index=True).dropna()

# Keep only rows whose LATITUDE lies strictly between -60 and 60.
latitude = test_data['LATITUDE']
test_data = test_data[(latitude > -60) & (latitude < 60)]

# Preallocate one NaN-filled vector of length 46 per (metric, model, target)
# combination. Each name gets its own independent array.
# Naming: <metric>_<model>_<target>, with metric in {r2, ev, mse, mae},
# model in {m11, m12} and target in {as, ct}.
(
    r2_m11_as, r2_m11_ct,
    ev_m11_as, ev_m11_ct,
    mse_m11_as, mse_m11_ct,
    mae_m11_as, mae_m11_ct,
) = (np.full(46, np.nan) for _ in range(8))

(
    r2_m12_as, r2_m12_ct,
    ev_m12_as, ev_m12_ct,
    mse_m12_as, mse_m12_ct,
    mae_m12_as, mae_m12_ct,
) = (np.full(46, np.nan) for _ in range(8))

4748
475


In [6]:
# Targets: the last 92 columns of test_data. The scoring code in this notebook
# compares column k of y_test with column k of each model's prediction, reading
# columns 0-45 into the "*_as" scores and columns 46-91 into the "*_ct" scores,
# so the column order here has to line up with the models' output order.
# NOTE(review): 92 is hard-coded -- confirm it equals the number of model outputs.
y_test = test_data.iloc[:,-92:]

In [7]:
# Load both pickled models and predict on the test set.
# y_pred  <- M11 model, fed the x_cols_m11 columns
# y_pred2 <- M12 model, fed the x_cols_m12 columns
#
# WARNING: pickle.load() can execute arbitrary code embedded in the file; only
# load model files that come from a trusted source.
# The `with` blocks close the file handles as soon as each model is loaded.
with open(m11_asal, 'rb') as model_file:
    model = pickle.load(model_file)
y_pred = model.predict(test_data[x_cols_m11])

with open(m12_asal, 'rb') as model_file:
    model2 = pickle.load(model_file)
y_pred2 = model2.predict(test_data[x_cols_m12])


[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:    6.5s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:   26.3s finished
[Parallel(n_jobs=8)]: Using backend ThreadingBackend with 8 concurrent workers.
[Parallel(n_jobs=8)]: Done  16 tasks      | elapsed:    7.0s
[Parallel(n_jobs=8)]: Done 100 out of 100 | elapsed:   26.9s finished


In [8]:
# Per-column R2 / explained variance / MSE / MAE for both models.
# Columns 0-45 of y_test and of the predictions feed the "*_as" arrays and
# columns 46-91 feed the "*_ct" arrays; "m11" scores y_pred, "m12" scores y_pred2.
predictions = {'m11': y_pred, 'm12': y_pred2}
column_offsets = {'as': 0, 'ct': 46}
metric_functions = {
    'r2': r2_score,
    'ev': explained_variance_score,
    'mse': mean_squared_error,
    'mae': mean_absolute_error,
}

# CSV column name -> preallocated score array. The dict holds references, so
# the loop below fills the existing module-level arrays in place.
data = {
    'r2_m11_as': r2_m11_as,
    'r2_m11_ct': r2_m11_ct,
    'ev_m11_as': ev_m11_as,
    'ev_m11_ct': ev_m11_ct,
    'mse_m11_as': mse_m11_as,
    'mse_m11_ct': mse_m11_ct,
    'mae_m11_as': mae_m11_as,
    'mae_m11_ct': mae_m11_ct,
    'r2_m12_as': r2_m12_as,
    'r2_m12_ct': r2_m12_ct,
    'ev_m12_as': ev_m12_as,
    'ev_m12_ct': ev_m12_ct,
    'mse_m12_as': mse_m12_as,
    'mse_m12_ct': mse_m12_ct,
    'mae_m12_as': mae_m12_as,
    'mae_m12_ct': mae_m12_ct,
}

for score_name, scores in data.items():
    # Each key is '<metric>_<model>_<target>'; it selects the metric function,
    # the prediction matrix and the column offset to score.
    metric_key, model_key, target_key = score_name.split('_')
    metric = metric_functions[metric_key]
    predicted = predictions[model_key]
    offset = column_offsets[target_key]
    for i in range(0, 46):
        column = i + offset
        scores[i] = metric(y_test.iloc[:, column], predicted[:, column])

# Release the M11 model object now that its predictions have been scored.
# Guarded so that re-running this cell does not raise NameError before the
# CSV below is written.
if 'model' in globals():
    del model

# Create a DataFrame: one row per column index 0-45, one column per score array.
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('random_forest_scores.csv', index=False)


In [12]:
# Aggregate scores over whole column groups (columns 0-45 and 46 onward)
# rather than per column, printed as one headed section per model/target pair.
summary_metrics = (
    ('R2', r2_score),
    ('EV', explained_variance_score),
    ('MSE', mean_squared_error),
    ('MAE', mean_absolute_error),
)

# (heading, prediction matrix, column slice) in the order they are reported.
summary_groups = (
    ('M11 Salinity', y_pred, slice(0, 46)),
    ('M12 Salinity', y_pred2, slice(0, 46)),
    ('M11 Temperature', y_pred, slice(46, None)),
    ('M12 Temperature', y_pred2, slice(46, None)),
)

for heading, predicted, cols in summary_groups:
    print(heading)
    for label, metric in summary_metrics:
        print(f'{label}: {metric(y_test.iloc[:, cols], predicted[:, cols])}')


M11 Salinity
R2: 0.8841394981738516
EV: 0.8841443814723379
MSE: 0.08283676285403137
MAE: 0.17284012982509322
M12 Salinity
R2: 0.954922814805305
EV: 0.9549859349862251
MSE: 0.035482418735213093
MAE: 0.10881381150690438
M11 Temperature
R2: 0.7576362088659839
EV: 0.7577023508436771
MSE: 5.698026450162697
MAE: 1.4975140105776195
M12 Temperature
R2: 0.8508307399769619
EV: 0.8512681045000907
MSE: 3.1568424476492347
MAE: 0.8567737346456851
