<a href="https://colab.research.google.com/github/addinar/permafrost-modeling-convlstm/blob/main/data/notebooks/rcp_preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Download Packages and Import Libraries**

In [None]:
!pip install xarray netCDF4

Collecting netCDF4
  Downloading netCDF4-1.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (1.8 kB)
Collecting cftime (from netCDF4)
  Downloading cftime-1.6.4.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (8.7 kB)
Downloading netCDF4-1.7.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (9.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.3/9.3 MB[0m [31m25.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cftime-1.6.4.post1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m19.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: cftime, netCDF4
Successfully installed cftime-1.6.4.post1 netCDF4-1.7.2


In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import os

In [None]:
import xarray as xr

In [None]:
from google.colab import drive, userdata
# Mount Google Drive at /content/drive (Colab only). The data folders read from
# `userdata` in the following cells presumably live on this mount - confirm.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Folder holding the per-scenario CSV exports; its location is stored as a Colab secret.
rcp_path = userdata.get('rcp_output_path')
rcp26_path, rcp45_path, rcp60_path, rcp85_path = (
    os.path.join(rcp_path, csv_name)
    for csv_name in ('RCP26_df.csv', 'RCP45_df.csv', 'RCP60_df.csv', 'RCP85_df.csv')
)

In [None]:
# Load one DataFrame per RCP scenario, in 2.6 / 4.5 / 6.0 / 8.5 order.
rcp26_df, rcp45_df, rcp60_df, rcp85_df = [
    pd.read_csv(csv_path)
    for csv_path in (rcp26_path, rcp45_path, rcp60_path, rcp85_path)
]

In [None]:
# The four scenario frames in RCP 2.6 / 4.5 / 6.0 / 8.5 order, so later cells can loop over them.
dfs = [rcp26_df, rcp45_df, rcp60_df, rcp85_df]

In [None]:
# First inspect the column names of every scenario frame.
# (In the saved output the first two frames list the same names in a different order.)
for df in dfs:
  print(df.columns)

Index(['Unnamed: 0', 'time', 'snow_depth', 'band', 'skin_temperature',
       'temperature_2m', 'snowfall_sum',
       'surface_thermal_radiation_downwards_sum',
       'avg_volumetric_water_content', 'average_lake_temperature',
       'total_precipitation_sum', 'surface_latent_heat_flux_sum',
       'surface_sensible_heat_flux_sum', 'surface_snow_amount',
       'surface_solar_radiation_downwards_sum'],
      dtype='object')
Index(['Unnamed: 0', 'time', 'surface_solar_radiation_downwards_sum', 'band',
       'surface_thermal_radiation_downwards_sum', 'total_precipitation_sum',
       'surface_sensible_heat_flux_sum', 'snowfall_sum', 'surface_snow_amount',
       'skin_temperature', 'temperature_2m', 'surface_latent_heat_flux_sum',
       'snow_depth', 'average_lake_temperature',
       'avg_volumetric_water_content'],
      dtype='object')
Index(['Unnamed: 0', 'time', 'surface_latent_heat_flux_sum', 'band',
       'surface_solar_radiation_downwards_sum', 'avg_volumetric_water_content'

# **Derive Additional Features**

## **Snow Cover & Snow Density**

In [None]:
# Columns available for deriving the snow features in this section.
rcp26_df.columns

Index(['Unnamed: 0', 'time', 'snow_depth', 'band', 'skin_temperature',
       'temperature_2m', 'snowfall_sum',
       'surface_thermal_radiation_downwards_sum',
       'avg_volumetric_water_content', 'average_lake_temperature',
       'total_precipitation_sum', 'surface_latent_heat_flux_sum',
       'surface_sensible_heat_flux_sum', 'surface_snow_amount',
       'surface_solar_radiation_downwards_sum'],
      dtype='object')

`snow_cover` $= \min\left(1, \frac{1000 \times SD}{15}\right)$

where $SD$ is the snow density, computed as

$SD = \frac{snw}{snd}$

with $snw$ the surface snow amount (kg/m²) and $snd$ the snow depth (m).

The source for this equation can be found [here](https://confluence.ecmwf.int/display/CKB/ERA-Interim%3A+documentation).



In [None]:
# The four scenario frames; the loop in the next cell adds the derived snow columns to each in place.
dfs = [rcp26_df, rcp45_df, rcp60_df, rcp85_df]

In [None]:
MIN_SNOW_DEPTH = 0.01  # 1 cm
MIN_SNOW_AMOUNT = 0.1  # 0.1 kg/m²

for df in dfs:
    # Mask readings below the thresholds as NaN (existing NaNs stay NaN) so that
    # near-zero depths cannot blow up the density ratio computed next.
    df['surface_snow_amount'] = df['surface_snow_amount'].where(df['surface_snow_amount'] >= MIN_SNOW_AMOUNT)
    df['snow_depth'] = df['snow_depth'].where(df['snow_depth'] >= MIN_SNOW_DEPTH)

    # snow density = snow amount / snow depth; NaN wherever either input was masked.
    df['snow_density'] = df['surface_snow_amount'] / df['snow_depth']

    # snow_cover = min(1, 1000 * density / 15); NaN densities stay NaN.
    # NOTE(review): this expression equals 1 for every density >= 0.015, so with
    # densities in kg/m^3 it is effectively a "snow present" flag. The cited
    # ERA-Interim formula appears to take SD as snow depth in metres of water
    # equivalent rather than snow density - TODO confirm against the source.
    df['snow_cover'] = (1000 * df['snow_density'] / 15).clip(upper=1)

## **Snow Albedo**

Snow albedo is approximated as the ratio of upwelling ($rsus$) to downwelling ($rsds$) surface shortwave radiation:
$$
Albedo \approx \frac{rsus}{rsds}
$$

In [None]:
# rsus (surface shortwave upwelling radiation) is stored as NetCDF files in one
# sub-folder per scenario; it has to be loaded and turned into DataFrames first.
# Note: `rcp_path` is re-pointed here from the CSV folder to the 'RCP_folder' secret.
rcp_path = userdata.get('RCP_folder')
rcp26_rsus, rcp45_rsus, rcp60_rsus, rcp85_rsus = (
    os.path.join(rcp_path, folder_name)
    for folder_name in ('RCP26_rsus', 'RCP45_rsus', 'RCP60_rsus', 'RCP85_rsus')
)

In [None]:
# Latitude bounds of the six bands, keyed by the zero-based band number as a string.
# create_df looks these up with str(i) and labels the rows f'band_{i+1}',
# so key '0' corresponds to 'band_1' and key '5' to 'band_6'.
latitudes = {
    '0': [59.66292135, 61.68539326], # band 1: [min_lat, max_lat]
    '1': [61.68539326, 63.70786517], # band 2
    '2': [63.70786517, 65.73033708], # band 3
    '3': [65.73033708, 67.75280899], # band 4
    '4': [67.75280899, 69.7752809], # band 5
    '5': [69.7752809, 71.79775281] # band 6
}

In [None]:
# Inspect one scenario folder. The saved listing shows one NetCDF file per
# five-year span (names end in YYYYMM-YYYYMM), returned in arbitrary order.
os.listdir(rcp26_rsus)

['rsus_Amon_GFDL-ESM2M_rcp26_r1i1p1_206601-207012.nc',
 'rsus_Amon_GFDL-ESM2M_rcp26_r1i1p1_206101-206512.nc',
 'rsus_Amon_GFDL-ESM2M_rcp26_r1i1p1_209101-209512.nc',
 'rsus_Amon_GFDL-ESM2M_rcp26_r1i1p1_208101-208512.nc',
 'rsus_Amon_GFDL-ESM2M_rcp26_r1i1p1_208601-209012.nc',
 'rsus_Amon_GFDL-ESM2M_rcp26_r1i1p1_207101-207512.nc',
 'rsus_Amon_GFDL-ESM2M_rcp26_r1i1p1_209601-210012.nc',
 'rsus_Amon_GFDL-ESM2M_rcp26_r1i1p1_205601-206012.nc',
 'rsus_Amon_GFDL-ESM2M_rcp26_r1i1p1_207601-208012.nc',
 'rsus_Amon_GFDL-ESM2M_rcp26_r1i1p1_203101-203512.nc',
 'rsus_Amon_GFDL-ESM2M_rcp26_r1i1p1_205101-205512.nc',
 'rsus_Amon_GFDL-ESM2M_rcp26_r1i1p1_203601-204012.nc',
 'rsus_Amon_GFDL-ESM2M_rcp26_r1i1p1_204601-205012.nc',
 'rsus_Amon_GFDL-ESM2M_rcp26_r1i1p1_204101-204512.nc',
 'rsus_Amon_GFDL-ESM2M_rcp26_r1i1p1_202101-202512.nc',
 'rsus_Amon_GFDL-ESM2M_rcp26_r1i1p1_202601-203012.nc']

In [None]:
def create_df(file_name, rcp_folder):
  """Build a per-band `rsus` table from one NetCDF file.

  Opens `rcp_folder/file_name`, wraps longitudes from [0, 360) into [-180, 180),
  and for every latitude band in the module-level `latitudes` dict selects the
  box lon -168.75..-143.75 and takes the maximum `rsus` over that box for each
  time step.

  Args:
    file_name: name of the NetCDF file inside `rcp_folder`.
    rcp_folder: directory that contains the file.

  Returns:
    DataFrame with columns `index`, `time`, `rsus` and `band` ('band_1', ...),
    the bands stacked one after another. `index` is the row number within each
    band, left over from the final reset_index().
  """
  full_path = os.path.join(rcp_folder, file_name)
  band_frames = []
  # The context manager closes the file once the values are copied into pandas.
  with xr.open_dataset(full_path) as raw:
    # After wrapping, the longitudes are no longer ascending; sort them so the
    # label slices below do not depend on exact matches in an unsorted index.
    data = raw.assign_coords(
      lon=(((raw.lon + 180) % 360) - 180)
    ).sortby('lon')
    for key, (min_lat, max_lat) in latitudes.items():
      try:
        region = data.sel(
          lat=slice(min_lat, max_lat),
          lon=slice(-168.75, -143.75)
        )
      except (KeyError, ValueError):
        # Fall back to rotated-grid coordinate names when lat/lon cannot be selected.
        region = data.sel(
          rlat=slice(min_lat, max_lat),
          rlon=slice(-168.75, -143.75)
        )
      band_df = region['rsus'].to_dataframe().reset_index()
      # Collapse the spatial box to a single value per time step (regional maximum).
      band_df = band_df.groupby('time')['rsus'].max().reset_index()
      band_df['band'] = f'band_{int(key) + 1}'
      band_frames.append(band_df)
  # reset_index() without drop=True is kept on purpose: callers currently receive
  # the extra `index` column (the saved shapes show 4 columns).
  file_df = pd.concat(band_frames, axis=0).reset_index()
  return file_df

In [None]:
# One list per scenario, collecting the per-file frames returned by create_df.
rcp26_rsus_data = []
rcp45_rsus_data = []
rcp60_rsus_data = []
rcp85_rsus_data = []

In [None]:
# Build the list from scratch so that re-running this cell cannot append the
# same files twice. Only NetCDF files are read, in a deterministic order.
rcp26_rsus_data = [
  create_df(file, rcp26_rsus)
  for file in sorted(os.listdir(rcp26_rsus))
  if file.endswith('.nc')
]

rcp_26_rsus_df = pd.concat(rcp26_rsus_data, axis=0).reset_index(drop=True)

  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)


In [None]:
# Sanity check: 16 files x 6 bands with (presumably) 60 monthly steps each = 5760 rows;
# the 4 columns are index, time, rsus and band.
rcp_26_rsus_df.shape

(5760, 4)

In [None]:
# Build the list from scratch so that re-running this cell cannot append the
# same files twice. Only NetCDF files are read, in a deterministic order.
rcp45_rsus_data = [
  create_df(file, rcp45_rsus)
  for file in sorted(os.listdir(rcp45_rsus))
  if file.endswith('.nc')
]

rcp_45_rsus_df = pd.concat(rcp45_rsus_data, axis=0).reset_index(drop=True)

  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)


In [None]:
# Sanity check: should have the same shape as the RCP 2.6 table.
rcp_45_rsus_df.shape

(5760, 4)

In [None]:
# Build the list from scratch so that re-running this cell cannot append the
# same files twice. Only NetCDF files are read, in a deterministic order.
rcp60_rsus_data = [
  create_df(file, rcp60_rsus)
  for file in sorted(os.listdir(rcp60_rsus))
  if file.endswith('.nc')
]

rcp_60_rsus_df = pd.concat(rcp60_rsus_data, axis=0).reset_index(drop=True)

  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)


In [None]:
# Sanity check: should have the same shape as the RCP 2.6 table.
rcp_60_rsus_df.shape

(5760, 4)

In [None]:
# Build the list from scratch so that re-running this cell cannot append the
# same files twice. Only NetCDF files are read, in a deterministic order.
rcp85_rsus_data = [
  create_df(file, rcp85_rsus)
  for file in sorted(os.listdir(rcp85_rsus))
  if file.endswith('.nc')
]

rcp_85_rsus_df = pd.concat(rcp85_rsus_data, axis=0).reset_index(drop=True)

  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)
  data = xr.open_dataset(full_path)


In [None]:
# Sanity check: should have the same shape as the RCP 2.6 table.
rcp_85_rsus_df.shape

(5760, 4)

In [None]:
# Bring `time` in all eight frames to plain datetime.date values (going through
# str first) so the scenario and rsus tables share one key type for the merge.
for frame in (
  rcp26_df, rcp45_df, rcp60_df, rcp85_df,
  rcp_26_rsus_df, rcp_45_rsus_df, rcp_60_rsus_df, rcp_85_rsus_df,
):
  frame['time'] = pd.to_datetime(frame['time'].astype(str)).dt.date

In [None]:
# Attach rsus to each scenario frame on (time, band).
# validate='one_to_one' makes pandas raise a MergeError when either side has
# duplicated keys, instead of silently multiplying rows in the outer join.
rcp26_df = rcp26_df.merge(rcp_26_rsus_df, on=['time', 'band'], how='outer', validate='one_to_one')
rcp45_df = rcp45_df.merge(rcp_45_rsus_df, on=['time', 'band'], how='outer', validate='one_to_one')
rcp60_df = rcp60_df.merge(rcp_60_rsus_df, on=['time', 'band'], how='outer', validate='one_to_one')
rcp85_df = rcp85_df.merge(rcp_85_rsus_df, on=['time', 'band'], how='outer', validate='one_to_one')

In [None]:
# NaN count per column for each scenario after the merge (the four Series are printed back to back).
# In the saved output only the snow columns have gaps; rsus and the merge keys have none.
print(rcp26_df.isna().sum(), rcp45_df.isna().sum(), rcp60_df.isna().sum(), rcp85_df.isna().sum())

Unnamed: 0                                    0
time                                          0
snow_depth                                 2727
band                                          0
skin_temperature                              0
temperature_2m                                0
snowfall_sum                                  0
surface_thermal_radiation_downwards_sum       0
avg_volumetric_water_content                  0
average_lake_temperature                      0
total_precipitation_sum                       0
surface_latent_heat_flux_sum                  0
surface_sensible_heat_flux_sum                0
surface_snow_amount                        1134
surface_solar_radiation_downwards_sum         0
snow_density                               2737
snow_cover                                 2737
index                                         0
rsus                                          0
dtype: int64 Unnamed: 0                                    0
time                       

In [None]:
# Rebuild the list: merge() returned new DataFrames, so the frames held by the
# previous `dfs` list are no longer the ones bound to rcp26_df ... rcp85_df.
dfs = [rcp26_df, rcp45_df, rcp60_df, rcp85_df]

In [None]:
for df in dfs:
    # Albedo is only computed for rows with full snow cover; all other rows are
    # left without a value (NaN) in `snow_albedo`.
    fully_covered = df['snow_cover'] == 1
    # NOTE(review): assumes rsus and surface_solar_radiation_downwards_sum are in the same units - confirm.
    albedo = df.loc[fully_covered, 'rsus'] / df.loc[fully_covered, 'surface_solar_radiation_downwards_sum']
    # Infinite ratios (zero denominator) and NaN ratios are both stored as 0.
    albedo = albedo.replace([np.inf, -np.inf], np.nan).fillna(0)
    df.loc[fully_covered, 'snow_albedo'] = albedo

In [None]:
dfs = [rcp26_df, rcp45_df, rcp60_df, rcp85_df]

# A loop `for df in dfs: df = df.fillna(0)` used to sit here. It only rebound the
# loop variable and never changed the frames, so it has been removed. The NaNs
# are left in place on purpose: the "Impute Missing Data" section fills them.
# NOTE(review): if zero-filling (e.g. snow_albedo on snow-free rows) was the
# intent, do it explicitly per column before the imputation step.

In [None]:
# 15 original columns + snow_density, snow_cover, index, rsus, snow_albedo = 20.
rcp26_df.shape

(5760, 20)

# **Impute Missing Data**

In [None]:
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

In [None]:
# One imputer object with a fixed seed; the loop below calls fit_transform on
# each scenario frame, so it is refit per scenario.
imputer = IterativeImputer(max_iter=10, random_state=0)

In [None]:
# The four scenario frames; the imputation loop below writes into them in place.
dfs = [rcp26_df, rcp45_df, rcp60_df, rcp85_df]

In [None]:
for df in dfs:
  # Only the columns that contain gaps are passed to the imputer, so values are
  # estimated from those columns alone (not from the complete predictors).
  missing_cols = df.columns[df.isna().any()]
  # Skip frames with nothing to impute (e.g. when this cell is re-run):
  # fit_transform raises on an input with zero columns.
  if len(missing_cols) > 0:
    df[missing_cols] = imputer.fit_transform(df[missing_cols])
  print(df.isna().sum())

Unnamed: 0                                 0
time                                       0
snow_depth                                 0
band                                       0
skin_temperature                           0
temperature_2m                             0
snowfall_sum                               0
surface_thermal_radiation_downwards_sum    0
avg_volumetric_water_content               0
average_lake_temperature                   0
total_precipitation_sum                    0
surface_latent_heat_flux_sum               0
surface_sensible_heat_flux_sum             0
surface_snow_amount                        0
surface_solar_radiation_downwards_sum      0
snow_density                               0
snow_cover                                 0
index                                      0
rsus                                       0
snow_albedo                                0
dtype: int64
Unnamed: 0                                 0
time                                      

# **Restructure Dataframes**

In [None]:
# The four scenario frames to be renamed and reordered below.
dfs = [rcp26_df, rcp45_df, rcp60_df, rcp85_df]

In [None]:
# Final column selection and order. Anything not listed here is dropped when a
# frame is indexed with this list, e.g. 'Unnamed: 0', 'index', 'rsus',
# 'temperature_2m' and 'average_lake_temperature'.
# NOTE(review): 'snow_depth_water_equivalent' (the renamed surface_snow_amount)
# is not listed either, so it is dropped as well - confirm that is intended.
new_order = ['date', 'band', 'snow_albedo', 'snow_cover', 'snow_density',
             'snow_depth', 'snowfall_sum', 'surface_latent_heat_flux_sum',
             'surface_sensible_heat_flux_sum', 'surface_solar_radiation_downwards_sum',
             'surface_thermal_radiation_downwards_sum', 'skin_temperature',
             'total_precipitation_sum', 'avg_volumetric_water_content']

In [None]:
# Rename `time` -> `date` and keep only the `new_order` columns, in that order.
# rename() and the column selection both return new objects, so the input
# frames are not modified; .copy() gives each result its own data.
dfs = [
    df.rename(columns={'time': 'date', 'surface_snow_amount': 'snow_depth_water_equivalent'})[new_order].copy()
    for df in dfs
]

# Rebind the named frames. Previously the restructured frames lived only inside
# `dfs`, so every later cell that used rcp26_df ... rcp85_df still saw the old
# column names and the extra columns.
rcp26_df, rcp45_df, rcp60_df, rcp85_df = dfs

## **One-Hot Encoding Bands**

Perform one-hot encoding on the bands.

In [None]:
# Guarded so the cell can be re-run: once encoded, `band` no longer exists and
# there is nothing left to do.
if 'band' in rcp26_df.columns:
  rcp26_one_hot_bands = pd.get_dummies(rcp26_df['band']).astype(int)
  # drop() returns a new frame, so the object an earlier `dfs` list may still
  # reference is not modified in place.
  rcp26_df = pd.concat([rcp26_df.drop(columns='band'), rcp26_one_hot_bands], axis=1)

In [None]:
# Guarded so the cell can be re-run: once encoded, `band` no longer exists and
# there is nothing left to do.
if 'band' in rcp45_df.columns:
  rcp45_one_hot_bands = pd.get_dummies(rcp45_df['band']).astype(int)
  # drop() returns a new frame, so the object an earlier `dfs` list may still
  # reference is not modified in place.
  rcp45_df = pd.concat([rcp45_df.drop(columns='band'), rcp45_one_hot_bands], axis=1)

In [None]:
# Guarded so the cell can be re-run: once encoded, `band` no longer exists and
# there is nothing left to do.
if 'band' in rcp60_df.columns:
  rcp60_one_hot_bands = pd.get_dummies(rcp60_df['band']).astype(int)
  # drop() returns a new frame, so the object an earlier `dfs` list may still
  # reference is not modified in place.
  rcp60_df = pd.concat([rcp60_df.drop(columns='band'), rcp60_one_hot_bands], axis=1)

In [None]:
# Guarded so the cell can be re-run: once encoded, `band` no longer exists and
# there is nothing left to do.
if 'band' in rcp85_df.columns:
  rcp85_one_hot_bands = pd.get_dummies(rcp85_df['band']).astype(int)
  # drop() returns a new frame, so the object an earlier `dfs` list may still
  # reference is not modified in place.
  rcp85_df = pd.concat([rcp85_df.drop(columns='band'), rcp85_one_hot_bands], axis=1)

In [None]:
# Preview the encoded frame.
# NOTE(review): the saved output shows standardized values and only the
# `new_order` columns, i.e. it was produced from a later kernel state than this
# cell's position in the notebook - re-run top to bottom to refresh it.
rcp26_df.head()

Unnamed: 0,date,snow_albedo,snow_cover,snow_density,snow_depth,snowfall_sum,surface_latent_heat_flux_sum,surface_sensible_heat_flux_sum,surface_solar_radiation_downwards_sum,surface_thermal_radiation_downwards_sum,skin_temperature,total_precipitation_sum,avg_volumetric_water_content,band_1,band_2,band_3,band_4,band_5,band_6
0,2021-01-16,-0.197107,0.0,1.032769,-1.01918,1.717974,-0.9214,-0.054781,-1.003842,-0.849981,-1.025965,0.661742,0.886112,1,0,0,0,0,0
1,2021-01-16,-0.552336,0.0,-0.080745,-0.285494,0.178982,-0.974963,0.027886,-1.055132,-1.063152,-1.192185,-0.444784,0.768767,0,1,0,0,0,0
2,2021-01-16,0.706972,0.0,-0.101001,-0.385104,-0.17633,-1.040049,-0.945296,-1.096482,-1.357745,-1.470137,-0.7768,0.670211,0,0,1,0,0,0
3,2021-01-16,1.044204,0.0,-0.177357,-0.093501,-0.369136,-1.082807,-1.158907,-1.123193,-1.545966,-1.729279,-0.952838,1.035957,0,0,0,1,0,0
4,2021-01-16,1.076928,0.0,-0.14301,-0.53214,-0.445161,-1.117972,-1.125652,-1.137559,-1.515347,-1.85729,-1.011393,1.160411,0,0,0,0,1,0


In [None]:
# Preview the encoded RCP 4.5 frame.
rcp45_df.head()

Unnamed: 0,date,snow_albedo,snow_cover,snow_density,snow_depth,snowfall_sum,surface_latent_heat_flux_sum,surface_sensible_heat_flux_sum,surface_solar_radiation_downwards_sum,surface_thermal_radiation_downwards_sum,skin_temperature,total_precipitation_sum,avg_volumetric_water_content,band_1,band_2,band_3,band_4,band_5,band_6
0,2021-01-16,-0.377514,0.0,0.751851,-0.610371,2.156078,-0.79252,-1.249879,-0.989693,-0.731606,-0.792457,0.975563,-0.75956,1,0,0,0,0,0
1,2021-01-16,-1.332725,0.0,-0.140418,-0.6649,0.402016,-0.905282,-1.288331,-1.041867,-0.991538,-1.039961,-0.35558,-0.145438,0,1,0,0,0,0
2,2021-01-16,-0.893577,0.0,-0.170115,-0.539352,-0.163187,-1.044256,-1.283743,-1.08678,-1.202865,-1.274625,-0.822736,0.068607,0,0,1,0,0,0
3,2021-01-16,0.538003,0.0,-0.18616,-0.464306,-0.315325,-1.128043,-1.313058,-1.116311,-1.365741,-1.496094,-0.974059,0.170131,0,0,0,1,0,0
4,2021-01-16,1.105626,0.0,-0.057869,-0.670515,-0.489608,-1.082194,-1.136745,-1.13084,-1.491502,-1.56535,-1.110837,0.409299,0,0,0,0,1,0


In [None]:
# Preview the encoded RCP 6.0 frame.
rcp60_df.head()

Unnamed: 0,date,snow_albedo,snow_cover,snow_density,snow_depth,snowfall_sum,surface_latent_heat_flux_sum,surface_sensible_heat_flux_sum,surface_solar_radiation_downwards_sum,surface_thermal_radiation_downwards_sum,skin_temperature,total_precipitation_sum,avg_volumetric_water_content,band_1,band_2,band_3,band_4,band_5,band_6
0,2021-01-16,0.057161,0.0,1.054121,-0.969848,2.617378,-0.759697,-0.651414,-0.985177,-0.652902,-0.743306,1.369889,0.925558,1,0,0,0,0,0
1,2021-01-16,1.487986,0.0,-0.112247,-0.514766,0.451593,-0.85409,-0.767809,-1.037087,-0.837061,-0.919345,-0.363078,1.177006,0,1,0,0,0,0
2,2021-01-16,1.601918,0.0,-0.218275,0.262469,0.193223,-0.969033,-1.07863,-1.081741,-1.021659,-1.095792,-0.570459,0.00394,0,0,1,0,0,0
3,2021-01-16,2.010837,0.0,-0.249997,0.486713,0.095152,-1.035136,-0.980082,-1.113344,-1.048984,-1.249002,-0.647576,0.056165,0,0,0,1,0,0
4,2021-01-16,1.634515,0.0,-0.255918,0.047255,0.018869,-1.134399,-0.824624,-1.128583,-1.107532,-1.376497,-0.710562,0.070321,0,0,0,0,1,0


In [None]:
# Preview the encoded RCP 8.5 frame.
rcp85_df.head()

Unnamed: 0,date,snow_albedo,snow_cover,snow_density,snow_depth,snowfall_sum,surface_latent_heat_flux_sum,surface_sensible_heat_flux_sum,surface_solar_radiation_downwards_sum,surface_thermal_radiation_downwards_sum,skin_temperature,total_precipitation_sum,avg_volumetric_water_content,band_1,band_2,band_3,band_4,band_5,band_6
0,2021-01-16,-0.429983,0.0,0.50573,-0.573989,1.424503,-0.834318,-1.093144,-0.985146,-0.798185,-0.968846,0.365277,0.211481,1,0,0,0,0,0
1,2021-01-16,-0.566567,0.0,-0.159403,-0.168579,0.698136,-0.898848,-1.116068,-1.052496,-1.034927,-1.232548,-0.093437,1.340918,0,1,0,0,0,0
2,2021-01-16,0.79212,0.0,-0.29366,0.817242,0.635774,-0.996352,-1.235007,-1.094972,-1.027951,-1.347965,-0.203957,1.340918,0,0,1,0,0,0
3,2021-01-16,2.134788,0.0,-0.313987,1.083922,0.340982,-1.111493,-1.363052,-1.124028,-1.146343,-1.531649,-0.490605,1.340918,0,0,0,1,0,0
4,2021-01-16,2.147479,0.0,-0.275581,0.823076,0.327927,-1.152111,-1.361541,-1.135963,-1.334583,-1.636979,-0.508666,1.340918,0,0,0,0,1,0


In [None]:
# Rebuild the list: the one-hot cells rebound rcp26_df ... rcp85_df to new frames.
dfs = [rcp26_df, rcp45_df, rcp60_df, rcp85_df]

for df in dfs:
  # 'Unnamed: 0' is the index column saved in the input CSVs. errors='ignore'
  # keeps this cell from raising KeyError when the column is already gone
  # (dropped by an earlier column selection, or the cell is being re-run).
  df.drop('Unnamed: 0', axis=1, inplace=True, errors='ignore')

# **Z-Score Normalization**

In [None]:
# The four scenario frames to be standardized.
dfs = [rcp26_df, rcp45_df, rcp60_df, rcp85_df]

In [None]:
# One scaler object; the loop below calls fit_transform per frame, so every
# scenario is standardized with its own mean and standard deviation.
scaler = StandardScaler()

In [None]:
# Collects the standardized frames, in the same order as `dfs`.
normalized_dfs = []

In [None]:
# Start from an empty list so re-running this cell does not append a second set of frames.
normalized_dfs = []

for df in dfs:
  # Standardize the continuous features only. The one-hot band indicators
  # (columns named 'band_1' ... 'band_6') stay 0/1, and non-numeric columns such
  # as the date are carried over untouched. Nothing here assumes that a `band`
  # or `date` column is still present.
  numeric_cols = [
    col for col in df.select_dtypes(include=['number']).columns
    if not str(col).startswith('band_')
  ]
  # Work on a copy so the input frame is not modified and the original column
  # order and row index are preserved.
  scaled_df = df.copy()
  scaled_df[numeric_cols] = scaler.fit_transform(df[numeric_cols])
  normalized_dfs.append(scaled_df)

In [None]:
# Point the scenario names at their standardized frames (first four entries, same order as `dfs`).
rcp26_df, rcp45_df, rcp60_df, rcp85_df = normalized_dfs[:4]

In [None]:
rcp_folder = userdata.get('RCP_folder')
clean_dfs_path = os.path.join(rcp_folder, 'clean_dfs')
# DataFrame.to_csv does not create missing directories, so make sure the target exists.
os.makedirs(clean_dfs_path, exist_ok=True)

# The default index=True is kept, so every file starts with an unnamed index
# column (it shows up as 'Unnamed: 0' when the file is read back with read_csv).
rcp26_df.to_csv(os.path.join(clean_dfs_path, 'rcp26_df_processed.csv'))
rcp45_df.to_csv(os.path.join(clean_dfs_path, 'rcp45_df_processed.csv'))
rcp60_df.to_csv(os.path.join(clean_dfs_path, 'rcp60_df_processed.csv'))
rcp85_df.to_csv(os.path.join(clean_dfs_path, 'rcp85_df_processed.csv'))