# Library

In [1]:
# Native library
import copy
import collections
import multiprocessing as mp

import warnings
warnings.filterwarnings('ignore')

import os
import sys
path = os.path.join(os.pardir, os.pardir)
sys.path.append(path)

# Save object
import joblib

from tqdm import tqdm

# Data management
import numpy as np
import pandas as pd
import xarray as xr

import plotly.express as px

from src.constants import TARGET, TARGET_TEST, FOLDER, S_COLUMNS, G_COLUMNS, M_COLUMNS

# Data preprocessing
from src.features.preprocessing import Smoothor, Convertor
from umap import UMAP
from sklearn.preprocessing import StandardScaler, MinMaxScaler


# Hyperoptimization
from sklearn.pipeline import Pipeline
from sklearn.compose import TransformedTargetRegressor

import wandb

# Regressor models
from xgboost import XGBRegressor
from lce import LCERegressor

# Model evaluation
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_percentage_error

# Constants

In [2]:
# Interim data for the current FOLDER lives two directory levels above the
# working directory.
_TWO_LEVELS_UP = os.path.join(os.pardir, os.pardir)
DATA_PATH = os.path.join(_TWO_LEVELS_UP, 'data', 'interim', FOLDER)

# Directory for the aggregate XGBoost models of the current FOLDER.
# NOTE(review): unlike DATA_PATH, this path is relative to the working
# directory itself (no os.pardir prefix) -- confirm that is intended.
MODEL_PATH = os.path.join('model', FOLDER, 'XGBoost', 'Aggregate')
# Directory creation is currently disabled:
# os.makedirs(MODEL_PATH, exist_ok=True)

# Import Data

In [3]:
# Open the training NetCDF file from the interim data directory and display it.
train_file = os.path.join(DATA_PATH, 'train.nc')
xdf = xr.open_dataset(train_file)
xdf

# Create X & y

In [4]:
# Features: the dataset without the target variable.
# `Dataset.drop` is deprecated in xarray; `drop_vars` is the supported way to
# remove a data variable (the deprecation warning is hidden by the global
# warnings filter set in the import cell).
X = xdf.drop_vars(TARGET)

# Target: flatten the target and S_COLUMNS variables to a DataFrame, keep the
# yield column, and take the first value of each (ts_aug, ts_obs) group.
target_frame = xdf[[TARGET] + S_COLUMNS].to_dataframe()
# NOTE(review): the column name is hard-coded; presumably it is the same
# string as TARGET -- confirm and reuse the constant if so.
y = target_frame[['Rice Yield (kg/ha)']].groupby(['ts_aug', 'ts_obs']).first()

In [5]:
# Put ts_obs ahead of ts_aug in the index, then sort the rows on that index.
y_reordered = y.reorder_levels(['ts_obs', 'ts_aug'])
y = y_reordered.sort_index()

# Scale y

In [6]:
# Min-max scale the target. The fitted scaler stays available as `y_scaler`
# and the scaled values (a 2-D array) are displayed as the cell output.
y_scaler = MinMaxScaler()
y_scaled = y_scaler.fit_transform(y)
y_scaled

array([[0.10714286],
       [0.10714286],
       [0.10714286],
       ...,
       [0.71428571],
       [0.71428571],
       [0.71428571]])

# Smoothor

In [7]:
# Preview the Smoothor step on X with mode='savgol'
# (presumably Savitzky-Golay smoothing -- confirm in src.features.preprocessing).
savgol_smoothor = Smoothor(mode='savgol')
savgol_smoothor.fit_transform(X)

# Convertor

In [8]:
# Preview the Convertor step on X with agg=True and weather=False.
agg_convertor = Convertor(agg=True, weather=False)
agg_convertor.fit_transform(X)

Unnamed: 0_level_0,Unnamed: 1_level_0,Field size (ha),"Rice Crop Intensity(D=Double, T=Triple)",ndvi_max,ndvi_mean,ndvi_min,savi_max,savi_mean,savi_min,evi_max,evi_mean,...,osavi_min,rdvi_max,rdvi_mean,rdvi_min,mtvi1_max,mtvi1_mean,mtvi1_min,lswi_max,lswi_mean,lswi_min
ts_obs,ts_aug,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,0,3.4,3.0,0.526096,0.339800,0.022330,1.263033,1.169885,1.011164,9.624464,0.411286,...,0.022329,48.901329,26.008966,1.461540,7113.479492,3145.936768,211.609650,0.308699,0.161060,0.029738
0,1,3.4,3.0,0.526096,0.342784,-0.011549,1.263033,1.171377,0.994226,55.381062,3.566955,...,-0.011548,48.901329,26.280279,-0.737982,7113.479492,3167.618896,-79.897675,0.308699,0.161678,-0.009218
0,2,3.4,3.0,0.576668,0.358727,0.004875,1.288311,1.179348,1.002437,9.624464,-0.671296,...,0.004874,48.901329,28.009909,0.324986,7113.479492,3459.251953,-53.480156,0.308699,0.167691,0.002176
0,3,3.4,3.0,0.543041,0.347012,-0.000865,1.271499,1.173490,0.999568,9.624464,0.688180,...,-0.000865,48.901329,26.479292,-0.057544,7113.479492,3195.490234,-90.451935,0.308699,0.168196,0.045927
0,4,3.4,3.0,0.568646,0.353845,0.008548,1.284300,1.176907,1.004274,9.624464,0.744002,...,0.008548,49.870232,27.406881,0.568296,7199.322754,3327.938477,-23.989830,0.316231,0.164762,0.007198
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
556,5,2.3,3.0,0.857781,0.357780,0.014857,1.428829,1.178869,1.007428,4.551883,1.077509,...,0.014857,50.557060,25.147074,1.016154,5569.200684,2842.629639,-6.352991,0.435596,0.194993,-0.078745
556,6,2.3,3.0,0.661150,0.354350,-0.017497,1.330526,1.177155,0.991252,4.551883,0.762690,...,-0.017497,41.174095,24.546698,-1.307646,5280.271484,2714.889893,-453.011078,0.270656,0.162725,-0.071574
556,7,2.3,3.0,0.741412,0.372276,0.082438,1.370649,1.186116,1.041215,18.723934,1.736372,...,0.082435,43.391685,26.004728,5.897933,5388.504883,2904.160889,569.082275,0.270656,0.185747,-0.071574
556,8,2.3,3.0,0.817847,0.352681,-0.021671,1.408865,1.176320,0.989166,4.551883,1.065266,...,-0.021670,48.250706,24.985300,-1.640732,6068.014648,2818.085938,-504.226410,0.470940,0.180060,-0.071574


# Pipeline

In [9]:
# Fixed seed shared by the stochastic steps so that Restart & Run All
# reproduces the same embedding and the same fitted model.
RANDOM_STATE = 42

# Steps run in order: smoothing -> conversion -> standardisation ->
# UMAP dimensionality reduction -> XGBoost regression.
step_list = [
    ('smoothor', Smoothor(mode=None)),
    ('convertor', Convertor()),
    ('scaler', StandardScaler()),
    # UMAP is stochastic: without a seed the embedding differs on every run.
    # Per the umap-learn docs, fixing the seed trades some parallelism for
    # reproducibility.
    ('dim_reductor', UMAP(random_state=RANDOM_STATE)),
    ('estimator', XGBRegressor(random_state=RANDOM_STATE)),
]

pipe = Pipeline(step_list)

In [10]:
# Fit every pipeline step on the features against the min-max scaled target.
pipe.fit(X=X, y=y_scaled)