# Libraries and configuration

In [1]:
import glob
import joblib

import warnings
warnings.filterwarnings('ignore')

import os
from os.path import join
import sys
# Make the directory two levels above the working directory importable so
# that the `src.*` imports below resolve. The membership check keeps
# sys.path from collecting a duplicate entry each time this cell is re-run.
path = os.path.join(os.pardir, os.pardir)
if path not in sys.path:
    sys.path.append(path)

import numpy as np
import pandas as pd
import xarray as xr

from scipy.signal import savgol_filter
from src.data.make_preprocessing import merge_satellite, add_observation, add_weather, statedev_fill, features_modification, compute_vi

from sklearn.preprocessing import MinMaxScaler, StandardScaler
from src.data.preprocessing import Smoother

# from datascaler import DatasetScaler

from src.constants import M_COLUMNS, S_COLUMNS, G_COLUMNS, TARGET, FOLDER, TARGET_TEST


# Notebook-level configuration.
# Base directory for the data paths built below: two levels above the
# working directory.
ROOT_DIR = join(os.pardir, os.pardir)
# Sub-folder of data/external/satellite selecting which satellite data to load.
folder = 'augment_100_5'
# When True the "test" files are processed instead of the "train" files.
test = False

In [2]:
# Pick the satellite file that matches the requested split.
file_name = "test.nc" if test else "train.nc"

# Load the satellite dataset. Any folder other than "augment_100_5" is read
# directly from data/external/satellite/<folder>/; "augment_100_5" is
# delegated to the project's merge_satellite helper.
if folder != "augment_100_5":
    path_sat = join(ROOT_DIR, "data", "external", "satellite", folder, file_name)
    xds = xr.open_dataset(path_sat, engine="scipy")
else:
    xds = merge_satellite(file_name)

xds

In [3]:

def categorical_encoding(xds: xr.Dataset) -> xr.Dataset:
    """Turn the rice crop intensity labels into small integers.

    Every "D" in the variable is replaced by "2" and every "T" by "3";
    the result is cast to ``np.int8`` and written back into ``xds``.

    Args:
        xds: Dataset holding the
            "Rice Crop Intensity(D=Double, T=Triple)" variable.

    Returns:
        The same dataset object that was passed in, with that variable
        overwritten by its integer encoding.
    """
    intensity_key = "Rice Crop Intensity(D=Double, T=Triple)"
    labels = xds[intensity_key]
    as_digits = labels.str.replace("D", "2").str.replace("T", "3")
    xds[intensity_key] = as_digits.astype(np.int8)
    return xds

# Choose the enriched EY csv that matches the current split.
file_name = "test_enriched.csv" if test else "train_enriched.csv"
path = join(ROOT_DIR, "data", "interim", file_name)

# Read the csv and name its index "ts_obs" so that both datasets can be
# linked on it.
df = pd.read_csv(path, index_col=0).rename_axis("ts_obs")

# Convert the DataFrame to an xarray.Dataset and merge it with the satellite
# dataset; compat='override' makes xarray skip its conflict comparison.
xds = xr.merge(
    [xds, df.to_xarray()],
    compat='override',
)

# Encode the categorical data.
xds = categorical_encoding(xds)

In [4]:
# Pass the dataset through the project's add_weather step, then display
# whether any value of `time` is null.
xds = add_weather(xds)
xds["time"].isnull().any()

In [5]:
# Compute the vegetation indices (project helper compute_vi), then display
# whether any value of `time` is null.
xds = compute_vi(xds)
xds["time"].isnull().any()

In [6]:
# Apply the project's statedev_fill step to the dataset, then display
# whether any value of `time` is null.
xds = statedev_fill(xds)
xds["time"].isnull().any()

In [7]:
# Run the project's Smoother in "savgol" mode over the dataset, then display
# whether any value of `time` is null.
xds = Smoother(mode="savgol").transform(xds)
xds["time"].isnull().any()

In [8]:
# Display once more whether any value of `time` is null, before the final
# features_modification step.
xds["time"].isnull().any()

In [9]:
# Final step: hand the dataset and the train/test flag to the project's
# features_modification helper, then display the resulting dataset.
xds = features_modification(xds, test)
xds

<xarray.DataArray 'time' ()>
array(False)
