In [1]:
import logging.config

import matplotlib.pyplot as plt
import pandasx as pdx
from sktime.forecasting.base import ForecastingHorizon
from sktime.utils.plotting import plot_series
from sktimex import SimpleCNNForecaster

In [2]:
# --- Column names and dataset parameters used by this notebook ---------------

# Target column.
TARGET = 'import_kg'
# Column labelled "time series" by the original author
# (presumably one series per distinct value — TODO confirm).
GROUP = 'item_country'
# Number of years over which the data was collected.
YEARS = 7
# Name of the date column.
DATETIME = 'imp_date'
MODEL = 'model'

# Column subset: the import quantity together with the weather measurements.
weather_columns = [
    'import_kg',
    'mean_temperature',
    'vap_pressure',
    'evaporation',
    'rainy_days',
]
# Column subset: the import quantity together with the oil price and
# stock-market index columns.
financial_columns = [
    'import_kg',
    'crude_oil_price',
    'sandp_500_us',
    'sandp_sensex_india',
    'shenzhen_index_china',
    'nikkei_225_japan',
]

In [6]:
# Plain load of the CSV to inspect its schema; the only option passed is
# na_values, so '(null)' entries are presumably read as missing values.
# NOTE(review): this cell carries execution count 6 although it sits before the
# cells numbered 3 and 4, and it binds the same name `df` that the following
# load cell assigns — confirm the intended run order (Restart & Run All).
df = pdx.read_data(
    'vw_food_import_train_test_newfeatures.csv',
    na_values=['(null)'],
)
df.info()

Loading vw_food_import_train_test_newfeatures.csv ...
... done ((29568, 18))
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29568 entries, 0 to 29567
Data columns (total 18 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   item_country                      29568 non-null  object 
 1   imp_month                         29568 non-null  object 
 2   imp_date                          29568 non-null  object 
 3   import_kg                         29568 non-null  int64  
 4   prod_kg                           9660 non-null   float64
 5   avg_retail_price_src_country      733 non-null    float64
 6   producer_price_tonne_src_country  7366 non-null   float64
 7   crude_oil_price                   29568 non-null  float64
 8   sandp_500_us                      29568 non-null  float64
 9   sandp_sensex_india                29568 non-null  float64
 10  shenzhen_index_china              29568 non-null  flo

In [3]:
# Load the same CSV again, this time with the pandasx pre-processing options.
# The reported shape (29568, 24) is consistent with the 18 raw columns minus
# the six names in `ignore` plus twelve `imp_month_*` indicator columns.
# NOTE(review): 'imp_month' appears in both `onehot` and `ignore`; the
# df.info() output lists the imp_month_* columns, so the encoding evidently
# happens before the raw column is dropped — confirm against pandasx.
df = pdx.read_data(
    'vw_food_import_train_test_newfeatures.csv',
    # (column, parse format, frequency); imp_date shows up as period[M] in df.info()
    datetime=('imp_date', '[%Y/%m/%d %H:%M:%S]', 'M'),
    onehot=['imp_month'],
    ignore=[
        'imp_month',
        'prod_kg',
        'avg_retail_price_src_country',
        'producer_price_tonne_src_country',
        'max_temperature',
        'min_temperature',
    ],
    numeric=['evaporation', 'mean_temperature', 'rainy_days', 'vap_pressure'],
    # periodic=('imp_date', 'M'),
    na_values=['(null)'],
)

Loading vw_food_import_train_test_newfeatures.csv ...
... done ((29568, 24))


In [4]:
# Print the schema of the currently bound `df`: column names, non-null counts
# and dtypes (used here to check the result of the load with options).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 29568 entries, 0 to 29567
Data columns (total 24 columns):
 #   Column                Non-Null Count  Dtype    
---  ------                --------------  -----    
 0   crude_oil_price       29568 non-null  float64  
 1   evaporation           29484 non-null  float64  
 2   imp_date              29568 non-null  period[M]
 3   imp_month_apr         29568 non-null  uint8    
 4   imp_month_aug         29568 non-null  uint8    
 5   imp_month_dec         29568 non-null  uint8    
 6   imp_month_feb         29568 non-null  uint8    
 7   imp_month_jan         29568 non-null  uint8    
 8   imp_month_jul         29568 non-null  uint8    
 9   imp_month_jun         29568 non-null  uint8    
 10  imp_month_mar         29568 non-null  uint8    
 11  imp_month_may         29568 non-null  uint8    
 12  imp_month_nov         29568 non-null  uint8    
 13  imp_month_oct         29568 non-null  uint8    
 14  imp_month_sep         29568 non-null  