## 2. Data Processing

#### A360 MDK interface

In [1]:
a360ai

<A360 AI Interface for project: Product Demand Forecasting>

In [2]:
# Pick the default data repo: take the first entry of the 'name' field from
# the listing returned by the A360 interface.
# NOTE(review): ['name'][0] is a label lookup if the listing is a pandas
# DataFrame — presumably it has a default 0-based index; confirm.
DATAREPO_LIST = a360ai.list_datarepos()
DATAREPO = DATAREPO_LIST['name'][0]
DATAREPO

'Product Demand Forecasting'

In [3]:
import numpy as np
import pandas as pd

In [4]:
import warnings
# Silence every warning for the rest of the session. No category is given, so
# this also hides library deprecation warnings (e.g. from pandas).
warnings.filterwarnings('ignore')

### 1. Set Default Data Repo and load data

In [5]:
a360ai.set_default_datarepo(DATAREPO)

In [6]:
a360ai.list_datasets()

Unnamed: 0,base_name,extension,size
1,X.csv,csv,97672.0
2,X.parquet,parquet,20323.0
3,product-demand-2015-2020-a.csv,csv,44133.0
4,rf_model.pkl,pkl,11465124.0
5,y.csv,csv,10765.0
6,y.parquet,parquet,5080.0


In [7]:
# Load the raw product-demand dataset from the default data repo set above.
data = a360ai.load_dataset("product-demand-2015-2020-a.csv")

In [8]:
# Convert the 'date' column from YYYY-MM-DD text into pandas datetimes so the
# .dt accessor can be used for feature extraction below.
data['date'] = pd.to_datetime(data['date'], format='%Y-%m-%d')

### 2. Data cleaning, data engineering, data preprocessing

#### Create date features

In [8]:
def create_date_features(df):
    """Add calendar-derived feature columns computed from ``df['date']``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a datetime64 ``date`` column.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object. It is modified in place and returned, so
        both ``create_date_features(df)`` and ``df = create_date_features(df)``
        work.

    Columns added
    -------------
    month, day_of_month, day_of_year, week_of_year (ISO week number), year,
    is_wknd, is_month_start, is_month_end.
    """
    dates = df['date'].dt

    df['month'] = dates.month
    df['day_of_month'] = dates.day
    df['day_of_year'] = dates.dayofyear

    # Series.dt.weekofyear was deprecated in pandas 1.1 and removed in 2.0;
    # isocalendar().week is its replacement and yields the same ISO week
    # numbers, but as nullable UInt32. Cast back to a plain NumPy dtype so
    # downstream consumers see the same kind of column as before (int64, or
    # float with NaN when some dates are missing).
    iso_week = dates.isocalendar().week
    if iso_week.isna().any():
        df['week_of_year'] = iso_week.astype('float64')
    else:
        df['week_of_year'] = iso_week.astype('int64')

    df['year'] = dates.year

    # weekday is Monday=0 ... Sunday=6, so integer division by 4 gives 1 for
    # Friday, Saturday and Sunday and 0 for Monday-Thursday. Note that Friday
    # is therefore flagged as weekend.
    df["is_wknd"] = dates.weekday // 4

    df['is_month_start'] = dates.is_month_start.astype(int)
    df['is_month_end'] = dates.is_month_end.astype(int)
    return df

In [9]:
data = create_date_features(data)

In [10]:
data

Unnamed: 0,date,trend-index,sales,month,day_of_month,day_of_year,week_of_year,year,is_wknd,is_month_start,is_month_end
0,2015-01-01,25.72,15.00,1,1,1,1,2015,0,1,0
1,2015-01-02,24.56,22.50,1,2,2,1,2015,1,0,0
2,2015-01-03,25.16,29.25,1,3,3,1,2015,1,0,0
3,2015-01-04,23.44,30.75,1,4,4,1,2015,1,0,0
4,2015-01-05,29.16,32.75,1,5,5,2,2015,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
1941,2020-04-25,200.61,410.20,4,25,116,17,2020,1,0,0
1942,2020-04-26,194.13,410.20,4,26,117,17,2020,1,0,0
1943,2020-04-27,192.87,411.20,4,27,118,18,2020,0,0,0
1944,2020-04-28,213.39,413.20,4,28,119,18,2020,0,0,0


#### One-Hot encoding

In [11]:
# One-hot encode the calendar month into month_<n> indicator columns; the
# original 'month' column is replaced by them.
# dtype=int keeps the indicators as 0/1 integers. Without it, pandas >= 2.0
# produces boolean columns.
# Note: this cell consumes the 'month' column, so re-running it without
# re-running the feature-creation cells above raises a KeyError.
data = pd.get_dummies(data, columns=['month'], dtype=int)
data.head()

Unnamed: 0,date,trend-index,sales,day_of_month,day_of_year,week_of_year,year,is_wknd,is_month_start,is_month_end,...,month_3,month_4,month_5,month_6,month_7,month_8,month_9,month_10,month_11,month_12
0,2015-01-01,25.72,15.0,1,1,1,2015,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,2015-01-02,24.56,22.5,2,2,1,2015,1,0,0,...,0,0,0,0,0,0,0,0,0,0
2,2015-01-03,25.16,29.25,3,3,1,2015,1,0,0,...,0,0,0,0,0,0,0,0,0,0
3,2015-01-04,23.44,30.75,4,4,1,2015,1,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2015-01-05,29.16,32.75,5,5,2,2015,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Export data: ready for model training

In [12]:
# Target is the 'sales' column; features are every remaining column except
# the raw 'date' (already expanded into calendar features) and the target.
y = data['sales']
X = data.drop(['date', 'sales'], axis=1)

In [13]:
X.columns

Index(['trend-index', 'day_of_month', 'day_of_year', 'week_of_year', 'year',
       'is_wknd', 'is_month_start', 'is_month_end', 'month_1', 'month_2',
       'month_3', 'month_4', 'month_5', 'month_6', 'month_7', 'month_8',
       'month_9', 'month_10', 'month_11', 'month_12'],
      dtype='object')

### 3. Write dataset to Data Repo

In [14]:
# Save the feature matrix and the target to the default data repo under the
# names "X" and "y".
# NOTE(review): overwrite=True presumably replaces any existing dataset with
# the same name — confirm against the A360 MDK documentation.
a360ai.write_dataset(X,"X", overwrite=True)
a360ai.write_dataset(y,"y", overwrite=True)

True