# Product Demand Forecast

### Import Libraries

In [1]:
import pandas as pd
import numpy as np

from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.svm import LinearSVR
from sklearn.metrics import r2_score

### Import Data and Explore

In [2]:
pdf = pd.read_csv('Historical Product Demand.csv')

In [3]:
pdf.head()

Unnamed: 0,Product_Code,Warehouse,Product_Category,Date,Order_Demand
0,Product_0993,Whse_J,Category_028,2012/7/27,100
1,Product_0979,Whse_J,Category_028,2012/1/19,500
2,Product_0979,Whse_J,Category_028,2012/2/3,500
3,Product_0979,Whse_J,Category_028,2012/2/9,500
4,Product_0979,Whse_J,Category_028,2012/3/2,500


In [4]:
pdf.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1048575 entries, 0 to 1048574
Data columns (total 5 columns):
Product_Code        1048575 non-null object
Warehouse           1048575 non-null object
Product_Category    1048575 non-null object
Date                1037336 non-null object
Order_Demand        1048575 non-null object
dtypes: object(5)
memory usage: 40.0+ MB


In [5]:
# Check for missing values
pdf.isnull().sum(axis = 0)

Product_Code            0
Warehouse               0
Product_Category        0
Date                11239
Order_Demand            0
dtype: int64

In [6]:
# Drop missing rows
pdf = pdf.dropna()

### Preprocessing

In [7]:
encoder = LabelEncoder()

pdf['Product_Code']= encoder.fit_transform(pdf['Product_Code'])
pdf['Warehouse']= encoder.fit_transform(pdf['Warehouse'])
pdf['Product_Category']= encoder.fit_transform(pdf['Product_Category'])

pdf['year'] = pd.DatetimeIndex(pdf['Date']).year
pdf['month'] = pd.DatetimeIndex(pdf['Date']).month
pdf=pdf.drop(['Date'],axis=1)

pdf['Order_Demand'] = pdf['Order_Demand'].str.replace('(',"")
pdf['Order_Demand'] = pdf['Order_Demand'].str.replace(')',"")
pdf['Order_Demand'] = pdf['Order_Demand'].astype('int64')

In [8]:
X = pdf.drop(['Order_Demand'],axis=1)
y = pdf['Order_Demand']

In [9]:
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [10]:
# Split the data, 80% training, 10% test, 10% validation
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=0)
X_test, X_val, y_test, y_val = train_test_split(X_train,y_train,test_size=0.5,random_state=1)

### Decision Tree Regressor

In [11]:
# Try mse
model = DecisionTreeRegressor(criterion='mse',random_state=0).fit(X_train,y_train)
score = model.score(X_test,y_test)
y_pred = model.predict(X_test)
print('r2_score: '+str(r2_score(y_test,y_pred)))

r2_score: 0.3798237754684679


In [12]:
# Try friedman_mse
model = DecisionTreeRegressor(criterion='friedman_mse',random_state=0).fit(X_train,y_train)
score = model.score(X_test,y_test)
y_pred = model.predict(X_test)
print('r2_score: '+str(r2_score(y_test,y_pred)))

r2_score: 0.3798237754684679


### Support Vector Regressor

In [13]:
# Try LinearSVR
model = LinearSVR().fit(X_train,y_train)
score = model.score(X_test,y_test)
y_pred = model.predict(X_test)
print('r2_score: '+str(r2_score(y_test,y_pred)))

r2_score: -0.023731194607323758


In [None]:
# Try poly
model = SVR(kernel='poly',gamma='auto').fit(X_train,y_train)
score = model.score(X_test,y_test)
y_pred = model.predict(X_test)
print('r2_score: '+str(r2_score(y_test,y_pred)))