In [None]:
import numpy as np # linear algebra
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
import sklearn.metrics as sm
from sklearn.ensemble import VotingRegressor
from sklearn.ensemble import StackingRegressor
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Print the full path of every file found under the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error (MAPE), in percent.

    The percentage error of a sample is ``|y_true - y_pred| / |y_true|``,
    which is undefined when ``y_true`` is zero. Such samples are left out of
    the mean instead of turning the whole score into ``inf``/``nan``.

    Parameters
    ----------
    y_true : array-like
        Ground-truth values.
    y_pred : array-like
        Predicted values; must be broadcastable against ``y_true``.

    Returns
    -------
    float
        Mean of the absolute percentage errors over the samples whose true
        value is non-zero, multiplied by 100. ``nan`` when there is no such
        sample (empty input, or every true value is zero).
    """
    y_true, y_pred = np.broadcast_arrays(
        np.asarray(y_true, dtype=float), np.asarray(y_pred, dtype=float)
    )
    # Only samples with a non-zero target have a defined percentage error.
    defined = y_true != 0
    if not defined.any():
        return np.nan
    relative_error = (y_true[defined] - y_pred[defined]) / y_true[defined]
    return np.mean(np.abs(relative_error)) * 100

def metrics(pred, y_test):
    """Compute a set of regression metrics for one model's predictions.

    Parameters
    ----------
    pred : array-like
        Predicted values.
    y_test : array-like
        Ground-truth values the predictions are compared against.

    Returns
    -------
    dict
        Maps a human-readable metric name to its value. All sklearn metrics
        are called as ``metric(y_test, pred)``.
    """
    mse = sm.mean_squared_error(y_test, pred)
    # Not computed: mean_squared_log_error, mean_poisson_deviance and
    # mean_gamma_deviance. NOTE(review): those sklearn metrics put sign
    # constraints on their inputs, which is presumably why they were disabled
    # -- confirm before re-enabling.
    return {
        'Explained Variance Score': sm.explained_variance_score(y_test, pred),
        'Max Error': sm.max_error(y_test, pred),
        'Mean Absolute Error': sm.mean_absolute_error(y_test, pred),
        'Mean Squared Error': mse,
        'Root Mean Squared Error': np.sqrt(mse),
        'Median Absolute Error': sm.median_absolute_error(y_test, pred),
        'R² Score': sm.r2_score(y_test, pred),
        # Ground truth goes first: the helper's signature is (y_true, y_pred)
        # and the error must be taken relative to the true values, not to the
        # predictions.
        'Mean Absolute Percentage Error': mean_absolute_percentage_error(y_test, pred),
    }

In [None]:


# Load every competition table; the paths are unpacked in the same order as
# the names on the left-hand side.
items, item_cats, shops, sales, test, submission = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/kaggle/input/competitive-data-science-predict-future-sales/items.csv',
        '/kaggle/input/competitive-data-science-predict-future-sales/item_categories.csv',
        '/kaggle/input/competitive-data-science-predict-future-sales/shops.csv',
        '/kaggle/input/competitive-data-science-predict-future-sales/sales_train.csv',
        '/kaggle/input/competitive-data-science-predict-future-sales/test.csv',
        '/kaggle/input/competitive-data-science-predict-future-sales/sample_submission.csv',
    )
)

In [None]:

# One row per (shop_id, item_id) pair and one column per date_block_num value,
# holding the sum of item_cnt_day for that pair and period (0 when the pair
# has no sales in that period).
#
# `values` is a scalar on purpose: passing a one-element list makes
# pivot_table return two-level column labels, and merging such a frame with
# the single-level `test` frame triggers a warning on older pandas and is
# rejected with a MergeError on recent releases. The cell values are the same
# either way. pivot_table returns a new frame, so `sales` needs no copy.
sales_copy = sales.pivot_table(
    index=['shop_id', 'item_id'],
    values='item_cnt_day',
    columns=['date_block_num'],
    fill_value=0,
    aggfunc='sum',
).reset_index()

sales_copy

In [None]:

# Left-join the pivoted sales history onto the test (shop_id, item_id) pairs,
# fill the gaps left by pairs without a match with 0, and keep only the
# history columns.
join_keys = ['shop_id', 'item_id']
first_train = (
    test
    .merge(sales_copy, how='left', on=join_keys)
    .fillna(0)
    .drop(['ID'] + join_keys, axis=1)
)
first_train

In [None]:
# Sliding window over the history columns: every split uses all but two
# columns as features, shifted right by one column per split, and the column
# immediately after the feature window as its target. The test window ends at
# the last column and therefore has no target.
history = first_train.values
X_train, y_train = history[:, :-2], history[:, -2]
X_valid, y_valid = history[:, 1:-1], history[:, -1]
X_test = history[:, 2:]

In [None]:
# Linear baseline: fit on the training window, score on the validation window.
reg1 = LinearRegression().fit(X_train, y_train)
pred1 = reg1.predict(X_valid)
reg1Metric = metrics(pred=pred1, y_test=y_valid)

In [None]:
# Single decision tree: fit on the training window, score on the validation
# window. The tree builder is randomized (feature order / tie-breaking), so a
# fixed random_state is set to make the fitted tree and its metrics
# reproducible across "Restart & Run All".
reg2 = DecisionTreeRegressor(random_state=42)
reg2.fit(X_train, y_train)
pred2 = reg2.predict(X_valid)
reg2Metric = metrics(pred2, y_valid)

In [None]:
# Random forest: fit on the training window, score on the validation window.
# Bootstrap sampling and feature selection are random, so a fixed
# random_state is set to make the fitted forest and its metrics reproducible
# across "Restart & Run All".
reg3 = RandomForestRegressor(random_state=42)
reg3.fit(X_train, y_train)
pred3 = reg3.predict(X_valid)
reg3Metric = metrics(pred3, y_valid)

In [None]:
# Named base learners shared by both ensembles: (name, estimator) pairs.
estimators = list(zip(('1', '2', '3'), (reg1, reg2, reg3)))

# Both ensembles are built from the same base learners.
voting_regressor = VotingRegressor(estimators=estimators)
stacking_regressor = StackingRegressor(estimators=estimators)

In [None]:
# Fit both ensembles on the training window (voting first, then stacking),
# then score each one on the validation window.
vreg, sreg = (
    ensemble.fit(X_train, y_train)
    for ensemble in (voting_regressor, stacking_regressor)
)

vregpred, sregpred = vreg.predict(X_valid), sreg.predict(X_valid)

vregmetrics = metrics(vregpred, y_valid)
sregmetrics = metrics(sregpred, y_valid)

In [None]:
# Validation metrics side by side: one row per model, one column per metric.
scoreboard = [
    ('LinearRegression', reg1Metric),
    ('DecisionTreeRegressor', reg2Metric),
    ('RandomForestRegressor', reg3Metric),
    ('VotingRegressor', vregmetrics),
    ('StackingRegressor', sregmetrics),
]
names = [model_name for model_name, _scores in scoreboard]
pd.DataFrame([scores for _model_name, scores in scoreboard], index=names)

In [None]:
# Predict the test window with every fitted model, in the same order as the
# names on the left-hand side.
reg1Result, reg2Result, reg3Result, vregResult, sregResult = (
    model.predict(X_test) for model in (reg1, reg2, reg3, vreg, sreg)
)

In [None]:
# Test-window predictions of every model, keyed by a short model label.
PredictBag = {
    'LR': reg1Result,
    'DTR': reg2Result,
    'RFR': reg3Result,
    'VREF': vregResult,
    # Must be the stacking regressor's output; reusing vregResult here would
    # count the voting regressor twice and drop the stacking model entirely.
    'SREF': sregResult,
}

In [None]:
# Put the per-model predictions next to the sample submission (aligned on the
# row index), then discard the submission's own two columns so that only the
# model columns remain.
model_predictions = pd.DataFrame(PredictBag)
predict = pd.concat([submission, model_predictions], axis=1).drop(
    columns=["item_cnt_month", "ID"]
)

In [None]:
# Average the models' predictions row by row, then turn the row index into an
# 'ID' column next to the averaged 'item_cnt_month' column.
ensemble_mean = predict.mean(axis=1).rename("item_cnt_month")
meanpredict = ensemble_mean.reset_index().rename(columns={'index': 'ID'})
meanpredict

In [None]:
# Write the averaged predictions to disk without the DataFrame index column.
meanpredict.to_csv(path_or_buf='./results.csv', index=False)

In [None]:
import pickle

# Persist every fitted model next to the notebook. Each file is opened in a
# `with` block so its handle is flushed and closed as soon as the dump is
# done, instead of being left open for the rest of the session.
# NOTE: pickle files must only be loaded from trusted sources; unpickling can
# execute arbitrary code.
models_to_save = {
    "./reg1_model.pkl": reg1,
    "./reg2_model.pkl": reg2,
    "./reg3_model.pkl": reg3,
    "./vref_model.pkl": voting_regressor,
    "./sref_model.pkl": stacking_regressor,
}
for model_path, model in models_to_save.items():
    with open(model_path, 'wb') as model_file:
        pickle.dump(model, model_file)
