In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
import seaborn
%matplotlib inline

# Project layout: the project root is taken to be the parent of the current
# working directory (cwd + separator + "..", then normalised).
project_root_dir = os.path.normpath(os.sep.join((os.getcwd(), os.pardir)))

# Directory used by the read/save helpers below; created here if missing.
data_path = os.path.join(project_root_dir, "data")
os.makedirs(data_path, exist_ok=True)

# function for reading data
def read_data(filename, date_cols=None, file_path=data_path):
    """Read a CSV file into a pandas DataFrame.

    Parameters
    ----------
    filename : str
        Name of the CSV file; it is joined onto ``file_path``.
    date_cols : optional
        Passed straight through to ``pd.read_csv`` as ``parse_dates``.
        Defaults to ``None``.
    file_path : str
        Directory that holds the file. Defaults to the module-level
        ``data_path``.

    Returns
    -------
    The DataFrame returned by ``pd.read_csv``.
    """
    return pd.read_csv(os.path.join(file_path, filename), parse_dates=date_cols)

# function for saving data as csv file
def save_dataframe(df, filename, file_path=data_path):
    """Write ``df`` to a CSV file, leaving out the index.

    Parameters
    ----------
    df :
        Object exposing ``to_csv`` (a pandas DataFrame in this notebook).
    filename : str
        Name of the output file; it is joined onto ``file_path``.
    file_path : str
        Destination directory. Defaults to the module-level ``data_path``.

    Returns
    -------
    None
    """
    df.to_csv(os.path.join(file_path, filename), index=False)

In [2]:
# Train and test sets: pandas is asked to parse the "Date" column as dates.
train = read_data(filename="TRAIN.CSV", date_cols=["Date"])
test = read_data(filename="TEST_FINAL.csv", date_cols=["Date"])
# Submission template: read as-is, with no date parsing requested.
submission = read_data(filename="SAMPLE.csv")

In [3]:
from prepare import prepare_data

In [4]:
# prepare_data comes from the project-local `prepare` module. It returns four
# values; the last one (full_pipe) is used below as the first step of the
# model pipeline.
(X_train, y_train, X_test, full_pipe) = prepare_data(train, test)

In [7]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import make_pipeline

In [8]:
# Chain the preprocessing step (full_pipe) with a random forest regressor.
# random_state is fixed so that repeated runs build the same forest.
rf = make_pipeline(
    full_pipe,
    RandomForestRegressor(
        n_jobs=-1,  # use all available cores
        random_state=42,
        max_features="sqrt",
        max_depth=30,
        min_samples_split=6,
        min_samples_leaf=8,
    ),
)

# Fit preprocessing and model together; as the last expression of the cell,
# the fitted pipeline is also displayed.
rf.fit(X_train, y_train)

Pipeline(steps=[('columntransformer',
                 ColumnTransformer(transformers=[('num',
                                                  Pipeline(steps=[('simpleimputer',
                                                                   SimpleImputer())]),
                                                  ['Store_id']),
                                                 ('cat',
                                                  Pipeline(steps=[('simpleimputer',
                                                                   SimpleImputer(fill_value='NA',
                                                                                 strategy='constant')),
                                                                  ('onehotencoder',
                                                                   OneHotEncoder(handle_unknown='ignore',
                                                                                 sparse=False))]),
                                     

In [9]:
# Store the fitted pipeline's predictions for X_test in the "Sales" column of
# the submission frame, then write the frame to the data directory.
submission["Sales"] = rf.predict(X_test)
save_dataframe(df=submission, filename="rf_hyper2.csv")