In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.metrics import mean_squared_error

In [None]:
# Load the competition's training table and display it as the cell output.
df = pd.read_csv(
    "/kaggle/input/store-sales-time-series-forecasting/train.csv"
)
df

In [None]:
# Pairwise correlation heatmap restricted to the numeric columns.
# The frame also holds text columns (they are cast to category further down),
# and pandas >= 2.0 raises on those in DataFrame.corr() unless
# numeric_only=True is passed explicitly.
# The trailing semicolon hides the Axes repr so only the figure is shown.
sns.heatmap(df.corr(numeric_only=True));

In [None]:

# Features are every column except the target; the target is kept as a
# one-column DataFrame (double brackets) rather than a Series.
X = df.drop(columns='sales')
y = df[['sales']]

In [None]:
# Names of all non-numeric feature columns.
cats = list(X.select_dtypes(exclude=np.number).columns)

# Cast each of them to the pandas 'category' dtype in a single astype call
# (the DMatrix cells below are built with enable_categorical=True).
X = X.astype(dict.fromkeys(cats, 'category'))

In [None]:
# Hold out part of the data for evaluation; random_state pins the shuffle so
# the split is reproducible across runs.
# NOTE(review): this is a shuffled split on what the dataset path suggests is
# time-series data — confirm whether a chronological split is more appropriate.
splits = train_test_split(X, y, random_state=1)
X_train, X_test, y_train, y_test = splits

In [None]:
# Wrap both splits in XGBoost DMatrix containers. enable_categorical=True lets
# the pandas 'category' columns through instead of rejecting them.
dtrain_reg = xgb.DMatrix(data=X_train, label=y_train, enable_categorical=True)
dtest_reg = xgb.DMatrix(data=X_test, label=y_test, enable_categorical=True)

In [None]:
# Baseline booster: squared-error regression, 100 boosting rounds, no
# evaluation sets.
# NOTE(review): "gpu_hist" is reported as deprecated from XGBoost 2.0 in favour
# of tree_method="hist" with device="cuda" — confirm against the installed version.
n = 100
params = dict(objective="reg:squarederror", tree_method="gpu_hist")

model = xgb.train(params, dtrain_reg, num_boost_round=n)

In [None]:

# Score the held-out split with the booster trained in the previous cell.
preds = model.predict(data=dtest_reg)


In [None]:
# Root-mean-squared error on the held-out split.
# The `squared=False` flag of mean_squared_error was deprecated in
# scikit-learn 1.4 and later removed, so take the square root of the plain MSE
# instead; this gives the same value on every scikit-learn version.
rmse = np.sqrt(mean_squared_error(y_test, preds))

print(f"RMSE of the base model: {rmse:.3f}")

In [None]:
# Longer run (1500 rounds) that reports metrics on both splits while training.
# This rebinds `model`, so later cells use this booster rather than the baseline.
n = 1500
params = {
    "objective": "reg:squarederror",
    "tree_method": "gpu_hist",
}

# Datasets scored during training, listed as (matrix, display name).
evals = [(dtest_reg, "validation"), (dtrain_reg, "train")]

model = xgb.train(
    params,
    dtrain_reg,
    num_boost_round=n,
    evals=evals,
    verbose_eval=125,  # print the evaluation metrics every 125 rounds
)

In [None]:
# 5-fold cross-validation on the training matrix, for up to 1000 rounds, with
# early stopping after 50 rounds without improvement.
n = 1000
params = {
    "objective": "reg:squarederror",
    "tree_method": "gpu_hist",
}

results = xgb.cv(
    params=params,
    dtrain=dtrain_reg,
    num_boost_round=n,
    nfold=5,
    early_stopping_rounds=50,
)

In [None]:
# Peek at the first few rows of the cross-validation history.
results.head(n=5)

In [None]:
# Smallest value of the 'test-rmse-mean' column over all recorded CV rounds.
best_rmse = results.loc[:, 'test-rmse-mean'].min()
best_rmse

In [None]:
# Load the competition test set and score it with the trained booster.
test = pd.read_csv("/kaggle/input/store-sales-time-series-forecasting/test.csv")

# Encode the categorical columns with the *training* category sets.
# The original cell computed an unused `cat` list and looped over `cats` from
# the training cell; that dependency is now explicit. More importantly, a plain
# astype('category') would derive the categories from the test frame alone, so
# the same label could get a different integer code than it had in training
# and the trees would branch on the wrong values. Values never seen in
# training become NaN, i.e. missing values for the model.
for col in cats:
    test[col] = pd.Categorical(test[col], categories=X[col].cat.categories)

dtest = xgb.DMatrix(test, enable_categorical=True)
preds = model.predict(dtest)

In [None]:
# Keep the row identifiers from the test frame for the submission file.
Id = test["id"]

In [None]:
# Floor the predictions at zero (no upper bound) so negative values become 0.
final = np.clip(preds, 0, None)

# Two-column submission table: row id and predicted sales.
output = pd.DataFrame(
    {
        'id': Id,
        'sales': final,
    }
)

print(output)
output.to_csv('submission.csv', index=False)