# Apply the Final Version of the RandomForestRegressor Model

## Sales Prediction on the "REAL_DATA_TEST.csv" Dataset

In [6]:
import pandas as pd
import joblib

# Load the stored model.
# NOTE(review): joblib.load unpickles the file, and unpickling can execute
# arbitrary code -- only load model files that come from a trusted source.
model_path = './model/random_forest_sales_model.pkl'  # Update path if needed
rf_model = joblib.load(model_path)

# Load the new dataset (semicolon-separated CSV)
file_path = './data/REAL_DATA_TEST.csv'  # Update path with the new dataset
data = pd.read_csv(file_path, delimiter=';')

# Columns that are kept in the output but are not passed to the model
columns_for_future = ['date', 'open', 'state_holiday', 'school_holiday', 'index']

# Fail early with a readable message when the file does not have the expected
# layout (for example because it was parsed with the wrong delimiter).
missing_columns = [col for col in columns_for_future if col not in data.columns]
if missing_columns:
    raise KeyError(f"Columns missing from '{file_path}': {missing_columns}")

# Build the open-store mask once so that the row selection here and the
# write-back further down are guaranteed to address exactly the same rows.
open_mask = data['open'] == 1
has_open_rows = bool(open_mask.any())

# Separate rows where the store was open
data_open = data[open_mask].reset_index(drop=True)

# Preserve the non-feature columns, then drop them to obtain the model input
preserved_cols_data = data_open[columns_for_future].copy()
data_open_for_prediction = data_open.drop(columns=columns_for_future)

# Predict sales for rows where the store was open.
# The model call is skipped when there are no open rows, because predicting on
# a zero-row frame is rejected by the estimator's input validation.
if has_open_rows:
    sales_prediction = rf_model.predict(data_open_for_prediction)
else:
    sales_prediction = pd.Series(dtype='float64').to_numpy()

# Features, preserved columns and predictions of the open rows side by side.
# This frame is not used for the write-back below; it is kept for inspection.
combined_predicted_data = pd.concat([data_open_for_prediction, preserved_cols_data], axis=1)
combined_predicted_data['sales'] = sales_prediction

# Assign sales = 0 to all rows initially (rows where 'open' is not 1 keep this value)
data['sales'] = 0.0

# Update the rows where the store was open with the predicted sales.
# Predictions are in the same row order as data[open_mask], so a positional
# assignment through the mask lines them up with their source rows.
if has_open_rows:
    data.loc[open_mask, 'sales'] = sales_prediction

# Save the new dataset with predicted sales.
# NOTE(review): the input is read with ';' but the output uses to_csv's default
# ',' separator -- confirm that this is the intended output format.
output_file = './data/javier_dastas_sales_predictions.csv'
data.to_csv(output_file, index=False)

print(f"Predictions completed. The dataset with predicted sales has been saved to '{output_file}'.")

Predictions completed. The dataset with predicted sales has been saved to './data/javier_dastas_sales_predictions.csv'.
