# Data Preprocessing, Model Creation and Optimization
## What this notebook does

- Preprocess the data
- Train the Random Forest model and save it
- Prepare the data for the LSTM (`time_steps=5`)
- Train the LSTM model and save it

In [1]:
# Import necessary libraries
import sys
import os
import logging

In [2]:
# Resolve the notebook's working directory and the directory one level above it
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)

# Echo both locations so the resolved paths are visible in the cell output
print(current_dir, parent_dir, sep="\n")

# Put the parent directory first on the import path so that packages living
# there (the `scripts` package imported below) can be found from this notebook
sys.path.insert(0, parent_dir)

c:\Users\HP\Desktop\KAIM-Cohort-3\Week 4\rossmann-pharmaceutical-ml-and-dl-sales-forecasting\notebooks
c:\Users\HP\Desktop\KAIM-Cohort-3\Week 4\rossmann-pharmaceutical-ml-and-dl-sales-forecasting


In [3]:
# Import custom scripts
from scripts.data_preprocessing import load_data, preprocess_data, create_lstm_dataset, build_lstm_model
from scripts.model_training import train_rf_model, save_rf_model, save_lstm_model, load_lstm_model, train_lstm_model
from scripts.utils import log

In [4]:
# Configure logging: INFO and above go to ../logs/model.log and to the console.

# FileHandler raises FileNotFoundError if the target directory is missing,
# so make sure ../logs exists before the handler opens the file.
os.makedirs("../logs", exist_ok=True)

logging.basicConfig(
    level=logging.INFO,  # Use DEBUG for detailed logs
    format="%(asctime)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler("../logs/model.log"),  # Log to a file
        logging.StreamHandler()  # Log to the console
    ],
    # basicConfig silently does nothing when the root logger already has
    # handlers (e.g. this cell was run before, or an imported library
    # installed one). force=True removes and closes the existing handlers so
    # this configuration always takes effect (requires Python 3.8+).
    force=True,
)

# Root logger, configured above
logger = logging.getLogger()

In [5]:
# Load the training CSV and preprocess it into X and y (presumably the
# features and the target -- confirm against scripts.data_preprocessing).
# Any failure is reported through the project's `log` helper and then
# re-raised so the full traceback still shows up in the notebook.
try:
    df = load_data('../data/train.csv')
    print("Loaded DataFrame columns:", df.columns)  # quick look at what was loaded
    X, y = preprocess_data(df)
except Exception as exc:
    log(f"Error during data loading/preprocessing: {exc}")
    raise

  df = pd.read_csv(file_path)


Loaded DataFrame columns: Index(['Store', 'DayOfWeek', 'Date', 'Sales', 'Customers', 'Open', 'Promo',
       'StateHoliday', 'SchoolHoliday'],
      dtype='object')


In [6]:
# Show the column labels of the DataFrame loaded in the previous cell
print(df.columns)

Index(['Store', 'DayOfWeek', 'Date', 'Sales', 'Customers', 'Open', 'Promo',
       'StateHoliday', 'SchoolHoliday'],
      dtype='object')


In [7]:
# Train the Random Forest model on X, y and save it to ../rf_model.pkl.
# A failure in either step is logged and then re-raised, the same way the
# data-loading cell handles errors.
try:
    rf_model = train_rf_model(X, y)
    save_rf_model(rf_model, '../rf_model.pkl')
except Exception as e:
    log(f"Error during Random Forest model training: {e}")
    # Re-raise so a failed run is not mistaken for a trained, saved model and
    # later cells do not continue with `rf_model` undefined or stale.
    raise

In [8]:
# Prepare the LSTM dataset, then train the LSTM model and save it to
# ../lstm_model.h5. Training is slow (the recorded run took roughly four
# minutes per epoch for 5 epochs).
try:
    # Build the LSTM inputs/targets from X using windows of 5 time steps.
    # NOTE(review): reshape(-1, 1) turns X into a single column; if X has more
    # than one feature column, their values end up interleaved in one long
    # series -- confirm this is what create_lstm_dataset expects.
    X_lstm, y_lstm = create_lstm_dataset(X.reshape(-1, 1), time_steps=5)

    # Train and save the LSTM model
    lstm_model = train_lstm_model(X_lstm, y_lstm)
    save_lstm_model(lstm_model, '../lstm_model.h5')
except Exception as e:
    log(f"Error during LSTM model training: {e}")
    # Re-raise (as the data-loading cell does) so a failed run is not mistaken
    # for a trained, saved model.
    raise


  super().__init__(**kwargs)


Epoch 1/5
[1m127152/127152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m259s[0m 2ms/step - loss: 0.2327
Epoch 2/5
[1m127152/127152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m234s[0m 2ms/step - loss: 0.2125
Epoch 3/5
[1m127152/127152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m234s[0m 2ms/step - loss: 0.2107
Epoch 4/5
[1m127152/127152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m234s[0m 2ms/step - loss: 0.2117
Epoch 5/5
[1m127152/127152[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m235s[0m 2ms/step - loss: 0.2104




Optimized LSTM Model Training Loss: 0.21103939414024353


# Start the API (run this in a separate terminal or command line)
# !python model_serving.py