In [None]:
from google.colab import drive

def mount_drive_and_check_path(directory_path):
    """Mount Google Drive and make ``directory_path`` importable.

    Mounts Drive at ``/content/drive`` (forcing a remount), then appends
    ``directory_path`` to ``sys.path`` unless it is already listed. If the
    path does not exist, a message is printed and ``sys.path`` is left
    unchanged.

    Args:
        directory_path: Path to add to the module search path.

    Returns:
        None.
    """
    # Imported locally: this function is called before the file-level
    # `import os` / `import sys` statements are executed, so relying on the
    # module-level names would raise NameError on a fresh run.
    import os
    import sys

    # Mount Google Drive
    drive.mount('/content/drive', force_remount=True)

    # Nothing to register if the path is missing
    if not os.path.exists(directory_path):
        print(f"Directory {directory_path} does not exist.")
        return

    # Add the directory to the system path if it's not already there
    if directory_path not in sys.path:
        sys.path.append(directory_path)
        print(f"Added {directory_path} to system path.")

# Mount Drive and register the project folder on the import path
mount_drive_and_check_path(directory_path='/content/drive/MyDrive/tenX/w4/')


# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import logging
import sys
import os

# Add the directory containing your Python files to the system path.
# The entry is appended only when it is missing, so the search path does not
# accumulate duplicates when it was already registered or this code is re-run.
project_dir = '/content/drive/MyDrive/tenX/w4/'  # Adjust this path as needed
if project_dir not in sys.path:
    sys.path.append(project_dir)

# Import custom classes
# from evaluator import Evaluator
from preprocessor import Preprocessor
from model_builder import ModelBuilder


# Set up logging.
# basicConfig() does nothing when the root logger already has handlers
# (which can be the case in hosted notebook environments), so the INFO level
# and format below would be silently ignored. force=True (Python 3.8+)
# removes existing root handlers first so this configuration always applies.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    force=True,
)

# Read the three input CSV files from the project's Data folder
logging.info("Loading data...")
data_dir = '/content/drive/MyDrive/tenX/w4/Data'
try:
    train = pd.read_csv(data_dir + '/train.csv')
    test = pd.read_csv(data_dir + '/test.csv')
    store = pd.read_csv(data_dir + '/store.csv')
except FileNotFoundError as missing_file:
    # A path could not be opened; log it and let the error propagate.
    logging.error(f"File not found: {missing_file}")
    raise
except pd.errors.EmptyDataError:
    # pandas found no data to parse in one of the files.
    logging.error("One of the CSV files is empty.")
    raise
except pd.errors.ParserError as parse_failure:
    # pandas could not parse one of the files as CSV.
    logging.error(f"Error parsing CSV file: {parse_failure}")
    raise

# Report the dimensions of each loaded frame
logging.info(f"Train shape: {train.shape}, Test shape: {test.shape}, Store shape: {store.shape}")

# Attach the store columns to both frames with a left join on 'Store',
# so every row of train/test is kept.
logging.info("Merging data...")
train = train.merge(store, on='Store', how='left')
test = test.merge(store, on='Store', how='left')

logging.info(f"Merged train shape: {train.shape}, Merged test shape: {test.shape}")

# Run the custom preprocessing on both merged frames
logging.info("Preprocessing data...")
preprocessor = Preprocessor()
try:
    # One Preprocessor instance handles both frames, train first.
    train_processed = preprocessor.preprocess(train)
    test_processed = preprocessor.preprocess(test)
except Exception as exc:
    # Record the failure, then let the original exception propagate.
    logging.error(f"Preprocessing error: {exc}")
    raise

logging.info(f"Processed train shape: {train_processed.shape}, Processed test shape: {test_processed.shape}")

# Separate features from the target and hold out 20% for validation
logging.info("Splitting data...")
non_feature_columns = ['Sales', 'Customers']
y = train_processed['Sales']
X = train_processed.drop(columns=non_feature_columns)
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,  # fixed seed so the split is reproducible
)

logging.info(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
logging.info(f"X_val shape: {X_val.shape}, y_val shape: {y_val.shape}")

# Build the model from the training split via the project's ModelBuilder
logging.info("Building and training model...")
model_builder = ModelBuilder()
model = model_builder.build_model(X_train, y_train)

# Predict on the held-out validation features
logging.info("Making predictions...")
y_pred = model.predict(X_val)

# Evaluate model
# NOTE: evaluation is currently disabled. The Evaluator import and the code
# below are commented out, so only the log message is emitted and `y_pred`
# is not used after being computed.
logging.info("Evaluating model...")
# evaluator = Evaluator()
# metrics = evaluator.evaluate(y_val, y_pred)

# print("Model Performance Metrics:")
# for metric, value in metrics.items():
#     print(f"{metric.upper()}: {value:.4f}")

# Persist the model under the project's models folder on Drive
logging.info("Saving model...")
model_save_path = '/content/drive/MyDrive/tenX/w4/models'
# Create the target folder (and any missing parents); no error if it exists.
os.makedirs(
    model_save_path,
    exist_ok=True,
)
model_builder.save_model(model_save_path)

logging.info("Process completed successfully!")