# In [None]:
# House Price Prediction Notebook

# Step 1: Import necessary libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from src.data_preprocessing import load_data, preprocess_data
from src.model import train_model, evaluate_model

# Step 2: Load the data
# Read the raw training table through the project's loader.
train_df = load_data('data/train.csv')

# Step 3: Preprocess the data
# preprocess_data splits the raw table into a feature matrix X and a target y.
X, y = preprocess_data(train_df)

# Step 4: Split the data into training and testing sets
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 5: Train the model
model = train_model(X_train, y_train)

# Step 6: Evaluate the model
# Score on the held-out split only, so the metric reflects unseen data.
mse = evaluate_model(model, X_test, y_test)
print(f'Mean Squared Error: {mse}')

# Step 7: Visualize feature importance
from src.utils import plot_feature_importance

# Label the importances with the columns the model was actually trained on.
# Taking names from the raw table (everything but the last column) is only
# correct if preprocessing neither adds, drops, nor reorders columns, so prefer
# the labels carried by X and fall back to the raw-table names only when X has
# no column labels (e.g. it is a plain array).
# NOTE(review): assumes X exposes `.columns` when it is a DataFrame — confirm
# against preprocess_data.
feature_names = getattr(X, 'columns', None)
if feature_names is None:
    feature_names = train_df.columns[:-1]  # Exclude 'SalePrice' from features
plot_feature_importance(model, feature_names)