In [6]:
# Start constructing the improved version of the notebook with additional insights and modeling enhancements.
from nbformat.v4 import new_notebook, new_code_cell, new_markdown_cell
from nbformat import write

# Ordered (cell factory, cell source) pairs describing the extra notebook cells.
# Markdown entries are section headings; code entries hold source text that is
# stored in the generated notebook, not executed here.
_cell_specs = [
    # --- Exploratory data analysis ---
    (new_markdown_cell, "## 🔍 Enhanced Exploratory Data Analysis"),
    (
        new_code_cell,
        """# Correlation Heatmap
plt.figure(figsize=(10, 8))
sns.heatmap(df.corr(numeric_only=True), annot=True, cmap='coolwarm', fmt=".2f")
plt.title("Correlation Heatmap")
plt.show()""",
    ),
    (
        new_code_cell,
        """# Distribution Plots
num_features = ['Price', 'Discount', 'Rating (out of 5)', 'Amount Sold']
for feature in num_features:
    plt.figure(figsize=(6, 4))
    sns.histplot(df[feature], kde=True, bins=30)
    plt.title(f'Distribution of {feature}')
    plt.show()""",
    ),
    (
        new_code_cell,
        """# Boxplots to check for outliers
for feature in num_features:
    plt.figure(figsize=(6, 4))
    sns.boxplot(x=df[feature])
    plt.title(f'Boxplot of {feature}')
    plt.show()""",
    ),
    # --- Feature engineering ---
    (new_markdown_cell, "## 🧠 Feature Engineering"),
    (
        new_code_cell,
        """# Add price after discount
df['Price_After_Discount'] = df['Price'] * (1 - df['Discount'] / 100)""",
    ),
    # --- Model comparison ---
    (new_markdown_cell, "## ⚙️ Model Comparison with Multiple Regressors"),
    (
        new_code_cell,
        """from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from xgboost import XGBRegressor

models = {
    'Decision Tree': DecisionTreeRegressor(**grid_search.best_params_, random_state=42),
    'Random Forest': RandomForestRegressor(random_state=42),
    'Linear Regression': LinearRegression(),
    'XGBoost': XGBRegressor(random_state=42)
}

results = {}
for name, model in models.items():
    scores = cross_val_score(model, X, y, cv=5, scoring='r2')
    results[name] = scores
    print(f"{name} | Mean R²: {scores.mean():.4f} | Std: {scores.std():.4f}")""",
    ),
    # --- Residual analysis ---
    (new_markdown_cell, "## 📊 Residual Analysis"),
    (
        new_code_cell,
        """# Residuals for the best model (Decision Tree from GridSearch)
best_model = DecisionTreeRegressor(**grid_search.best_params_, random_state=42)
best_model.fit(X_train, y_train)
y_pred_best = best_model.predict(X_test)
residuals = y_test - y_pred_best

plt.figure(figsize=(6, 4))
sns.histplot(residuals, kde=True)
plt.title("Residuals Distribution")
plt.xlabel("Residual")
plt.show()

plt.figure(figsize=(6, 4))
plt.scatter(y_test, residuals)
plt.axhline(0, color='red', linestyle='--')
plt.title("Residuals vs Actual")
plt.xlabel("Actual")
plt.ylabel("Residual")
plt.show()""",
    ),
]

# Instantiate the cells in the order listed above.
improved_cells = [make_cell(source) for make_cell, source in _cell_specs]

# Assemble a fresh notebook that contains only the cells in `improved_cells`.
# NOTE(review): those cells reference names such as `df`, `plt`, `sns`,
# `grid_search`, `X`/`y` and `X_train`/`X_test` that none of them define, so
# the saved notebook presumably has to be merged with (or run after) the
# original notebook's setup cells -- confirm.
nb = new_notebook()

# Append the improved cells to the new notebook
nb.cells.extend(improved_cells)

# Save the enhanced notebook.
# The cell sources contain non-ASCII characters (emoji headings, "²"), so the
# file is opened explicitly as UTF-8 instead of relying on the platform default
# encoding, which can raise UnicodeEncodeError (e.g. cp1252 on Windows).
enhanced_notebook_path = "project_enhanced2.ipynb"
with open(enhanced_notebook_path, "w", encoding="utf-8") as f:
    write(nb, f)

# Bare expression so the notebook front end echoes the output path.
enhanced_notebook_path


'project_enhanced2.ipynb'