In [None]:
##Import kaggle
pip install kaggle

In [None]:
## Download the competition files for house-prices-advanced-regression-techniques
## with the Kaggle CLI (shell escape via `!`).
## NOTE(review): presumably requires Kaggle API credentials to be configured, and the
## downloaded files to end up in the data directory read by the next cell - confirm.
!kaggle competitions download -c house-prices-advanced-regression-techniques

In [None]:
## Imports
import os
from pathlib import Path

import numpy as np
import pandas as pd

## Paths
# The data folder can be overridden with the HOUSE_PRICE_DATA_DIR environment
# variable; the original local folder stays the default so existing runs behave
# exactly as before.
Data_Dir = Path(os.environ.get(
    "HOUSE_PRICE_DATA_DIR",
    r"C:\Users\chand\Desktop\ML Projects\Kaggle- house-price",
))
train_path = Data_Dir/"train.csv"
test_path = Data_Dir/"test.csv"
desc_path = Data_Dir/"data_description.txt"

## Load
train = pd.read_csv(train_path)
test = pd.read_csv(test_path)

## Shapes
print("Train shape:", train.shape)
print("test shape:", test.shape)

# Target transform: model log1p(SalePrice) and keep the features separately.
y = np.log1p(train["SalePrice"].values)
X = train.drop(columns=["SalePrice"])

## Remove Id (kept aside so it can be re-attached to predictions later)
train_id = X[["Id"]]
test_id = test["Id"]
X = X.drop(columns=["Id"])
test_no_id = test.drop(columns=["Id"])

# MSSubClass is a numeric code for a category, so treat it as a string
X["MSSubClass"] = X["MSSubClass"].astype(str)
test_no_id["MSSubClass"] = test_no_id["MSSubClass"].astype(str)

## Quick peek
display(train.head(3))
print("\n Top missing (train)")
print(X.isna().sum().sort_values(ascending=False).head(20))

# This report was previously commented out (with a missing "." before sum()),
# which left the heading below printed with nothing under it.
print("\nTop missing (test):")
print(test_no_id.isna().sum().sort_values(ascending=False).head(20))

# Quick target check
print("\nSalePrice(original)sumamry:")
print(train["SalePrice"].describe())
print("\nSalePrice (log1p) mean/std:", y.mean().round(4), y.std().round(4))

In [None]:
## Preprocessing step
# Stack the train and test feature frames (train rows first) so that every
# preprocessing step is applied to both; n_train remembers where to split again.
n_train = len(X)
all_data = pd.concat([X, test_no_id], axis=0, ignore_index=True)
print("Combined shape:", all_data.shape)

In [None]:
## Missing value imputation
# Work on a copy so the combined raw frame (all_data) keeps its original NaNs;
# later cells read all_data to build missingness indicators.
all_data_imp = all_data.copy()

## 1. Categorical columns where NaN is filled with the literal level "None"
none_cols = ["Alley","BsmtQual","BsmtCond","BsmtExposure","BsmtFinType1","BsmtFinType2","FireplaceQu",
             "GarageType","GarageFinish","GarageQual","GarageCond","PoolQC","Fence","MiscFeature"]
all_data_imp[none_cols] = all_data_imp[none_cols].fillna("None")

## 2. Numeric columns where NaN is filled with 0
zero_cols = ["BsmtFinSF1","BsmtFinSF2","BsmtUnfSF","TotalBsmtSF","BsmtFullBath","BsmtHalfBath","GarageYrBlt","GarageCars","GarageArea"]
all_data_imp[zero_cols] = all_data_imp[zero_cols].fillna(0)

# MasVnrArea is also zero-filled. This happens before the generic numeric fill
# below so that the explicit 0 is what gets applied to it.
all_data_imp["MasVnrArea"] = all_data_imp["MasVnrArea"].fillna(0)

## 3. LotFrontage: fill with the median of the same Neighborhood
all_data_imp["LotFrontage"] = (
    all_data_imp.groupby("Neighborhood")["LotFrontage"]
    .transform(lambda x: x.fillna(x.median()))
)

## 4. Remaining categorical columns: fill with the most frequent value
cat_cols = all_data_imp.select_dtypes(include="object").columns
for col in cat_cols:
    if all_data_imp[col].isna().sum() > 0:
        all_data_imp[col] = all_data_imp[col].fillna(all_data_imp[col].mode()[0])

## 5. Remaining numeric columns: fill with the median
# Fixed: this previously selected dtype "object" again, so the loop only ever saw
# the categorical columns already filled in step 4 and never touched a numeric one.
num_cols = all_data_imp.select_dtypes(include="number").columns
for col in num_cols:
    if all_data_imp[col].isna().sum() > 0:
        all_data_imp[col] = all_data_imp[col].fillna(all_data_imp[col].median())

print(" Missing values after imputation:", all_data_imp.isna().sum().sum())

In [None]:
## Sanity check: list the columns that still contain NaN after imputation,
## largest count first (prints an empty Series when nothing is missing).

na_counts = all_data_imp.isna().sum()
missing_cols = na_counts.loc[na_counts > 0].sort_values(ascending=False)

print(missing_cols)

In [None]:
## Additional imputation: GarageYrBlt
# Rule: GarageType == "None" -> 0; otherwise a garage whose build year was
# missing in the raw data gets the house's YearBuilt.
# The missing mask is taken from the raw combined frame (all_data): all_data_imp
# has already had its GarageYrBlt NaNs replaced by 0, so a fillna() on
# all_data_imp can no longer find them. all_data_imp is a copy of all_data and
# shares its index, so the mask lines up row for row.
g = all_data_imp
no_garage = g["GarageType"] == "None"
year_was_missing = all_data["GarageYrBlt"].isna()
g["GarageYrBlt"] = np.where(
    no_garage,
    0,
    g["GarageYrBlt"].where(~year_was_missing, g["YearBuilt"]),
)


In [None]:
## Missingness indicators
# For each listed column add "<column>_was_missing" (1/0) to the imputed frame.
# The flags are computed from all_data, i.e. from the values before imputation.
hi_na_cols = [
    "Alley", "BsmtQual", "BsmtCond", "BsmtExposure", "BsmtFinType2",
    "FireplaceQu", "GarageType", "GarageFinish", "GarageQual", "GarageCond",
    "PoolQC", "Fence", "MiscFeature", "MasVnrArea", "LotFrontage",
]

was_missing = all_data[hi_na_cols].isna().astype(int)
for c in hi_na_cols:
    all_data_imp[f"{c}_was_missing"] = was_missing[c]

In [None]:
## Ordinal encoding of the quality / condition columns
# Shared scale: Ex=5, Gd=4, TA=3, Fa=2, Po=1, "None"=0.
qual_mappings = dict(zip(("Ex", "Gd", "TA", "Fa", "Po", "None"), (5, 4, 3, 2, 1, 0)))

# Every ordinal column uses the same mapping object.
Ordinal_cols = dict.fromkeys(
    [
        "ExterQual", "ExterCond", "BsmtQual", "BsmtCond", "HeatingQC",
        "KitchenQual", "FireplaceQu", "GarageQual", "GarageCond", "PoolQC",
    ],
    qual_mappings,
)

# assign() returns a new frame, so all_data_imp itself is left unchanged;
# values not present in the mapping become NaN (behaviour of Series.map).
all_data_enc = all_data_imp.assign(
    **{col: all_data_imp[col].map(mapping) for col, mapping in Ordinal_cols.items()}
)

In [None]:
## One-hot encode the remaining categorical columns
# drop_first=True removes the first level of every encoded column.
dummy_encoded = pd.get_dummies(all_data_enc, drop_first=True)
all_data_enc = dummy_encoded
print("shape after encoding:", all_data_enc.shape)

In [None]:
# Preview the first five rows of the encoded frame (head() defaults to 5 rows).
all_data_enc.head()

In [None]:
## Detect and reduce skewness in numeric features
from scipy.stats import skew

# Candidate features: columns that are not one-hot dummies and have more than 10
# distinct values (ordinal-mapped columns are numeric by now as well).
# get_dummies produces uint8 columns on older pandas and bool columns on newer
# versions, so both dtypes are treated as dummies here.
# (A stray expression statement that only built an index and discarded it was
# removed from this cell; it had no effect.)
numeric_feats = [
    col for col in all_data_enc.columns
    if str(all_data_enc[col].dtype) not in {"uint8", "bool"}
    and all_data_enc[col].nunique() > 10
]

## Calculate skewness
skewed_feats = (
    all_data_enc[numeric_feats]
    .apply(lambda x: skew(x.dropna()))
    .sort_values(ascending=False)
)
print("Top 10 skewed features:\n", skewed_feats.head(10))

## Pick features with |skew| > .75 (both positive and negative skew qualify)
skewed_cols = skewed_feats[skewed_feats.abs() > .75].index

## Apply log1p
# NOTE(review): this overwrites all_data_enc in place, so running the cell a
# second time would transform already-transformed columns again.
all_data_enc[skewed_cols] = np.log1p(all_data_enc[skewed_cols])

print(f"Transformed {len(skewed_cols)} skewed numeric features.")


In [None]:
# Columns excluded from the modelling frame before feature engineering.
drop_cols = ["PoolArea", "MiscVal", "3SsnPorch", "LowQualFinSF"]
all_data_final = all_data_enc.drop(drop_cols, axis=1)
print("Final dataset shape:", all_data_final.shape)

In [None]:
# Engineered features are added to a copy of the selected frame.
all_data_feat = all_data_final.copy()

# Calendar-year columns are read from the imputed, pre-encoding frame
# (all_data_imp) rather than from all_data_feat: the skew step log1p-transforms
# any numeric column with |skew| > 0.75, and subtracting a log-transformed year
# from YrSold would not give an age in years. Both frames share one row index.
raw_years = all_data_imp.loc[
    all_data_feat.index, ["YrSold", "YearBuilt", "YearRemodAdd", "GarageYrBlt"]
]

# 1. Total areas
# NOTE(review): the area/room inputs below are taken from all_data_feat, i.e.
# after the skew step; any of them that was log1p-transformed enters these sums,
# products and ratios on the log scale - confirm this is intended.
all_data_feat["TotalSF"] = all_data_feat["TotalBsmtSF"] + all_data_feat["1stFlrSF"] + all_data_feat["2ndFlrSF"]
all_data_feat["TotalPorchSF"] = (all_data_feat["OpenPorchSF"] +
                                 all_data_feat["EnclosedPorch"] + all_data_feat["ScreenPorch"] +
                                 all_data_feat["WoodDeckSF"])

# 2. Bathrooms & rooms (half baths count as 0.5 in TotalBath)
all_data_feat["TotalBath"] = (all_data_feat["FullBath"] + 0.5*all_data_feat["HalfBath"] +
                              all_data_feat["BsmtFullBath"] + 0.5*all_data_feat["BsmtHalfBath"])
all_data_feat["TotalRooms"] = all_data_feat["TotRmsAbvGrd"] + all_data_feat["FullBath"] + all_data_feat["HalfBath"]

# 3. Age features, in years relative to the sale year
all_data_feat["HouseAge"] = raw_years["YrSold"] - raw_years["YearBuilt"]
all_data_feat["RemodAge"] = raw_years["YrSold"] - raw_years["YearRemodAdd"]
# GarageYrBlt == 0 is the imputed marker for "no garage year"; age is 0 there.
all_data_feat["GarageAge"] = np.where(raw_years["GarageYrBlt"] > 0,
                                      raw_years["YrSold"] - raw_years["GarageYrBlt"], 0)

# 4. Quality × size
all_data_feat["OverallQual_GrLivArea"] = all_data_feat["OverallQual"] * all_data_feat["GrLivArea"]
all_data_feat["OverallQual_TotSF"] = all_data_feat["OverallQual"] * all_data_feat["TotalSF"]

# 5. Ratios (small offsets in the denominators avoid division by zero)
all_data_feat["RoomsPerArea"] = all_data_feat["TotRmsAbvGrd"] / (all_data_feat["GrLivArea"] + 1e-5)
all_data_feat["LotAreaPerRoom"] = all_data_feat["LotArea"] / (all_data_feat["TotRmsAbvGrd"] + 1)

print("✅ Final dataset with engineered features:", all_data_feat.shape)

In [None]:
# Features to remove from the modelling frame.
drop_more = [
    "Utilities", "Street", "Condition2", "RoofMatl", "Heating",
    "GarageArea",   # keep GarageCars
    "TotRmsAbvGrd"  # we keep TotalRooms instead
]

# The categorical features in the list were one-hot encoded earlier, so they no
# longer exist under their own name: they are now dummy columns named
# "<feature>_<level>". Matching only the exact name silently kept all of them,
# so the dummy columns are matched by their "<feature>_" prefix as well.
dummy_prefixes = tuple(f"{name}_" for name in drop_more)
cols_to_drop = [
    c for c in all_data_feat.columns
    if c in drop_more or c.startswith(dummy_prefixes)
]

all_data_clean = all_data_feat.drop(columns=cols_to_drop)

print("✅ Final dataset after feature selection:", all_data_clean.shape)

In [None]:
from sklearn.preprocessing import StandardScaler

# Number of training rows: the combined frame holds the train rows first.
n_train = len(X)

# Split the processed frame back into its train and test parts.
train_processed = all_data_clean.iloc[:n_train].copy()
test_processed = all_data_clean.iloc[n_train:].copy()

# Standardise for the linear models; the scaler is fitted on the train part
# only and then applied to both parts.
scaler = StandardScaler().fit(train_processed)
train_scaled = scaler.transform(train_processed)
test_scaled = scaler.transform(test_processed)

print("Train scaled shape:", train_scaled.shape)
print("Test scaled shape:", test_scaled.shape)

In [None]:
# Spot-check the scaling of a single feature: first five raw values next to
# their scaled counterparts, plus the scaler's parameters for that column.
feature = "GrLivArea"

# Column position of the feature inside the scaled arrays
feature_pos = list(train_processed.columns).index(feature)

# Raw values (first 5 rows)
train_raw = train_processed[feature].values[:5]
test_raw = test_processed[feature].values[:5]

# Scaled values (scaler was fitted on the train part)
train_scaled_vals = train_scaled[:5, feature_pos]
test_scaled_vals = test_scaled[:5, feature_pos]

print(f"Feature: {feature}\n")

print("Train raw values:   ", train_raw)
print("Train scaled values:", np.round(train_scaled_vals, 3))

print("\nTest raw values:    ", test_raw)
print("Test scaled values: ", np.round(test_scaled_vals, 3))

print("\nScaler parameters → mean =", round(scaler.mean_[feature_pos], 2),
      "std =", round(scaler.scale_[feature_pos], 2))

In [None]:
# Display the raw target column of the training data.
train.loc[:, "SalePrice"]

In [None]:
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_squared_error
import numpy as np

# Target: log1p of the original SalePrice column of the training data
y = np.log1p(train["SalePrice"])

# Regularisation strengths to search over
alphas = [0.1, 1, 5, 10, 20, 50, 100, 200, 500, 1000]

# RidgeCV selects alpha by 5-fold cross-validation on negative MSE
ridge = RidgeCV(alphas=alphas, scoring="neg_mean_squared_error", cv=5).fit(train_scaled, y)

print("Best alpha:", ridge.alpha_)

# best_score_ is a negative MSE; flip the sign and take the root to get an RMSE
cv_rmse = np.sqrt(-ridge.best_score_)
print("CV RMSE:", cv_rmse)

# Test-set predictions, mapped back from the log1p scale with expm1
ridge_test_log = ridge.predict(test_scaled)
ridge_preds = np.expm1(ridge_test_log)

In [None]:
from sklearn.model_selection import cross_val_predict

# Out-of-fold predictions of the ridge model on the training data (log scale)
train_pred_log = cross_val_predict(ridge, train_scaled, y, cv=5)

# Map both the target and the predictions back to the SalePrice scale
y_true = np.expm1(y)
train_pred = np.expm1(train_pred_log)

# RMSE measured in the original price units
rmse = np.sqrt(mean_squared_error(y_true, train_pred))
print("Training CV RMSE (original scale):", rmse)

In [None]:
import matplotlib.pyplot as plt

# Actual vs. out-of-fold predicted prices; the dashed red line is y = x.
fig, ax = plt.subplots()
ax.scatter(y_true, train_pred, alpha=0.3)
ax.set_xlabel("Actual SalePrice")
ax.set_ylabel("Predicted SalePrice")
ax.set_title("Ridge Regression: Actual vs Predicted")
price_range = [y_true.min(), y_true.max()]
ax.plot(price_range, price_range, 'r--')
plt.show()

In [None]:
from sklearn.linear_model import RidgeCV, LassoCV, ElasticNetCV
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import cross_val_score
import numpy as np
import pandas as pd


def _nested_cv_rmse(model):
    """Return sqrt(mean(-score)) over 5-fold neg-MSE scores of `model`.

    The model is evaluated on the notebook-level train_scaled / y; since the
    models passed in are themselves *CV estimators, each outer fold re-runs
    their internal hyper-parameter search.
    """
    fold_scores = cross_val_score(model, train_scaled, y, cv=5,
                                  scoring="neg_mean_squared_error")
    return np.sqrt(np.mean(-fold_scores))


# Target (log-transformed SalePrice)
y = np.log1p(train["SalePrice"])

# ----- Ridge: RMSE taken from the internal CV score of the selected alpha -----
alphas = [0.1, 1, 5, 10, 20, 50, 100, 200, 500, 1000]
ridge = RidgeCV(alphas=alphas, scoring="neg_mean_squared_error", cv=5)
ridge.fit(train_scaled, y)
ridge_rmse = np.sqrt(-ridge.best_score_)

# ----- Lasso -----
lasso = LassoCV(alphas=np.logspace(-4, -0.5, 50), cv=5, max_iter=10000, random_state=42)
lasso.fit(train_scaled, y)
lasso_rmse = _nested_cv_rmse(lasso)

# ----- ElasticNet -----
elasticnet = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1],
                          alphas=np.logspace(-4, -0.5, 50),
                          cv=5, max_iter=10000, random_state=42)
elasticnet.fit(train_scaled, y)
elastic_rmse = _nested_cv_rmse(elasticnet)

# ----- Compare -----
results = pd.DataFrame({
    "Model": ["Ridge", "Lasso", "ElasticNet"],
    "Best Alpha": [ridge.alpha_, lasso.alpha_, elasticnet.alpha_],
    "CV RMSE (log)": [ridge_rmse, lasso_rmse, elastic_rmse]
})

print(results)

In [None]:
from sklearn.linear_model import LassoCV, ElasticNetCV
from sklearn.model_selection import cross_val_score

# Lasso and ElasticNet again, this time with max_iter=50000.

# ----- Lasso -----
lasso = LassoCV(alphas=np.logspace(-4, -0.5, 50),
                cv=5, max_iter=50000, random_state=42)
lasso.fit(train_scaled, y)
lasso_fold_mse = -cross_val_score(lasso, train_scaled, y,
                                  cv=5, scoring="neg_mean_squared_error")
lasso_rmse = np.sqrt(np.mean(lasso_fold_mse))
print("Best alpha (Lasso):", lasso.alpha_)
print("CV RMSE (Lasso):", lasso_rmse)

# ----- ElasticNet -----
elasticnet = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1],
                          alphas=np.logspace(-4, -0.5, 50),
                          cv=5, max_iter=50000, random_state=42)
elasticnet.fit(train_scaled, y)
elastic_fold_mse = -cross_val_score(elasticnet, train_scaled, y,
                                    cv=5, scoring="neg_mean_squared_error")
elastic_rmse = np.sqrt(np.mean(elastic_fold_mse))
print("Best alpha (ElasticNet):", elasticnet.alpha_)
print("Best l1_ratio (ElasticNet):", elasticnet.l1_ratio_)
print("CV RMSE (ElasticNet):", elastic_rmse)

In [None]:
import pandas as pd
import numpy as np

# Uses the lasso model already fitted on train_scaled / log1p(SalePrice):
# predict on the scaled test set and undo the log1p transform.
lasso_test_log = lasso.predict(test_scaled)
lasso_preds_test = np.expm1(lasso_test_log)

# Submission frame: test Ids next to the predicted prices
sub_path = "submission_lasso.csv"
sub = pd.DataFrame({"Id": test["Id"], "SalePrice": lasso_preds_test})
sub.to_csv(sub_path, index=False)
print("Saved:", sub_path, "→", sub.head())


In [None]:
# Upload submission_lasso.csv to the competition via the Kaggle CLI.
# Running this cell performs a new submission every time it is executed.
!kaggle competitions submit -c house-prices-advanced-regression-techniques -f submission_lasso.csv -m "LassoCV baseline submission"

### XGBoost Model

In [None]:
pip install xgboost

In [None]:
from xgboost import XGBRegressor
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import numpy as np

# Features: the processed (unscaled) train / test frames.
X_tr, X_te = train_processed, test_processed

# Target: log1p of SalePrice
y_log = np.log1p(train["SalePrice"])


In [None]:
from xgboost import XGBRegressor
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
import numpy as np

# Features (processed, unscaled) and log1p target
X_tr = train_processed
X_te = test_processed
y_log = np.log1p(train["SalePrice"])

# Hyper-parameters used for every fold's model
xgb_cv_params = dict(
    n_estimators=2000,       # fixed number of trees
    learning_rate=0.01,
    max_depth=4,
    subsample=0.7,
    colsample_bytree=0.8,
    reg_lambda=1.0,
    random_state=42,
    tree_method="hist",      # or "auto" if hist not supported
)

kf = KFold(n_splits=5, shuffle=True, random_state=42)
cv_rmse = []

# One fresh model per fold; the fold RMSE is measured on the log scale.
for tr_idx, val_idx in kf.split(X_tr):
    Xtr, ytr = X_tr.iloc[tr_idx], y_log.iloc[tr_idx]
    Xval, yval = X_tr.iloc[val_idx], y_log.iloc[val_idx]

    xgb = XGBRegressor(**xgb_cv_params)
    xgb.fit(Xtr, ytr, verbose=False)

    pred_val = xgb.predict(Xval)
    rmse = np.sqrt(mean_squared_error(yval, pred_val))
    cv_rmse.append(rmse)

print("XGB 5-fold CV RMSE (log):", np.mean(cv_rmse), "+/-", np.std(cv_rmse))



In [None]:
# Final XGBoost model: same hyper-parameters as in the cross-validation cell,
# fitted on all training rows.
xgb_final_params = dict(
    n_estimators=2000,
    learning_rate=0.01,
    max_depth=4,
    subsample=0.7,
    colsample_bytree=0.8,
    reg_lambda=1.0,
    random_state=42,
    tree_method="hist",
)
xgb_final = XGBRegressor(**xgb_final_params)
xgb_final.fit(X_tr, y_log)

# Predict on the test features and undo the log1p transform
xgb_test_log = xgb_final.predict(X_te)
xgb_preds = np.expm1(xgb_test_log)


##Blend both Lasso and XGBoost

In [None]:

# Equal-weight average of the Lasso and XGBoost test predictions
# (both are already on the SalePrice scale).
lasso_weight = xgb_weight = 0.5
blend_preds = lasso_weight * lasso_preds_test + xgb_weight * xgb_preds

sub_blend = pd.DataFrame({"Id": test["Id"], "SalePrice": blend_preds})
sub_blend.to_csv("submission_blend.csv", index=False)
print("✅ Saved submission_blend.csv")


In [None]:
# Upload submission_blend.csv to the competition via the Kaggle CLI.
# Running this cell performs a new submission every time it is executed.
!kaggle competitions submit -c house-prices-advanced-regression-techniques -f submission_blend.csv -m "LassoCV xgboostCV blend submission"

##Finding best blend weight of xgboost and lasso

In [None]:
from sklearn.base import clone
from sklearn.preprocessing import StandardScaler

# Out-of-fold (OOF) predictions of Lasso and XGBoost on the log target; they are
# used afterwards to choose the blend weight.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

lasso_oof = np.zeros(len(X_tr))
xgb_oof = np.zeros(len(X_tr))

for tr_idx, val_idx in kf.split(X_tr):
    Xtr, Xval = X_tr.iloc[tr_idx], X_tr.iloc[val_idx]
    ytr, yval = y_log.iloc[tr_idx], y_log.iloc[val_idx]

    # ---- Scale for Lasso ----
    # A fold-local scaler is used so the notebook-level `scaler` (fitted on the
    # full training set) is not overwritten by this loop.
    fold_scaler = StandardScaler()
    Xtr_scaled = fold_scaler.fit_transform(Xtr)
    Xval_scaled = fold_scaler.transform(Xval)

    # ---- Lasso ----
    # Fit an unfitted clone with the same hyper-parameters, so the notebook-level
    # `lasso` keeps its full-data fit instead of ending up fitted on the last fold.
    fold_lasso = clone(lasso)
    fold_lasso.fit(Xtr_scaled, ytr)
    lasso_oof[val_idx] = fold_lasso.predict(Xval_scaled)

    # ---- XGB ---- (trees are fitted on the unscaled features)
    xgb = XGBRegressor(
        n_estimators=2000,
        learning_rate=0.01,
        max_depth=4,
        subsample=0.7,
        colsample_bytree=0.8,
        reg_lambda=1.0,
        random_state=42,
        tree_method="hist"
    )
    xgb.fit(Xtr, ytr, verbose=False)
    xgb_oof[val_idx] = xgb.predict(Xval)



In [None]:
# Grid-search the Lasso weight w in [0, 1] (step 0.05) that minimises the RMSE of
# w * lasso_oof + (1 - w) * xgb_oof against the log target. Ties keep the
# smallest w, because only a strictly lower RMSE replaces the current best.
best_w, best_rmse = 0, float("inf")

for w in np.linspace(0, 1, 21):   # 0.0, 0.05, 0.10, ..., 1.0
    blend = w * lasso_oof + (1 - w) * xgb_oof
    rmse = np.sqrt(mean_squared_error(y_log, blend))
    if not rmse < best_rmse:
        continue
    best_w, best_rmse = w, rmse

print("Best weight for Lasso:", round(best_w, 2))
print("CV RMSE with blend:", best_rmse)


In [None]:
# Write one submission file per candidate Lasso weight; the file name carries
# the weight formatted with two decimals (e.g. sub_blend_0.35.csv).
candidates = [0.30, 0.32, 0.35, 0.4, 0.45, 0.5, 0.55, 0.6, 0.65]
for w in candidates:
    preds = w*lasso_preds_test + (1-w)*xgb_preds
    blend_file = f"sub_blend_{w:.2f}.csv"
    blend_frame = pd.DataFrame({"Id": test["Id"], "SalePrice": preds})
    blend_frame.to_csv(blend_file, index=False)


In [None]:
import os

# Show the contents of the current working directory (e.g. to confirm which
# submission files have been written).
print(os.listdir("."))

In [None]:
# Upload the blend written with Lasso weight 0.35 (sub_blend_0.35.csv) via the Kaggle CLI.
# Running this cell performs a new submission every time it is executed.
!kaggle competitions submit -c house-prices-advanced-regression-techniques -f sub_blend_0.35.csv -m "LassoCV xgboostCV blend(.35) submission"

##Stacking with a meta-model

In [None]:
from sklearn.linear_model import Ridge

# Meta-features: one column per base model, all on the log1p scale.
# Train side: out-of-fold predictions of Lasso and XGBoost.
stack_train = np.column_stack((lasso_oof, xgb_oof))
# Test side: the price-scale test predictions are mapped back with log1p so
# they match the scale of the OOF columns.
stack_test = np.column_stack((np.log1p(lasso_preds_test), np.log1p(xgb_preds)))

# Ridge meta-model fitted on the OOF columns against the log target
meta = Ridge(alpha=1.0, random_state=42)
meta.fit(stack_train, y_log)

# Predict on the test meta-features and return to the SalePrice scale
stack_preds_log = meta.predict(stack_test)
stack_preds = np.expm1(stack_preds_log)

stack_submission = pd.DataFrame({"Id": test["Id"], "SalePrice": stack_preds})
stack_submission.to_csv("submission_stack.csv", index=False)


In [None]:
# Upload submission_stack.csv to the competition via the Kaggle CLI.
# Running this cell performs a new submission every time it is executed.
!kaggle competitions submit -c house-prices-advanced-regression-techniques -f submission_stack.csv -m "LassoCV xgboostCV Stack submission"

In [None]:
pip install lightgbm

3-model stacking pipeline with Lasso + XGB + LightGBM stacked using a Ridge meta-model

In [None]:
from sklearn.linear_model import LassoCV, RidgeCV
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
import numpy as np
import pandas as pd

# Base models for the 3-model stack (fitted per fold in the next cell).
lasso = LassoCV(alphas=np.logspace(-4, -0.5, 50), max_iter=50000, random_state=42)
xgb = XGBRegressor(
    n_estimators=2000,
    learning_rate=0.01,
    max_depth=4,
    subsample=0.7,
    colsample_bytree=0.8,
    reg_lambda=1.0,
    random_state=42,
    tree_method="hist"
)
lgbm = LGBMRegressor(
    n_estimators=2000,
    learning_rate=0.01,
    max_depth=4,
    subsample=0.7,
    # LightGBM only applies row subsampling (subsample / bagging_fraction) when
    # subsample_freq (bagging_freq) is non-zero; with the default of 0 the
    # subsample=0.7 setting above is ignored.
    subsample_freq=1,
    colsample_bytree=0.8,
    reg_lambda=1.0,
    random_state=42
)


In [None]:
from sklearn.preprocessing import StandardScaler

kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Out-of-fold predictions on the training rows, and test predictions averaged
# over the folds, for each base model (all on the log1p scale).
oof_lasso, oof_xgb, oof_lgbm = np.zeros(len(train_processed)), np.zeros(len(train_processed)), np.zeros(len(train_processed))
test_lasso, test_xgb, test_lgbm = np.zeros(len(test_processed)), np.zeros(len(test_processed)), np.zeros(len(test_processed))

cv_rmse_lasso, cv_rmse_xgb, cv_rmse_lgbm = [], [], []

y_log = np.log1p(train["SalePrice"])
X, T = train_processed, test_processed

for fold, (tr_idx, val_idx) in enumerate(kf.split(X), 1):
    # Fold-local names: the notebook-level X_tr (full training frame used by
    # earlier cells) is deliberately not reassigned here.
    fold_X_tr, fold_X_val = X.iloc[tr_idx], X.iloc[val_idx]
    y_tr, y_val = y_log.iloc[tr_idx], y_log.iloc[val_idx]

    # ---- Lasso ----
    # Lasso is fitted on standardised features, as in the earlier Lasso cells;
    # the scaler is fitted on this fold's training rows only and then applied to
    # the validation rows and to the test set.
    fold_scaler = StandardScaler().fit(fold_X_tr)
    lasso.fit(fold_scaler.transform(fold_X_tr), y_tr)
    pred_val = lasso.predict(fold_scaler.transform(fold_X_val))
    oof_lasso[val_idx] = pred_val
    test_lasso += lasso.predict(fold_scaler.transform(T)) / kf.n_splits
    cv_rmse_lasso.append(np.sqrt(mean_squared_error(y_val, pred_val)))

    # ---- XGB ---- (unscaled features)
    xgb.fit(fold_X_tr, y_tr, verbose=False)
    pred_val = xgb.predict(fold_X_val)
    oof_xgb[val_idx] = pred_val
    test_xgb += xgb.predict(T) / kf.n_splits
    cv_rmse_xgb.append(np.sqrt(mean_squared_error(y_val, pred_val)))

    # ---- LGBM ---- (unscaled features)
    lgbm.fit(fold_X_tr, y_tr)
    pred_val = lgbm.predict(fold_X_val)
    oof_lgbm[val_idx] = pred_val
    test_lgbm += lgbm.predict(T) / kf.n_splits
    cv_rmse_lgbm.append(np.sqrt(mean_squared_error(y_val, pred_val)))

print("CV RMSE Lasso:", np.mean(cv_rmse_lasso), "+/-", np.std(cv_rmse_lasso))
print("CV RMSE XGB:", np.mean(cv_rmse_xgb), "+/-", np.std(cv_rmse_xgb))
print("CV RMSE LGBM:", np.mean(cv_rmse_lgbm), "+/-", np.std(cv_rmse_lgbm))


In [None]:
# Stack OOF predictions
X_meta = np.vstack([oof_lasso, oof_xgb, oof_lgbm]).T
T_meta = np.vstack([test_lasso, test_xgb, test_lgbm]).T

ridge_meta = RidgeCV(alphas=[0.1, 1.0, 10.0], cv=5)
ridge_meta.fit(X_meta, y_log)

# OOF meta-preds for CV RMSE
oof_meta = ridge_meta.predict(X_meta)
cv_rmse_meta = np.sqrt(mean_squared_error(y_log, oof_meta))

print("✅ CV RMSE Stacked Model:", cv_rmse_meta)

# Final test predictions
final_preds = np.expm1(ridge_meta.predict(T_meta))


In [None]:
# Submission file for the 3-model stack: test Ids next to the final predictions.
submission = pd.DataFrame({"Id": test["Id"], "SalePrice": final_preds})
submission.to_csv("submission_3Model_stack.csv", index=False)
print("Submission file created: submission_3Model_stack.csv")


In [None]:
# Upload submission_3Model_stack.csv to the competition via the Kaggle CLI.
# Running this cell performs a new submission every time it is executed.
!kaggle competitions submit -c house-prices-advanced-regression-techniques -f submission_3Model_stack.csv -m "LassoCV xgboostCV lightgbm_Stack submission"