In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, BaggingClassifier, AdaBoostClassifier, StackingClassifier, VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

# Load the Heart Disease dataset
url = "heart.csv"
df = pd.read_csv(url)

# Preprocessing: Separate features and target
X = df.drop('target', axis=1)
y = df['target']

# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the dataset
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Helper function to evaluate models and print accuracy
def evaluate_model(model, X_test, y_test, model_name="Model"):
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f"{model_name} Accuracy: {accuracy:.4f}")

# -------------------------------
# 1. Bagging: Multiple models in Bagging
# -------------------------------
base_models = [DecisionTreeClassifier(), SVC(probability=True)]
for base_model in base_models:
    bagging_clf = BaggingClassifier(estimator=base_model, n_estimators=100, random_state=42)
    bagging_clf.fit(X_train, y_train)
    evaluate_model(bagging_clf, X_test, y_test, model_name=f"Bagging with {base_model.__class__.__name__}")

# -------------------------------
# 2. Boosting: Gradient Boosting and AdaBoost with different base models
# -------------------------------
# Gradient Boosting
boosting_clf = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, random_state=42)
boosting_clf.fit(X_train, y_train)
evaluate_model(boosting_clf, X_test, y_test, model_name="Boosting (Gradient Boosting)")

# AdaBoost
adaboost_clf = AdaBoostClassifier(estimator=DecisionTreeClassifier(), n_estimators=100, random_state=42)
adaboost_clf.fit(X_train, y_train)
evaluate_model(adaboost_clf, X_test, y_test, model_name="Boosting (AdaBoost with Decision Tree)")

# -------------------------------
# 3. Voting: Hard and Soft Voting with multiple models
# -------------------------------
voting_estimators = [
    ('lr', LogisticRegression()),
    ('rf', RandomForestClassifier()),
    ('svc', SVC(probability=True))
]

# Hard Voting
voting_clf_hard = VotingClassifier(estimators=voting_estimators, voting='hard')
voting_clf_hard.fit(X_train, y_train)
evaluate_model(voting_clf_hard, X_test, y_test, model_name="Voting (Hard)")

# Soft Voting
voting_clf_soft = VotingClassifier(estimators=voting_estimators, voting='soft')
voting_clf_soft.fit(X_train, y_train)
evaluate_model(voting_clf_soft, X_test, y_test, model_name="Voting (Soft)")

# -------------------------------
# 4. Stacking: Using multiple models in stacking
# -------------------------------
stacking_estimators = [
    ('rf', RandomForestClassifier(n_estimators=100, random_state=42)),
    ('svc', SVC(probability=True)),
    ('dt', DecisionTreeClassifier())
]

# Meta learner: Logistic Regression
stacking_clf = StackingClassifier(estimators=stacking_estimators, final_estimator=LogisticRegression())
stacking_clf.fit(X_train, y_train)
evaluate_model(stacking_clf, X_test, y_test, model_name="Stacking (Meta-Learner Logistic Regression)")


Bagging with DecisionTreeClassifier Accuracy: 0.8361
Bagging with SVC Accuracy: 0.8689
Boosting (Gradient Boosting) Accuracy: 0.7869
Boosting (AdaBoost with Decision Tree) Accuracy: 0.8197




Voting (Hard) Accuracy: 0.8525
Voting (Soft) Accuracy: 0.8852
Stacking (Meta-Learner Logistic Regression) Accuracy: 0.8525


### Dataset Features for house 

The dataset contains features like median income, house age, rooms, population, and house prices for housing blocks.

| **Feature**   | **Description**                  |
|---------------|----------------------------------|
| `MedInc`      | Median income in the block       |
| `HouseAge`    | Median house age in the block    |
| `AveRooms`    | Average number of rooms per house|
| `AveBedrms`   | Average number of bedrooms       |
| `Population`  | Block population                 |
| `AveOccup`    | Average house occupancy          |
| `Latitude`    | Latitude of the block            |
| `Longitude`   | Longitude of the block           |
| `Price`       | Median house value (Target)      |

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, AdaBoostRegressor, StackingRegressor, VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score


# Load the Heart Disease dataset
url = "house_data.csv"
df = pd.read_csv(url)

# Preprocessing: Separate features and target
X = df.drop('Price', axis=1)
y = df['Price']


# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the dataset
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Helper function to evaluate models and print R^2 and MSE
def evaluate_model(model, X_test, y_test, model_name="Model"):
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    print(f"{model_name} R^2 Score: {r2:.4f}, MSE: {mse:.4f}")

# -------------------------------
# 1. Bagging: Multiple models in Bagging
# -------------------------------
base_models = [DecisionTreeRegressor(), SVR()]
for base_model in base_models:
    bagging_clf = BaggingRegressor(estimator=base_model, n_estimators=100, random_state=42)
    bagging_clf.fit(X_train, y_train)
    evaluate_model(bagging_clf, X_test, y_test, model_name=f"Bagging with {base_model.__class__.__name__}")

# -------------------------------
# 2. Boosting: Gradient Boosting and AdaBoost with different base models
# -------------------------------
# Gradient Boosting
boosting_clf = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
boosting_clf.fit(X_train, y_train)
evaluate_model(boosting_clf, X_test, y_test, model_name="Boosting (Gradient Boosting)")

# AdaBoost
adaboost_clf = AdaBoostRegressor(estimator=DecisionTreeRegressor(), n_estimators=100, random_state=42)
adaboost_clf.fit(X_train, y_train)
evaluate_model(adaboost_clf, X_test, y_test, model_name="Boosting (AdaBoost with Decision Tree)")

# -------------------------------
# 3. Voting: Hard and Soft Voting with multiple models
# -------------------------------
voting_estimators = [
    ('lr', LinearRegression()),
    ('rf', RandomForestRegressor()),
    ('svr', SVR())
]

# Voting Regressor (no hard/soft distinction here, just averaged predictions)
voting_clf = VotingRegressor(estimators=voting_estimators)
voting_clf.fit(X_train, y_train)
evaluate_model(voting_clf, X_test, y_test, model_name="Voting Regressor")

# -------------------------------
# 4. Stacking: Using multiple models in stacking
# -------------------------------
stacking_estimators = [
    ('rf', RandomForestRegressor(n_estimators=100, random_state=42)),
    ('svr', SVR()),
    ('dt', DecisionTreeRegressor())
]

# Meta learner: Linear Regression
stacking_clf = StackingRegressor(estimators=stacking_estimators, final_estimator=LinearRegression())
stacking_clf.fit(X_train, y_train)
evaluate_model(stacking_clf, X_test, y_test, model_name="Stacking (Meta-Learner Linear Regression)")


Bagging with DecisionTreeRegressor R^2 Score: 0.8048, MSE: 0.2557


In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, AdaBoostRegressor, StackingRegressor, VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score


# Load the Heart Disease dataset
url = "house_data.csv"
df = pd.read_csv(url)

In [2]:
df.columns

Index(['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup',
       'Latitude', 'Longitude', 'Price'],
      dtype='object')