In [14]:
#8a

# Step 1: Import Libraries
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Step 2: Load and Prepare Data
# Ten-row toy dataset: two integer features and a binary label column.
df = pd.DataFrame(
    dict(
        X1=[1, 2, 3, 4, 5, 6, 6, 7, 9, 9],
        X2=[5, 3, 6, 8, 1, 9, 5, 8, 9, 2],
        label=[1, 1, 0, 1, 0, 1, 0, 1, 0, 0],
    )
)

# Pull the feature matrix and the label vector out as plain NumPy arrays.
X = df.loc[:, ['X1', 'X2']].to_numpy()
y = df.loc[:, 'label'].to_numpy()

# The AdaBoost weight update multiplies labels by predictions, so the labels
# are recoded: label 1 stays +1, every other label becomes -1.
y_transformed = np.where(y != 1, -1, 1)

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_transformed,
    random_state=42,
    test_size=0.3,
)

# Step 3: Initialize Parameters
n_estimators = 5                               # number of boosting rounds
n_samples = len(X_train)                       # rows in the training split
weights = np.full(n_samples, 1.0 / n_samples)  # every sample starts with the same weight
classifiers, alphas = [], []                   # fitted weak learners and their vote weights

# Step 4: Train Weak Classifiers
# Each round fits a depth-1 tree on the currently weighted training set,
# scores it, and re-weights the samples so that the next tree concentrates
# on the points this one misclassified.
for _ in range(n_estimators):
    clf = DecisionTreeClassifier(max_depth=1)
    clf.fit(X_train, y_train, sample_weight=weights)

    y_pred = clf.predict(X_train)

    # Weighted error: share of the total sample weight on misclassified points
    err = np.sum(weights * (y_pred != y_train)) / np.sum(weights)

    # A learner that is no better than chance would get alpha <= 0 (and
    # alpha = -inf at err == 1, which turns the weights into inf/NaN).
    # With alpha == 0 the weights would not change, so every later round
    # would refit the same tree. Discard the learner and stop boosting.
    if err >= 0.5:
        break

    # Floor the error so a perfect learner yields a large but finite alpha
    # instead of a division by zero inside the log.
    err = max(err, 1e-10)

    # Compute alpha: the learner's vote weight, larger for smaller error
    alpha = 0.5 * np.log((1 - err) / err)

    # Update weights: y_train * y_pred is +1 for a hit and -1 for a miss,
    # so hits are scaled by exp(-alpha) and misses by exp(+alpha)
    weights = weights * np.exp(-alpha * y_train * y_pred)
    weights /= np.sum(weights)  # normalize so the weights sum to 1 again

    classifiers.append(clf)
    alphas.append(alpha)

# Step 5: Make Predictions
def predict(X, models=None, model_weights=None):
    """Classify samples with the weighted vote of the boosted ensemble.

    Parameters
    ----------
    X : array with a ``shape`` attribute
        Samples to classify, one row per sample.
    models : sequence of fitted estimators, optional
        Weak learners exposing ``predict(X)``; their outputs are expected to
        be -1 / +1 labels. Defaults to the notebook-level ``classifiers`` list.
    model_weights : sequence of float, optional
        Vote weight of each learner, paired with ``models`` by position.
        Defaults to the notebook-level ``alphas`` list.

    Returns
    -------
    numpy.ndarray
        One float label per row of ``X``: +1.0 where the weighted vote is
        non-negative, -1.0 otherwise.
    """
    if models is None:
        models = classifiers
    if model_weights is None:
        model_weights = alphas

    final_score = np.zeros(X.shape[0])
    for clf, alpha in zip(models, model_weights):
        final_score += alpha * clf.predict(X)

    # np.sign would return 0 on an exact tie, which is not a valid class label
    # and would always be scored as wrong; ties are resolved in favour of +1.
    return np.where(final_score >= 0, 1.0, -1.0)

# Run the boosted ensemble on the held-out samples
y_pred_test = predict(X_test)

# Step 6: Evaluate Model
# Fraction of held-out samples whose predicted label equals the true label
acc = accuracy_score(y_true=y_test, y_pred=y_pred_test)

# Step 7: Output Results
for caption, value in (
    ("Final Accuracy on Test Set:", acc),
    ("Classifier Weights (alphas):", alphas),
):
    print(caption, value)

Final Accuracy on Test Set: 0.3333333333333333
Classifier Weights (alphas): [np.float64(0.8958797346140275), np.float64(0.8047189562170503), np.float64(1.0986122886681098), np.float64(0.8047189562170504), np.float64(0.8047189562170504)]


In [15]:
#8b

# -----------------------------------------------------------
# Step 1: Import Necessary Libraries
# -----------------------------------------------------------
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error

# -----------------------------------------------------------
# Step 2: Prepare the Data (Generate Synthetic Dataset)
# -----------------------------------------------------------

# Create synthetic data
# Seeding NumPy's global RNG makes the features and the noise identical on every run.
np.random.seed(42)
X = np.random.rand(500, 3)  # 500 samples, 3 features
# Linear target 4*f1 - 2*f2 + 3*f3 plus Gaussian noise scaled by 0.1
y = X[:, 0] * 4 - X[:, 1] * 2 + X[:, 2] * 3 + 0.1 * np.random.randn(500)

# Convert to DataFrame (optional)
df = pd.DataFrame(X, columns=['f1', 'f2', 'f3']).assign(target=y)

# Split into features and target
X = df.loc[:, ['f1', 'f2', 'f3']]
y = df.loc[:, 'target']

# Train-test split
# Keep 20% of the rows aside for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

# -----------------------------------------------------------
# Step 3: Initialize Parameters
# -----------------------------------------------------------
learning_rate = 0.1   # factor applied to every tree's contribution
n_estimators = 100    # number of boosting rounds

trees, learning_rates = [], []   # fitted trees and the rate used with each one

# -----------------------------------------------------------
# Step 4: Initialize the Base Model
# -----------------------------------------------------------
F0 = np.mean(y_train)                             # constant starting prediction: mean training target
F = np.full(shape=y_train.shape, fill_value=F0)   # running prediction for every training row

# -----------------------------------------------------------
# Step 5: Gradient Boosting Rounds
# -----------------------------------------------------------
for round_idx in range(n_estimators):

    # Pseudo-residuals: gap between the training targets and the current prediction
    residuals = y_train - F

    # Fit a depth-3 regression tree to those residuals (fit returns the estimator itself)
    tree = DecisionTreeRegressor(random_state=42, max_depth=3).fit(X_train, residuals)

    # The tree's estimate of the residuals on the training rows
    tree_predictions = tree.predict(X_train)

    # Move the running prediction a scaled-down step towards the targets
    F += tree_predictions * learning_rate

    # Keep the tree together with the rate it was applied with, for prediction later
    trees.append(tree)
    learning_rates.append(learning_rate)

# -----------------------------------------------------------
# Step 6: Make Predictions on Test Data
# -----------------------------------------------------------
F_test = np.full(shape=y_test.shape, fill_value=F0)   # every test row starts at the base prediction

# Add each tree's scaled contribution, in the order the trees were trained
for fitted_tree, shrinkage in zip(trees, learning_rates):
    F_test += shrinkage * fitted_tree.predict(X_test)

# -----------------------------------------------------------
# Step 7: Evaluate the Model
# -----------------------------------------------------------
# F holds the final training-set predictions built up during boosting
train_mse = mean_squared_error(y_true=y_train, y_pred=F)
test_mse = mean_squared_error(y_true=y_test, y_pred=F_test)

for caption, value in (("Training MSE:", train_mse), ("Testing MSE:", test_mse)):
    print(caption, value)



Training MSE: 0.009237828527151935
Testing MSE: 0.06198274015735236
