# 04_ensemble_adaboost_bagging_casinj


In [None]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Load the records from the CSV file (path is relative to the working directory).
data = pd.read_csv('datawithTime.csv')

# Column holding the label to predict.
target_column = 'CASINJ'

# Columns used as model inputs.
# NOTE(review): 'STATE ' carries a trailing space; it is kept verbatim here
# because it presumably matches the CSV header exactly — verify against the file.
feature_columns = [
    'CARS', 'TEMP', 'TRNSPD', 'TONS', 'POSITON1', 'HEADEND1', 'LOADF1',
    'EMPTYF1', 'HIGHSPD', 'hour', 'minute', 'RAILROAD', 'YEAR', 'MONTH',
    'DAY', 'STATE ', 'VISIBLTY', 'WEATHER', 'TYPEQ', 'TRKCLAS', 'TYPTRK',
    'CAUSE', 'ACCTRK',
]

# Split the frame into the label vector and the feature matrix.
y = data[target_column]
X = data[feature_columns]

def initialize_weights(num_samples):
    """Return a uniform weight vector of length ``num_samples``.

    Every entry equals ``1 / num_samples``, so the vector sums to 1.
    """
    uniform = np.full(num_samples, 1.0)
    uniform /= num_samples
    return uniform

def train_weak_classifier(X, y, weights):
    """Fit and return a depth-1 decision tree on ``(X, y)``.

    ``weights`` is forwarded to ``fit`` as ``sample_weight``.
    """
    # ``fit`` returns the fitted estimator itself, so the call can be chained.
    return DecisionTreeClassifier(max_depth=1).fit(X, y, sample_weight=weights)

def calculate_error_rate(classifier, X, y, weights):
    """Return the weighted error of ``classifier`` on ``(X, y)``.

    The result is the sum of ``weights`` over the samples whose prediction
    differs from the label in ``y``.
    """
    misclassified = classifier.predict(X) != y
    return np.sum(weights * misclassified)

def calculate_classifier_weight(error_rate):
    """Return the AdaBoost classifier weight ``0.5 * ln((1 - e) / e)``.

    Parameters
    ----------
    error_rate : float
        Weighted error of the weak classifier, expected in ``[0, 1]``.

    Returns
    -------
    float
        Positive when the error is below 0.5, zero at 0.5, negative above.
    """
    # A perfect (e == 0) or perfectly wrong (e == 1) weak learner would make
    # the ratio divide by zero or take log(0); clip so the weight stays finite.
    eps = 1e-10
    error_rate = np.clip(error_rate, eps, 1.0 - eps)
    gamma = 0.5 * np.log((1 - error_rate) / error_rate)
    return gamma

def update_weights(weights, classifier, gamma, X, y):
    """Re-weight the samples after one boosting round.

    Misclassified samples are multiplied by ``exp(+gamma)`` and correctly
    classified ones by ``exp(-gamma)``, so that (for ``gamma > 0``) the next
    weak learner focuses on the current mistakes. The result is normalised
    to sum to 1.

    Parameters
    ----------
    weights : array-like
        Current per-sample weights.
    classifier : object
        Fitted weak learner exposing ``predict(X)``.
    gamma : float
        Weight of ``classifier`` in the ensemble.
    X, y : array-like
        Training features and labels.

    Returns
    -------
    numpy.ndarray
        New per-sample weights, summing to 1.
    """
    incorrect = np.asarray(classifier.predict(X) != y)
    # +1 for a mistake, -1 for a hit. Comparing predictions with the labels
    # keeps the update independent of how the labels are encoded.
    direction = np.where(incorrect, 1.0, -1.0)
    unnormalized = np.asarray(weights, dtype=float) * np.exp(gamma * direction)
    # Normalise with the very same quantity that was just computed, so the
    # weights always form a distribution.
    new_weights = unnormalized / unnormalized.sum()
    return new_weights

def adaboost(X, y, num_rounds):
    """Train ``num_rounds`` weak classifiers with boosting and return a voter.

    Each round fits a weak classifier on the current sample weights, computes
    its weighted error and classifier weight, prints both, and then updates
    the sample weights.

    Returns
    -------
    callable
        ``strong_classifier(X)``, which returns ``np.sign`` of the
        classifier-weighted sum of the weak learners' predictions.
        NOTE(review): a sign-based vote presumably assumes labels encoded as
        -1/+1 — confirm against the target column.
    """
    sample_weights = initialize_weights(len(X))
    ensemble = []  # (classifier weight, weak classifier) pairs in training order

    for round_num in range(num_rounds):
        print(f"\nBoosting Round {round_num + 1}")

        stump = train_weak_classifier(X, y, sample_weights)

        stump_error = calculate_error_rate(stump, X, y, sample_weights)
        print(f"Error rate: {stump_error:.4f}")

        gamma = calculate_classifier_weight(stump_error)
        print(f"Classifier weight (γk): {gamma:.4f}")

        sample_weights = update_weights(sample_weights, stump, gamma, X, y)
        ensemble.append((gamma, stump))

    def strong_classifier(X):
        """Combine the weak learners' predictions into one signed vote."""
        weighted_votes = np.zeros(len(X))
        for stump_weight, stump in ensemble:
            weighted_votes += stump_weight * stump.predict(X)
        return np.sign(weighted_votes)

    return strong_classifier

K = 5  # Number of boosted models to train; adjust as needed
alpha = 0.2  # Fraction of the data drawn for each model (passed as train_size)
num_rounds = 10  # Boosting rounds per model; adjust as needed

strong_classifiers = []

for _ in range(K):
    # Draw a fresh random subsample for this model; random_state=None means
    # every iteration gets a different split.
    X_subset, _, y_subset, _ = train_test_split(X, y, train_size=alpha, random_state=None)

    # adaboost() initialises its own sample weights, so none are built here.
    strong_classifier = adaboost(X_subset, y_subset, num_rounds)

    strong_classifiers.append(strong_classifier)



In [None]:

import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Read the records from the CSV file (path is relative to the working directory).
data = pd.read_csv('datawithTime.csv')

# Name of the label column.
target_column = 'CASINJ'

# Names of the input columns.
# NOTE(review): 'STATE ' has a trailing space; kept verbatim because it
# presumably mirrors the CSV header — verify against the file.
feature_columns = [
    'CARS', 'TEMP', 'TRNSPD', 'TONS', 'POSITON1', 'HEADEND1', 'LOADF1',
    'EMPTYF1', 'HIGHSPD', 'hour', 'minute', 'RAILROAD', 'YEAR', 'MONTH',
    'DAY', 'STATE ', 'VISIBLTY', 'WEATHER', 'TYPEQ', 'TRKCLAS', 'TYPTRK',
    'CAUSE', 'ACCTRK',
]

# Label vector and feature matrix.
y = data[target_column]
X = data[feature_columns]

def initialize_weights(num_samples):
    """Print ``num_samples`` and return a uniform weight vector of that length.

    Every entry equals ``1 / num_samples``, so the vector sums to 1.
    """
    print(num_samples)
    uniform = np.full(num_samples, 1.0)
    uniform /= num_samples
    return uniform

def train_weak_classifier(X, y, weights):
    """Fit and return a depth-1 decision tree on ``(X, y)``.

    ``weights`` is forwarded to ``fit`` as ``sample_weight``.
    """
    # ``fit`` returns the fitted estimator itself, so the call can be chained.
    return DecisionTreeClassifier(max_depth=1).fit(X, y, sample_weight=weights)

def calculate_error_rate(classifier, X, y, weights):
    """Print and return the weighted error of ``classifier`` on ``(X, y)``.

    The error is the sum of ``weights`` over the samples whose prediction
    differs from the label in ``y``.
    """
    misclassified = classifier.predict(X) != y
    weighted_error = np.sum(weights * misclassified)
    print(weighted_error)
    return weighted_error

def calculate_classifier_weight(error_rate):
    """Print and return the AdaBoost classifier weight ``0.5 * ln((1 - e) / e)``.

    Parameters
    ----------
    error_rate : float
        Weighted error of the weak classifier, expected in ``[0, 1]``.

    Returns
    -------
    float
        Positive when the error is below 0.5, zero at 0.5, negative above.
    """
    # A perfect (e == 0) or perfectly wrong (e == 1) weak learner would make
    # the ratio divide by zero or take log(0); clip so the weight stays finite.
    eps = 1e-10
    error_rate = np.clip(error_rate, eps, 1.0 - eps)
    gamma = 0.5 * np.log((1 - error_rate) / error_rate)
    print(gamma)
    return gamma

def update_weights(weights, classifier, gamma, X, y):
    """Re-weight the samples after one boosting round and print the result.

    Misclassified samples are multiplied by ``exp(+gamma)`` and correctly
    classified ones by ``exp(-gamma)``, so that (for ``gamma > 0``) the next
    weak learner focuses on the current mistakes. The result is normalised
    to sum to 1.

    Parameters
    ----------
    weights : array-like
        Current per-sample weights.
    classifier : object
        Fitted weak learner exposing ``predict(X)``.
    gamma : float
        Weight of ``classifier`` in the ensemble.
    X, y : array-like
        Training features and labels.

    Returns
    -------
    numpy.ndarray
        New per-sample weights, summing to 1.
    """
    incorrect = np.asarray(classifier.predict(X) != y)
    # +1 for a mistake, -1 for a hit: mistakes must gain weight, not lose it.
    direction = np.where(incorrect, 1.0, -1.0)
    unnormalized = np.asarray(weights, dtype=float) * np.exp(gamma * direction)
    new_weights = unnormalized / unnormalized.sum()
    print(new_weights)
    return new_weights

def adaboost_one_vs_all(X, y, num_classes, num_rounds):
    """Train one boosted ensemble per class label and return a joint predictor.

    For every ``class_label`` in ``range(num_classes)`` the labels are
    binarised as ``y == class_label`` and ``num_rounds`` weak classifiers are
    boosted on that binary problem, printing progress along the way.
    NOTE(review): this presumably expects labels encoded as
    ``0 .. num_classes - 1`` — confirm against the target column.

    Returns
    -------
    callable
        ``strong_classifiers(X)``, which returns, per sample, the index of the
        class whose ensemble has the largest weighted vote.
    """
    sample_count = len(X)
    per_class_models = []   # one list of weak classifiers per class
    per_class_gammas = []   # matching list of classifier weights per class

    for class_label in range(num_classes):
        print(f"\nTraining AdaBoost for Class {class_label}")
        # 1 where the sample belongs to the current class, 0 otherwise.
        y_binary = (y == class_label).astype(int)

        sample_weights = initialize_weights(sample_count)
        models = []
        gammas = []

        for round_num in range(num_rounds):
            print(f"\nBoosting Round {round_num + 1}")

            stump = train_weak_classifier(X, y_binary, sample_weights)

            stump_error = calculate_error_rate(stump, X, y_binary, sample_weights)
            print(f"Error rate: {stump_error:.4f}")

            gamma = calculate_classifier_weight(stump_error)
            print(f"Classifier weight (γk): {gamma:.4f}")

            sample_weights = update_weights(sample_weights, stump, gamma, X, y_binary)

            models.append(stump)
            gammas.append(gamma)

        per_class_models.append(models)
        per_class_gammas.append(gammas)

    def strong_classifiers(X):
        """Return the arg-max class index of the per-class weighted votes."""
        class_votes = []
        for models, gammas in zip(per_class_models, per_class_gammas):
            score = np.zeros(len(X))
            for stump_weight, stump in zip(gammas, models):
                score += stump_weight * stump.predict(X)
            class_votes.append(score)
        return np.argmax(class_votes, axis=0)

    return strong_classifiers

K = 10  # Number of one-vs-all models to train; adjust as needed
alpha = 0.2  # Fraction of the data drawn for each model (passed as train_size)
num_rounds = 10  # Boosting rounds per class; adjust as needed

num_classes = len(np.unique(y))

strong_classifiers_one_vs_all = []

for _ in range(K):
    # Draw a fresh random subsample for this model; random_state=None means
    # every iteration gets a different split.
    X_subset, _, y_subset, _ = train_test_split(X, y, train_size=alpha, random_state=None)

    # Train on the subsample and keep the result; without this step the loop
    # only splits the data and the list above stays empty.
    strong_classifier = adaboost_one_vs_all(X_subset, y_subset, num_classes, num_rounds)

    strong_classifiers_one_vs_all.append(strong_classifier)



In [None]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Read the records from the CSV file (path is relative to the working directory).
data = pd.read_csv('datawithTime.csv')

# Label column.
target_column = 'CASINJ'

# Input columns.
# NOTE(review): 'STATE ' ends with a space; kept verbatim because it
# presumably mirrors the CSV header — verify against the file.
feature_columns = [
    'CARS', 'TEMP', 'TRNSPD', 'TONS', 'POSITON1', 'HEADEND1', 'LOADF1',
    'EMPTYF1', 'HIGHSPD', 'hour', 'minute', 'RAILROAD', 'YEAR', 'MONTH',
    'DAY', 'STATE ', 'VISIBLTY', 'WEATHER', 'TYPEQ', 'TRKCLAS', 'TYPTRK',
    'CAUSE', 'ACCTRK',
]

# Label vector and feature matrix.
y = data[target_column]
X = data[feature_columns]

def train_weak_classifier(X, y, weights=None):
    """Fit and return a depth-1 decision tree on ``(X, y)``.

    ``weights`` (optional) is forwarded to ``fit`` as ``sample_weight``;
    ``None`` trains with equal weight for every sample.
    """
    classifier = DecisionTreeClassifier(max_depth=1)
    classifier.fit(X, y, sample_weight=weights)
    return classifier


def calculate_error_rate(classifier, X, y, weights=None):
    """Return the (optionally weighted) misclassification rate on ``(X, y)``.

    Without ``weights`` this is the plain fraction of wrong predictions. With
    ``weights`` it is the weight of the wrong predictions divided by the
    total weight.
    """
    incorrect = np.asarray(classifier.predict(X) != y)
    if weights is None:
        return np.mean(incorrect)
    weights = np.asarray(weights, dtype=float)
    return np.sum(weights * incorrect) / np.sum(weights)


def calculate_classifier_weight(error_rate, num_classes):
    """Return the SAMME classifier weight ``ln((1 - e) / e) + ln(K - 1)``.

    The ``ln(K - 1)`` term is added (not subtracted): a weak learner only has
    to beat random guessing among ``K`` classes, i.e. ``e < 1 - 1/K``, to get
    a positive weight.
    """
    # Clip so that a perfect or perfectly wrong learner gives a finite weight.
    eps = 1e-10
    error_rate = np.clip(error_rate, eps, 1.0 - eps)
    # max(..., 1) keeps the logarithm defined if only one class is present.
    return np.log((1.0 - error_rate) / error_rate) + np.log(max(num_classes - 1, 1))


def samme(X, y, num_rounds, num_classes):
    """Boost ``num_rounds`` decision stumps with SAMME and return a predictor.

    Each round fits a stump on the current sample weights, prints its
    weighted error and classifier weight, and then increases the weight of
    the samples it got wrong so the next stump concentrates on them.

    Returns
    -------
    callable
        ``strong_classifier(X)``, which returns, per sample, the index of the
        class with the highest accumulated classifier weight.
        NOTE(review): predictions are compared against
        ``np.arange(num_classes)``, which presumably assumes labels encoded
        as ``0 .. num_classes - 1`` — confirm against the target column.
    """
    num_samples = len(X)
    # Start from a uniform distribution over the training samples.
    weights = np.full(num_samples, 1.0) / max(num_samples, 1)
    classifiers = []
    alphas = []

    for round_num in range(num_rounds):
        print(f"\nBoosting Round {round_num + 1}")

        classifier = train_weak_classifier(X, y, weights)

        error_rate = calculate_error_rate(classifier, X, y, weights)
        print(f"Error rate: {error_rate:.4f}")

        gamma = calculate_classifier_weight(error_rate, num_classes)
        print(f"Classifier weight (γk): {gamma:.4f}")

        classifiers.append(classifier)
        alphas.append(gamma)

        # Re-weight: misclassified samples are scaled by exp(gamma), then the
        # weights are renormalised. Without this step every round would fit
        # the same stump on the same data.
        incorrect = np.asarray(classifier.predict(X) != y)
        weights = weights * np.exp(gamma * incorrect)
        weights = weights / weights.sum()

    def strong_classifier(X):
        """Return the arg-max class index of the weighted stump votes."""
        class_scores = np.zeros((len(X), num_classes))
        class_ids = np.arange(num_classes)

        for alpha, classifier in zip(alphas, classifiers):
            predictions = classifier.predict(X)
            # One-hot encode each prediction and add the stump's weight to
            # the column of the predicted class.
            class_scores += alpha * (predictions.reshape(-1, 1) == class_ids)

        predicted_classes = np.argmax(class_scores, axis=1)
        return predicted_classes

    return strong_classifier

K = 5  # Number of boosted models to train; adjust as needed
alpha = 0.2  # Fraction of the data drawn for each model (passed as train_size)
num_rounds = 10  # Boosting rounds per model; adjust as needed
num_classes = len(np.unique(y))  # Number of distinct labels in the dataset

strong_classifiers = []

for _ in range(K):
    # Draw a fresh random subsample for this model.
    X_subset, _, y_subset, _ = train_test_split(
        X,
        y,
        train_size=alpha,
        random_state=None,
    )

    strong_classifier = samme(X_subset, y_subset, num_rounds, num_classes)
    strong_classifiers.append(strong_classifier)

