In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix
import matplotlib.pyplot as plt

# Load the dataset
data = pd.read_csv("Titanic.csv")

# Columns the model consumes; declared up front so cleaning can be limited to them
features = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']

# Fill missing values for 'Embarked' (most frequent value) and 'Age' (median)
data['Embarked'] = data['Embarked'].fillna(data['Embarked'].mode()[0])
data['Age'] = data['Age'].fillna(data['Age'].median())

# Drop a row only when a column that is actually used is still missing.
# A bare dropna() also discards rows because of gaps in columns the model
# never reads.
# NOTE(review): presumably this CSV has a sparsely filled column such as
# 'Cabin', which would make a bare dropna() throw away most rows - confirm.
data = data.dropna(subset=features + ['Survived'])

# Encode categorical variables as integers
data['Sex'] = data['Sex'].map({'male': 0, 'female': 1}).astype('int64')
data['Embarked'] = data['Embarked'].map({'C': 0, 'Q': 1, 'S': 2}).astype('int64')

# Define features and target variable
X = data[features].values
y = data['Survived'].values
y = np.where(y == 0, -1, 1)  # Convert labels to -1 and 1 for AdaBoost

# Split the data into training and test sets (30% held out, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


# Function to train a decision stump
def stump(X, y, weights):
    """Find the decision stump with the lowest weighted error.

    Exhaustively tries every feature column, every distinct value of that
    column as a threshold, and both polarities. A candidate replaces the
    current best only when its error is strictly lower, so on ties the first
    candidate encountered is kept.

    Parameters
    ----------
    X : 2-D array, one row per sample and one column per feature.
    y : 1-D array of labels, compared element-wise with the -1/+1 predictions.
    weights : 1-D array of per-sample weights.

    Returns
    -------
    tuple
        ``(best_feature, best_threshold, best_polarity, min_error,
        best_predictions)``. With polarity 1 the stump predicts -1 for
        values ``<= threshold`` and +1 for values ``> threshold``; polarity
        -1 flips both sides. ``best_predictions`` is the float array of
        predictions the winning stump makes on ``X``. The first three
        entries and the predictions are ``None`` if no candidate was tried.
    """
    n_samples, n_features = X.shape
    best_feature, best_threshold, best_polarity, min_error = None, None, None, float('inf')
    best_predictions = None

    for feature in range(n_features):
        column = X[:, feature]
        for threshold in np.unique(column):
            # The two masks depend only on the threshold, so build them once
            # and reuse them for both polarities.
            at_or_below = column <= threshold
            above = column > threshold
            for polarity in (1, -1):  # Try both directions
                # Entries matching neither mask (e.g. NaN) keep the default 1.
                predictions = np.ones(n_samples)
                predictions[at_or_below] = -polarity
                predictions[above] = polarity

                # Weighted misclassification error of this candidate
                error = np.sum(weights * (predictions != y))

                if error < min_error:
                    min_error = error
                    best_feature = feature
                    best_threshold = threshold
                    best_polarity = polarity
                    best_predictions = predictions

    return best_feature, best_threshold, best_polarity, min_error, best_predictions


# Initialize weights for AdaBoost: every training sample starts equally important
weights = np.ones(len(X_train)) / len(X_train)
models = []  # (feature, threshold, polarity) of each accepted stump
alphas = []  # vote weight of the stump stored at the same index in `models`

# AdaBoost algorithm
for _ in range(10):
    feature, threshold, polarity, err, predictions = stump(X_train, y_train, weights)

    # If error is greater than 0.5, break the loop
    if err > 0.5:
        break

    # Compute alpha (importance of weak classifier); the 1e-10 keeps the
    # ratio finite when the stump makes no weighted mistakes (err == 0)
    alpha = 0.5 * np.log((1 - err) / (err + 1e-10))
    alphas.append(alpha)
    models.append((feature, threshold, polarity))

    # Update weights. y_train * predictions is +1 where the stump was right
    # and -1 where it was wrong, so misclassified samples are scaled up by
    # exp(alpha) and correctly classified ones down by exp(-alpha). Leaving
    # out y_train would re-weight by predicted class instead of by mistakes.
    weights *= np.exp(-alpha * y_train * predictions)
    weights /= np.sum(weights)  # Renormalize so the weights sum to 1


# Make predictions on the test set using the trained models
def adaboost_predict(X, models, alphas):
    """Classify samples with the alpha-weighted vote of decision stumps.

    Parameters
    ----------
    X : 2-D array, one row per sample; indexed by each stump's feature column.
    models : sequence of ``(feature, threshold, polarity)`` tuples. With
        polarity 1 a stump votes -1 for values ``<= threshold`` and +1 for
        values ``> threshold``; polarity -1 flips both sides.
    alphas : sequence of vote weights, paired with ``models`` by position.

    Returns
    -------
    numpy.ndarray
        Float array containing only -1.0 or 1.0. A summed score of exactly
        zero (including the case of no models at all) is resolved to 1.0,
        so the result never contains 0, which is not a class label.
    """
    scores = np.zeros(len(X))
    for alpha, (feature, threshold, polarity) in zip(alphas, models):
        column = X[:, feature]
        # Entries matching neither mask (e.g. NaN) keep the default vote of 1.
        votes = np.ones(len(X))
        votes[column <= threshold] = -polarity
        votes[column > threshold] = polarity
        scores += alpha * votes
    # np.sign would return 0 on a tie; break ties toward +1 instead.
    return np.where(scores >= 0, 1.0, -1.0)


# Score the boosted ensemble on the held-out test set
final_predictions = adaboost_predict(X_test, models, alphas)
accuracy = accuracy_score(y_test, final_predictions)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Generate confusion matrix. Passing labels pins the rows/columns to the
# classes [-1, 1] in that order, so the matrix is always 2x2 and lines up
# with the two hard-coded tick labels below even if a class is absent from
# y_test or the predictions contain some other value.
conf_matrix = confusion_matrix(y_test, final_predictions, labels=[-1, 1])
plt.imshow(conf_matrix, cmap='BuGn')
plt.title('Confusion Matrix')
plt.xlabel('Predicted Labels')
plt.ylabel('True Labels')
plt.xticks([0, 1], ['Not Survived', 'Survived'])
plt.yticks([0, 1], ['Not Survived', 'Survived'])

# Write each count into its cell; rows are true labels (y axis) and
# columns are predicted labels (x axis)
for (row, col), count in np.ndenumerate(conf_matrix):
    plt.text(col, row, count, ha='center', va='center')

plt.show()

SyntaxError: invalid syntax (1290769595.py, line 41)

Signature:       np.square(*args, **kwargs)
Type:            ufunc
String form:     <ufunc 'square'>
File:            c:\users\venka\appdata\local\programs\python\python313\lib\site-packages\numpy\__init__.py
Docstring:
square(x, /, out=None, *, where=True, casting='same_kind', order='K', dtype=None, subok=True[, signature])

Return the element-wise square of the input.

Parameters
----------
x : array_like
    Input data.
out : ndarray, None, or tuple of ndarray and None, optional
    A location into which the result is stored. If provided, it must have
    a shape that the inputs broadcast to. If not provided or None,
    a freshly-allocated array is returned. A tuple (possible only as a
    keyword argument) must have length equal to the number of outputs.
where : array_like, optional
    This conditi