# Loading libraries

In [3]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split

# Loading data

In [4]:
# Read the prepared CSV; the path is relative to the kernel's working directory.
dataset = pd.read_csv(filepath_or_buffer="READYCPTfourthrun.csv")

# Separating matrix of features from the dependent variable

In [5]:
# Feature matrix: every column except the last. Target vector: the last column.
x, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

# Splitting data into training and test sets

In [6]:
# Hold out 20% of the rows as the test set. The fixed random_state makes the
# split identical on every Restart & Run All; without it the train/test rows
# (and every score computed from them) change from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Building Logistic Regression instance

In [7]:
# Base logistic-regression estimator handed to the grid search.
# random_state is fixed because the solver grid includes 'liblinear', 'sag' and
# 'saga', which shuffle the data; leaving it unset makes tuning non-reproducible.
classifier = LogisticRegression(random_state=42)

# Define the hyperparameter grid for the Grid Search (tuning)


In [8]:
# Candidate values for each logistic-regression hyperparameter; the grid search
# evaluates every combination of the three lists.
param_grid = dict(
    C=[0.001, 0.01, 0.1, 1, 10, 100],
    solver=["liblinear", "newton-cg", "saga", "sag", "lbfgs"],
    max_iter=[100, 500, 1000, 2000, 10000],
)

# Run the Grid Search and get the best parameters and estimator




In [None]:
# Exhaustive 5-fold cross-validated search over `param_grid`, scored by accuracy.
# n_jobs=-1 runs the candidate fits in parallel on all available cores; the
# search procedure itself is unchanged, it just finishes sooner.
grid_search = GridSearchCV(
    classifier, param_grid, cv=5, scoring="accuracy", n_jobs=-1
)
grid_search.fit(x_train, y_train)

best_params = grid_search.best_params_
best_score = grid_search.best_score_
print("Best Parameters:", best_params)
print("Best Score:", best_score)

# With GridSearchCV's default refit=True, this estimator has already been refit
# on all of x_train using the winning parameters.
best_model = grid_search.best_estimator_

# Custom backward-elimination loop to select features based on accuracy

In [None]:
# Backward feature elimination for the tuned logistic regression.
#
# Every candidate feature set is scored with 5-fold cross-validation on the
# TRAINING data only, using the tuned estimator `best_model`:
#   * scoring candidates on x_test would leak test information into the feature
#     choice and inflate the test accuracy reported afterwards;
#   * scoring candidates with the untuned `classifier` would compare
#     default-hyperparameter results against a baseline from the tuned model.
n_features = x_train.shape[1]
selected_features = list(range(n_features))
initial_accuracy = cross_val_score(
    best_model, x_train, y_train, cv=5, scoring="accuracy"
).mean()
print(f"Baseline cross-validated accuracy with all features: {initial_accuracy:.4f}")

# Visit each original column index once; `selected_features` shrinks whenever
# dropping the current column raises the cross-validated accuracy.
for i in range(n_features):
    features_to_use = [feature for feature in selected_features if feature != i]

    if not features_to_use:
        print("All features removed - Terminating Process")
        break

    # cross_val_score fits clones of the estimator, so best_model is not modified.
    accuracy_subset = cross_val_score(
        best_model, x_train[:, features_to_use], y_train, cv=5, scoring="accuracy"
    ).mean()

    if accuracy_subset > initial_accuracy:
        print(f"Removing feature in position {i} - Accuracy improved to {accuracy_subset:.4f}")
        initial_accuracy = accuracy_subset
        selected_features = features_to_use
    else:
        print(f"Keeping feature in position {i} - Accuracy: {accuracy_subset:.4f}")

print("Selected Features:")
# Feature columns are all dataset columns except the last (the target), so every
# index in selected_features maps directly onto this slice.
feature_names = dataset.columns[:-1]
for feature_index in selected_features:
    print(feature_names[feature_index])

# Fit the best classifier on the training data using the selected features



In [None]:
# Refit the tuned model on the training rows, restricted to the selected columns.
x_train_selected = x_train[:, selected_features]
best_model.fit(x_train_selected, y_train)

# Make predictions on the test set using selected features



In [12]:
# Predict test-set labels from the selected feature columns only.
x_test_selected = x_test[:, selected_features]
y_pred = best_model.predict(x_test_selected)

# Evaluate performance on test set

In [None]:
# Confusion matrix (rows: true labels, columns: predicted labels), then accuracy.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)

# Building Random Forest classifier instance

In [14]:
# Base random-forest estimator handed to the grid search.
# random_state is fixed because forests bootstrap rows and subsample features at
# random; leaving it unset makes tuning and all later scores non-reproducible.
rf_classifier = RandomForestClassifier(random_state=42)

# Define the hyperparameter grid for the Grid Search (tuning)


In [15]:
# Candidate values for each random-forest hyperparameter; the grid search
# evaluates every combination of the four lists.
param_grid = dict(
    n_estimators=[100, 300, 500],
    max_depth=[None, 10, 20, 30],
    min_samples_split=[2, 5, 10],
    min_samples_leaf=[1, 2, 4],
)

# Run the Grid Search and get the best parameters and estimator


In [None]:
# Exhaustive 5-fold cross-validated search over `param_grid`, scored by accuracy.
# n_jobs=-1 runs the candidate fits in parallel on all available cores; the
# search procedure itself is unchanged, it just finishes sooner.
grid_search = GridSearchCV(
    rf_classifier, param_grid, cv=5, scoring='accuracy', n_jobs=-1
)
grid_search.fit(x_train, y_train)

best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best Parameters:", best_params)
print("Best Score:", best_score)

# With GridSearchCV's default refit=True, this estimator has already been refit
# on all of x_train using the winning parameters.
best_rf_model = grid_search.best_estimator_

# Custom backward-elimination loop to select features based on accuracy

In [None]:
# Backward feature elimination for the tuned random forest.
#
# Every candidate feature set is scored with 5-fold cross-validation on the
# TRAINING data only, using the tuned estimator `best_rf_model`:
#   * scoring candidates on x_test would leak test information into the feature
#     choice and inflate the test accuracy reported afterwards;
#   * scoring candidates with the untuned `rf_classifier` would compare
#     default-hyperparameter results against a baseline from the tuned model.
# NOTE: this fits the tuned forest five times per candidate, so it is slower
# than a single fit per candidate.
n_features = x_train.shape[1]
selected_features = list(range(n_features))
initial_accuracy = cross_val_score(
    best_rf_model, x_train, y_train, cv=5, scoring='accuracy'
).mean()
print(f"Baseline cross-validated accuracy with all features: {initial_accuracy:.4f}")

# Visit each original column index once; `selected_features` shrinks whenever
# dropping the current column raises the cross-validated accuracy.
for i in range(n_features):
    features_to_use = [feature for feature in selected_features if feature != i]

    if not features_to_use:
        print("All features removed - Terminating Process")
        break

    # cross_val_score fits clones of the estimator, so best_rf_model is not modified.
    accuracy_subset = cross_val_score(
        best_rf_model, x_train[:, features_to_use], y_train, cv=5, scoring='accuracy'
    ).mean()

    if accuracy_subset > initial_accuracy:
        print(f"Removing feature in position {i} - Accuracy improved to {accuracy_subset:.4f}")
        initial_accuracy = accuracy_subset
        selected_features = features_to_use
    else:
        print(f"Keeping feature in position {i} - Accuracy: {accuracy_subset:.4f}")

print("Selected Features:")
# Feature columns are all dataset columns except the last (the target), so every
# index in selected_features maps directly onto this slice.
feature_names = dataset.columns[:-1]
for feature_index in selected_features:
    print(feature_names[feature_index])

# Fit the best classifier on the training data using the selected features

In [None]:
# Refit the tuned forest on the training rows, restricted to the selected columns.
x_train_selected = x_train[:, selected_features]
best_rf_model.fit(x_train_selected, y_train)

# Make predictions on the test set using selected features

In [19]:
# Predict test-set labels from the selected feature columns only.
x_test_selected = x_test[:, selected_features]
y_pred = best_rf_model.predict(x_test_selected)

# Evaluate performance on test set

In [None]:
# Confusion matrix (rows: true labels, columns: predicted labels), then accuracy.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print("Accuracy:", accuracy)