<a href="https://colab.research.google.com/github/fatimaabuhamdeh/deep/blob/main/AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from imblearn.under_sampling import RandomUnderSampler
from imblearn.over_sampling import RandomOverSampler
from imblearn.metrics import classification_report_imbalanced
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

In [2]:
# Load the dataset
data = pd.read_csv("/content/student_data.csv")

In [14]:
# Preprocessing: Encode categorical variables if present
data = pd.get_dummies(data)

# Separate features and target variable
X = data.drop('G3', axis=1)  # Replace 'target_variable_column_name' with your target column
y = data['G3']

In [15]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Apply resampling: undersampling
undersampler = RandomUnderSampler(random_state=42)
X_resampled_under, y_resampled_under = undersampler.fit_resample(X_train, y_train)

# Apply resampling: oversampling
oversampler = RandomOverSampler(random_state=42)
X_resampled_over, y_resampled_over = oversampler.fit_resample(X_train, y_train)

In [17]:
# Function to train and evaluate model
def train_evaluate_model(X_train, y_train, X_test, y_test, model):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='weighted')
    recall = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    # For multi-class classification, AUC is not directly supported
    auc = None
    return accuracy, precision, recall, f1, auc

In [18]:
# Initialize ensemble models
bagging_model = RandomForestClassifier(random_state=42)
boosting_model = AdaBoostClassifier(random_state=42)

In [19]:
# Train and evaluate models on original data
accuracy_bagging, precision_bagging, recall_bagging, f1_bagging, auc_bagging = train_evaluate_model(X_train, y_train, X_test, y_test, bagging_model)
accuracy_boosting, precision_boosting, recall_boosting, f1_boosting, auc_boosting = train_evaluate_model(X_train, y_train, X_test, y_test, boosting_model)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [20]:
# Print evaluation metrics for models on original data
print("Metrics for Bagging Model (Original Data):")
print(f"Accuracy: {accuracy_bagging:.4f}, Precision: {precision_bagging:.4f}, Recall: {recall_bagging:.4f}, F1-score: {f1_bagging:.4f}")

print("\nMetrics for Boosting Model (Original Data):")
print(f"Accuracy: {accuracy_boosting:.4f}, Precision: {precision_boosting:.4f}, Recall: {recall_boosting:.4f}, F1-score: {f1_boosting:.4f}")

Metrics for Bagging Model (Original Data):
Accuracy: 0.3165, Precision: 0.2881, Recall: 0.3165, F1-score: 0.2776

Metrics for Boosting Model (Original Data):
Accuracy: 0.1772, Precision: 0.0406, Recall: 0.1772, F1-score: 0.0635


In [21]:
optimized_bagging_model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)  # Example hyperparameters, replace with tuned values
optimized_boosting_model = AdaBoostClassifier(n_estimators=100, learning_rate=0.1, random_state=42)  # Example hyperparameters, replace with tuned values

In [22]:
# Train optimized models
optimized_accuracy_bagging, optimized_precision_bagging, optimized_recall_bagging, optimized_f1_bagging, optimized_auc_bagging = train_evaluate_model(X_train, y_train, X_test, y_test, optimized_bagging_model)
optimized_accuracy_boosting, optimized_precision_boosting, optimized_recall_boosting, optimized_f1_boosting, optimized_auc_boosting = train_evaluate_model(X_train, y_train, X_test, y_test, optimized_boosting_model)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [23]:
# Print evaluation metrics for optimized models
print("\nMetrics for Optimized Bagging Model:")
print(f"Accuracy: {optimized_accuracy_bagging:.4f}, Precision: {optimized_precision_bagging:.4f}, Recall: {optimized_recall_bagging:.4f}, F1-score: {optimized_f1_bagging:.4f}")

print("\nMetrics for Optimized Boosting Model:")
print(f"Accuracy: {optimized_accuracy_boosting:.4f}, Precision: {optimized_precision_boosting:.4f}, Recall: {optimized_recall_boosting:.4f}, F1-score: {optimized_f1_boosting:.4f}")


Metrics for Optimized Bagging Model:
Accuracy: 0.3671, Precision: 0.3622, Recall: 0.3671, F1-score: 0.3058

Metrics for Optimized Boosting Model:
Accuracy: 0.2025, Precision: 0.1261, Recall: 0.2025, F1-score: 0.1305
