In [1]:
import os

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load the car evaluation dataset.
# The CSV location can be overridden through the CAR_EVALUATION_CSV environment
# variable so the notebook is not tied to a single machine; when the variable is
# unset, the original local path is used.
CAR_EVALUATION_CSV = os.environ.get(
    "CAR_EVALUATION_CSV",
    r"C:\Users\ayush\Microsoft\Downloads\car_evaluation.csv",
)
data = pd.read_csv(CAR_EVALUATION_CSV)

In [3]:
# Encode every column as integer codes.
# One fitted LabelEncoder is kept per column (keyed by column name) so that codes can
# later be translated back to the original category strings, e.g. via
# label_encoders[column].classes_ or .inverse_transform(). The encoded `data` frame is
# the same as encoding each column with a throwaway encoder.
# Run this on the freshly loaded frame: re-running it on already-encoded data refits
# the encoders on integers and the original category names are no longer available.
label_encoders = {column: LabelEncoder().fit(data[column]) for column in data.columns}
data = data.apply(lambda column: label_encoders[column.name].transform(column))

In [4]:
# Separate predictors from the label: the final column is the target (y),
# every column before it is a feature (X)
target_position = data.shape[1] - 1
y = data.iloc[:, target_position]
X = data.iloc[:, :target_position]

In [5]:
# Hold out 20% of the rows for testing and train on the remaining 80%;
# the fixed seed keeps the split reproducible
split_parts = train_test_split(X, y, random_state=42, test_size=0.2)
X_train, X_test, y_train, y_test = split_parts

In [6]:
# Build a random forest of 100 trees with a fixed seed
rf_params = {"n_estimators": 100, "random_state": 42}
rf_classifier = RandomForestClassifier(**rf_params)

In [7]:
# Fit the forest on the training split
# (left as the cell's final expression so the fitted estimator is still displayed)
rf_classifier.fit(X=X_train, y=y_train)

In [8]:
# Predict class codes for the held-out test rows
y_pred = rf_classifier.predict(X=X_test)

In [9]:
# Score the test-set predictions: overall accuracy, confusion matrix and
# per-class precision/recall/F1 report (each metric is called as metric(y_test, y_pred))
accuracy, confusion, classification_rep = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, confusion_matrix, classification_report)
)

In [10]:
# Print the accuracy first, then each remaining result under its own heading
print(f"Accuracy: {accuracy}")
for heading, result in (
    ("\nConfusion Matrix:\n", confusion),
    ("\nClassification Report:\n", classification_rep),
):
    print(heading, result)

Accuracy: 0.9624277456647399

Confusion Matrix:
 [[ 72   1   3   1]
 [  2  10   0   3]
 [  1   0 236   0]
 [  2   0   0  15]]

Classification Report:
               precision    recall  f1-score   support

           0       0.94      0.94      0.94        77
           1       0.91      0.67      0.77        15
           2       0.99      1.00      0.99       237
           3       0.79      0.88      0.83        17

    accuracy                           0.96       346
   macro avg       0.91      0.87      0.88       346
weighted avg       0.96      0.96      0.96       346



# Practical 5 B)
AdaBoost (Adaptive Boosting), Gradient Tree Boosting (GBM), and XGBoost classification on the Iris dataset, plus soft- and hard-voting ensembles of the three models

In [13]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, VotingClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# Load the Iris dataset.
# The IRIS_CSV environment variable can point at another copy of the file so the
# notebook is not tied to a single machine; when it is unset, the original local
# path is used.
import os  # imported here as well so this section does not depend on earlier cells

IRIS_CSV = os.environ.get("IRIS_CSV", r"C:\Users\ayush\Microsoft\Downloads\iris (1).csv")
iris_data = pd.read_csv(IRIS_CSV)

In [14]:
# Split the data into features and target.
# NOTE(review): some distributions of this CSV (e.g. Kaggle's Iris.csv) include an
# 'Id' row-number column. It is not a measurement and, if the rows are ordered by
# species, it would leak the label into the features — confirm against the file.
# It is dropped when present; errors='ignore' makes this a no-op when it is absent.
X = iris_data.drop(columns=['Species']).drop(columns=['Id'], errors='ignore')
y = iris_data['Species']

In [15]:
# Map species to numerical values.
# The mapping is applied to the original 'Species' column rather than to `y` itself,
# so re-running this cell gives the same result instead of mapping already-converted
# integers to NaN.
species_map = {'Iris-setosa': 0, 'Iris-versicolor': 1, 'Iris-virginica': 2}
y = iris_data['Species'].map(species_map)

# Series.map yields NaN for any label that is not a key of species_map; fail early
# with a clear message rather than handing NaN targets to the classifiers.
unmapped_species = iris_data.loc[y.isna(), 'Species'].unique()
if len(unmapped_species) > 0:
    raise ValueError(f"Unmapped species labels: {list(unmapped_species)}")

In [16]:
# Hold out 20% of the samples for testing (fixed seed for a reproducible split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Standardise the features: the scaler is fitted on the training portion only and
# then applied to both portions
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [17]:
# Three boosting classifiers, all built with the same number of estimators and seed
boosting_params = dict(n_estimators=100, random_state=42)
ada_boost_model = AdaBoostClassifier(**boosting_params)
gbm_model = GradientBoostingClassifier(**boosting_params)
xgb_model = XGBClassifier(**boosting_params)

In [18]:
# Soft- and hard-voting ensembles over the same three named base models;
# each ensemble receives its own copy of the (name, estimator) list
base_estimators = [('ada', ada_boost_model), ('gbm', gbm_model), ('xgb', xgb_model)]
voting_model_soft = VotingClassifier(estimators=list(base_estimators), voting='soft')
voting_model_hard = VotingClassifier(estimators=list(base_estimators), voting='hard')

In [19]:
# Fit every model on the scaled training data, in the same order as before
for model in (ada_boost_model, gbm_model, xgb_model, voting_model_soft):
    model.fit(X_train_scaled, y_train)
# Kept as the cell's final expression so the fitted hard-voting model is still displayed
voting_model_hard.fit(X_train_scaled, y_train)

In [20]:
# Predict the scaled test set with each fitted model, one result per model in order
ada_boost_pred, gbm_pred, xgb_pred, voting_model_soft_pred, voting_model_hard_pred = (
    fitted.predict(X_test_scaled)
    for fitted in (ada_boost_model, gbm_model, xgb_model, voting_model_soft, voting_model_hard)
)

In [21]:
# Report AdaBoost's test-set accuracy, per-class report and confusion matrix
print("AdaBoost Model:")
for label, metric in (
    ("Accuracy:", accuracy_score),
    ("Classification Report:\n", classification_report),
    ("Confusion Matrix:\n", confusion_matrix),
):
    print(label, metric(y_test, ada_boost_pred))

AdaBoost Model:
Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


In [22]:
# Report gradient boosting's test-set accuracy, per-class report and confusion matrix
print("GBM Model:")
for label, metric in (
    ("Accuracy:", accuracy_score),
    ("Classification Report:\n", classification_report),
    ("Confusion Matrix:\n", confusion_matrix),
):
    print(label, metric(y_test, gbm_pred))

GBM Model:
Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


In [23]:
# Report XGBoost's test-set accuracy, per-class report and confusion matrix
print("XGB Model:")
for label, metric in (
    ("Accuracy:", accuracy_score),
    ("Classification Report:\n", classification_report),
    ("Confusion Matrix:\n", confusion_matrix),
):
    print(label, metric(y_test, xgb_pred))

XGB Model:
Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


In [24]:
# Report the soft-voting ensemble's test-set accuracy, per-class report and confusion matrix
print("Voting Model (Soft):")
for label, metric in (
    ("Accuracy:", accuracy_score),
    ("Classification Report:\n", classification_report),
    ("Confusion Matrix:\n", confusion_matrix),
):
    print(label, metric(y_test, voting_model_soft_pred))

Voting Model (Soft):
Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


In [25]:
# Report the hard-voting ensemble's test-set accuracy, per-class report and confusion matrix
print("Voting Model (Hard):")
for label, metric in (
    ("Accuracy:", accuracy_score),
    ("Classification Report:\n", classification_report),
    ("Confusion Matrix:\n", confusion_matrix),
):
    print(label, metric(y_test, voting_model_hard_pred))

Voting Model (Hard):
Accuracy: 1.0
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00         9
           2       1.00      1.00      1.00        11

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

Confusion Matrix:
 [[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
