In [None]:
# =============================================================================
# ENSEMBLE LEARNING LAB - IMPORTS & LIBRARIES
# =============================================================================
import os

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder


In [None]:
# =============================================================================
# LOAD UCI HAR DATASET
# =============================================================================
# The CSV location can be overridden without editing the notebook by setting
# the UCI_HAR_CSV environment variable. The original local path is kept as the
# fallback so the existing setup keeps working unchanged.
DATA_PATH = os.environ.get(
    "UCI_HAR_CSV",
    r"C:\Users\Jaivansh Chawla\Documents\COLLEGE\3 YEAR\5 sem\ML\ML LAB\LAB 9\UCI_HAR_dataset.csv",
)
df = pd.read_csv(DATA_PATH)

# Quick sanity output: dimensions, first rows and the full column list.
print("✅ Dataset Loaded Successfully!")
print("Shape:", df.shape)
print(df.head())
print("\nColumns in Dataset:\n", df.columns.tolist())


In [None]:
# =============================================================================
# FEATURE-TARGET SPLIT
# =============================================================================
# The right-most column of the frame is taken as the label; every other
# column becomes a feature.
target_column = df.columns[-1]
print(f"\nUsing '{target_column}' as target column")

y = df[target_column]
X = df.drop(columns=target_column)

print(f"Features Shape: {X.shape}")
print(f"Target Shape: {y.shape}")


In [None]:
# =============================================================================
# PREPROCESSING - ONE-HOT ENCODING
# =============================================================================
# Expand any categorical feature columns into indicator columns, dropping the
# first level of each so the indicators are not redundant.
X = pd.get_dummies(data=X, drop_first=True)

encoded_shape = X.shape
print(f"Features after encoding: {encoded_shape}")


In [None]:
# =============================================================================
# TRAIN-TEST SPLIT
# =============================================================================
# 70/30 split with a fixed seed for reproducibility. stratify=y keeps each
# class's share the same in both partitions, so every label the encoder and
# models see at training time is also the label set evaluated on the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

print(f"Training Set Size: {X_train.shape}")
print(f"Test Set Size: {X_test.shape}")


RANDOM FOREST

In [None]:
# =============================================================================
# RANDOM FOREST - MODEL TRAINING
# =============================================================================
print("\n" + "="*80)
print("RANDOM FOREST CLASSIFIER")
print("="*80)

# 100 trees with a fixed seed so repeated runs build the same forest.
model_rf = RandomForestClassifier(n_estimators=100, random_state=42)
model_rf.fit(X_train, y_train)
print("🌳 Random Forest Model Training Complete!")


In [None]:
# =============================================================================
# RANDOM FOREST - PREDICTIONS
# =============================================================================
# Predict labels for the held-out test features.
y_pred_rf = model_rf.predict(X_test)
print("✅ Predictions Generated!")


In [None]:
# =============================================================================
# RANDOM FOREST - ACCURACY & CLASSIFICATION REPORT
# =============================================================================
# Overall accuracy followed by the per-class precision / recall / F1 table.
print("\n🎯 RANDOM FOREST ACCURACY:", accuracy_score(y_test, y_pred_rf))
print("\n📊 RANDOM FOREST CLASSIFICATION REPORT:\n", classification_report(y_test, y_pred_rf))


In [None]:
# =============================================================================
# RANDOM FOREST - CONFUSION MATRIX
# =============================================================================
# Sorted union of true and predicted labels: the same label set (and order)
# confusion_matrix uses by default, made explicit so the heatmap ticks can show
# the class names instead of bare row/column indices.
labels_rf = np.unique(np.concatenate([np.asarray(y_test), np.asarray(y_pred_rf)]))
cm_rf = confusion_matrix(y_test, y_pred_rf, labels=labels_rf)

plt.figure(figsize=(6,5))
sns.heatmap(cm_rf, annot=True, fmt='d', cmap='Blues',
            xticklabels=labels_rf, yticklabels=labels_rf)
plt.title("Random Forest - Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.tight_layout()  # keep the longer class-name tick labels inside the figure
plt.show()


In [None]:
# =============================================================================
# RANDOM FOREST - FEATURE IMPORTANCE
# =============================================================================
# Rank features from most to least important according to the fitted forest.
importances_rf = model_rf.feature_importances_
indices_rf = np.flip(np.argsort(importances_rf))

# Positions of the ten highest-ranked features.
top10_idx = indices_rf[:10]
top10_scores = importances_rf[top10_idx]
top10_names = X.columns[top10_idx]

plt.figure(figsize=(10,6))
sns.barplot(x=top10_scores, y=top10_names)
plt.title("Random Forest - Top 10 Important Features")
plt.xlabel("Importance")
plt.ylabel("Feature Names")
plt.show()


In [None]:
# =============================================================================
# RANDOM FOREST - PAIRPLOT
# =============================================================================
# Pairwise scatter / KDE view of the first four feature columns, coloured by
# the target label.
pairplot_cols = [*X.columns[:4], target_column]
pair_df = df.loc[:, pairplot_cols]

sns.pairplot(data=pair_df, hue=target_column, diag_kind="kde")
plt.suptitle("Random Forest - Pairplot of Selected Features", y=1.02)
plt.show()


XGBOOST

In [None]:
# =============================================================================
# XGBOOST - LABEL ENCODING
# =============================================================================
print("\n" + "="*80)
print("XGBOOST CLASSIFIER")
print("="*80)

# Map the target labels to integer codes. The encoder is fitted on the training
# labels only and then reused, unchanged, for the test labels.
le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

print("✅ Label Encoding Complete!")


In [None]:
# =============================================================================
# XGBOOST - MODEL TRAINING
# =============================================================================
# Shallow boosted trees; each tree is built on an 80% sample of the rows and an
# 80% sample of the columns. The seed is fixed for reproducibility.
model_xgb = xgb.XGBClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)

# Trained on the integer-encoded labels produced by the label encoder.
model_xgb.fit(X_train, y_train_encoded)
print("🚀 XGBoost Model Training Complete!\n")


In [None]:
# =============================================================================
# XGBOOST - PREDICTIONS
# =============================================================================
# Predictions are integer class codes, comparable with y_test_encoded.
y_pred_xgb = model_xgb.predict(X_test)
print("✅ Predictions Generated!")


In [None]:
# =============================================================================
# XGBOOST - ACCURACY & CLASSIFICATION REPORT
# =============================================================================
# Metrics are computed on the encoded labels; target_names maps the integer
# codes back to the original class names in the printed report.
print("\n🎯 XGBOOST ACCURACY:", accuracy_score(y_test_encoded, y_pred_xgb))
print("\n📊 XGBOOST CLASSIFICATION REPORT:\n", classification_report(
    y_test_encoded, y_pred_xgb, target_names=le.classes_
))


In [None]:
# =============================================================================
# XGBOOST - ACTIVITY DISTRIBUTION
# =============================================================================
# Count of samples per class in the full dataset. Uses target_column (the
# column the models are actually trained on) rather than a hard-coded name, so
# the plot stays in sync with the feature/target split.
plt.figure(figsize=(10, 5))
df[target_column].value_counts().plot(kind='bar', color='skyblue', edgecolor='black')
plt.title('Activity Distribution in Dataset', fontsize=14, fontweight='bold')
plt.xlabel('Activity Type')
plt.ylabel('Count')
plt.xticks(rotation=45)
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()


In [None]:
# =============================================================================
# XGBOOST - CONFUSION MATRIX (Regular + Normalized)
# =============================================================================
# plt.subplots(1, 2) returns an array holding two Axes; each heatmap must be
# drawn on its own element (axes[0] / axes[1]), not on the array itself.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

cm_xgb = confusion_matrix(y_test_encoded, y_pred_xgb)

# Regular Confusion Matrix (raw counts) on the left panel
sns.heatmap(cm_xgb, annot=True, fmt="d", cmap="Greens", ax=axes[0],
            xticklabels=le.classes_, yticklabels=le.classes_)
axes[0].set_title("XGBoost - Confusion Matrix", fontsize=14, fontweight='bold')
axes[0].set_xlabel("Predicted")
axes[0].set_ylabel("Actual")

# Normalized Confusion Matrix on the right panel: each row is divided by its
# total, so cells show the fraction of that actual class predicted as each class.
cm_normalized = cm_xgb.astype('float') / cm_xgb.sum(axis=1)[:, np.newaxis]
sns.heatmap(cm_normalized, annot=True, fmt=".2f", cmap="Blues", ax=axes[1],
            xticklabels=le.classes_, yticklabels=le.classes_)
axes[1].set_title("XGBoost - Normalized Confusion Matrix", fontsize=14, fontweight='bold')
axes[1].set_xlabel("Predicted")
axes[1].set_ylabel("Actual")

plt.tight_layout()
plt.show()


In [None]:
# =============================================================================
# XGBOOST - FEATURE IMPORTANCE (Top 15)
# =============================================================================
# Pair every feature name with its importance score, then keep the 15 rows
# with the highest scores.
feature_importance_xgb = model_xgb.feature_importances_
importance_table = pd.DataFrame({
    'Feature': X.columns,
    'Importance': feature_importance_xgb
})
top_features_xgb = importance_table.sort_values('Importance', ascending=False).head(15)

plt.figure(figsize=(10, 6))
sns.barplot(x='Importance', y='Feature', data=top_features_xgb, palette='viridis')
plt.title('XGBoost - Top 15 Most Important Features', fontsize=14, fontweight='bold')
plt.xlabel('Importance Score')
plt.tight_layout()
plt.show()


In [None]:
# =============================================================================
# XGBOOST - PER-CLASS F1-SCORE
# =============================================================================
plt.figure(figsize=(10, 5))

# output_dict=True returns the report as a nested dict keyed by class name,
# which lets the per-class F1 values be pulled out directly.
report_dict_xgb = classification_report(y_test_encoded, y_pred_xgb,
                                        target_names=le.classes_,
                                        output_dict=True)
activities = le.classes_
f1_scores_xgb = [report_dict_xgb[act]['f1-score'] for act in activities]

bars = plt.bar(activities, f1_scores_xgb, color='teal', alpha=0.7, edgecolor='black')
plt.title('XGBoost - F1-Score per Activity', fontsize=14, fontweight='bold')
plt.ylabel('F1-Score')
plt.ylim([0, 1.1])  # headroom above 1.0 so the value labels are not clipped
plt.xticks(rotation=45)
plt.grid(axis='y', alpha=0.3)

# Add value labels on bars (centred horizontally, sitting just above each bar)
for bar in bars:
    height = bar.get_height()
    plt.text(bar.get_x() + bar.get_width()/2., height,
             f'{height:.3f}', ha='center', va='bottom', fontweight='bold')

plt.tight_layout()
plt.show()

print("\n✅ XGBoost Analysis Complete!")
