Step 1: Preprocess the Dataset

In [2]:
import warnings

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, mean_squared_error, classification_report
from sklearn.model_selection import train_test_split

# Source workbook. NOTE(review): this looks like a Colab-mounted Google Drive
# path; it will not resolve in other environments -- confirm before sharing.
file_path = '/content/drive/MyDrive/Dec2019.xlsx'

# Read the workbook with pandas' default read_excel options.
data = pd.read_excel(file_path)

# Keep only the numeric columns, then drop every row that has a missing value
# in any of them.
numeric_data = data.select_dtypes(include=[np.number]).dropna()

# Fail here, with a message that names the cause, instead of letting a
# zero-feature matrix or an empty frame surface as an opaque error later on.
if numeric_data.shape[1] < 2:
    raise ValueError(
        "Need at least two numeric columns (one or more features plus a target); "
        f"found {numeric_data.shape[1]}."
    )
if numeric_data.empty:
    raise ValueError("No rows remain after dropping rows with missing numeric values.")

# Split into features (X) and target (y). The target is chosen purely by
# position: the LAST numeric column. Every other numeric column is a feature.
target_column = numeric_data.columns[-1]
X = numeric_data.iloc[:, :-1].to_numpy()
y = numeric_data.iloc[:, -1].to_numpy()

# A target with a single distinct value makes every classifier score 100%,
# so downstream accuracy/precision/recall/F1 would carry no information.
if np.unique(y).size < 2:
    warnings.warn(
        f"Target column {target_column!r} contains a single distinct value; "
        "any classifier will score perfectly and the evaluation metrics will be "
        "meaningless. Check that the last numeric column is the intended target."
    )

# Split the data into training and testing sets: 30% held out for testing,
# with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


Step 2: Train a Machine Learning Model

In [3]:
# Build a random forest with a fixed seed (for reproducible trees) and fit it
# on the training split in one step; fit() hands back the estimator itself.
model = RandomForestClassifier(random_state=42).fit(X_train, y_train)

# Predict a label for every row of the held-out test split.
y_pred = model.predict(X_test)


Step 3: Evaluate the Model

In [4]:
# Fraction of test rows whose predicted label matches the true label.
accuracy = accuracy_score(y_test, y_pred)

# Per-class precision / recall / F1, averaged with each class weighted by its
# support (number of true instances), so the scores are defined for
# multi-class targets as well as binary ones.
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Mean squared error between true and predicted labels.
# NOTE(review): this treats class labels as numbers, which presumably only
# makes sense if the labels are ordinal -- confirm it is wanted here.
mse = mean_squared_error(y_test, y_pred)

# Text table of per-class precision, recall, F1 and support.
report = classification_report(y_test, y_pred)

# Emit the summary: one metric per line, rounded to two decimals.
print(
    "Model Performance Metrics:",
    f"Accuracy: {accuracy:.2f}",
    f"Precision: {precision:.2f}",
    f"Recall: {recall:.2f}",
    f"F1-score: {f1:.2f}",
    f"Mean Squared Error (MSE): {mse:.2f}",
    sep="\n",
)
print("\nClassification Report:", report, sep="\n")


Model Performance Metrics:
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1-score: 1.00
Mean Squared Error (MSE): 0.00

Classification Report:
              precision    recall  f1-score   support

           1       1.00      1.00      1.00      3960

    accuracy                           1.00      3960
   macro avg       1.00      1.00      1.00      3960
weighted avg       1.00      1.00      1.00      3960

