In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the data
data = pd.read_csv("./csv_result-dataset_54_vehicle.csv")  # Replace "your_data.csv" with the actual filename

# Explore the data
print(data.head())  # Display the first few rows of the dataset
print(data.info())  # Display information about the dataset

# Data Preprocessing
X = data.drop(columns=["Class", "id"])  # Features (remove "Class" and "id" columns)
y = data["Class"]  # Target variable

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the feature data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Model Building
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Model Evaluation
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy}")
print("Classification Report:")
print(report)

# Data Visualization
plt.figure(figsize=(12, 8))
sns.heatmap(data.corr(), annot=True, cmap='coolwarm')
plt.title("Correlation Heatmap")
plt.show()

# Pairplot for feature visualization
plt.figure(figsize=(15, 10))
sns.pairplot(data=data, hue="Class", markers=["o", "s", "D"])
plt.title("Pairplot of Features by Class")
plt.show()

# Boxplot for feature distribution by class
plt.figure(figsize=(12, 8))
sns.boxplot(x="Class", y="COMPACTNESS", data=data)
plt.title("Boxplot of COMPACTNESS by Class")
plt.show()

# You can add more visualizations based on your specific dataset and analysis requirements.

# Predict using the model (example)
sample_data = np.array([95, 48, 83, 178, 72, 10, 162, 42, 20, 159, 176, 379, 184, 70, 6, 16, 187, 197]).reshape(1, -1)
sample_data = scaler.transform(sample_data)
prediction = model.predict(sample_data)
print(f"Predicted class: {prediction[0]}")


   id 'COMPACTNESS'  'CIRCULARITY'  'DISTANCE_CIRCULARITY'  'RADIUS_RATIO'  \
0   1            95           48.0                    83.0           178.0   
1   2            91           41.0                    84.0           141.0   
2   3           104           50.0                   106.0           209.0   
3   4            93           41.0                    82.0           159.0   
4   5            85           44.0                    70.0           205.0   

   'PR.AXIS_ASPECT_RATIO'  'MAX.LENGTH_ASPECT_RATIO'  'SCATTER_RATIO'  \
0                    72.0                       10.0            162.0   
1                    57.0                        9.0            149.0   
2                    66.0                       10.0            207.0   
3                    63.0                        9.0            144.0   
4                   103.0                       52.0            149.0   

   'ELONGATEDNESS'  'PR.AXIS_RECTANGULARITY'  'MAX.LENGTH_RECTANGULARITY'  \
0             4

KeyError: "['Class'] not found in axis"