# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import LabelEncoder, StandardScaler
import matplotlib.pyplot as plt
import seaborn as sns

# Read the dataset, loading the 'label' column as strings
df = pd.read_csv('train.csv', dtype={'label': str})

# Features are every column except the file name and the target label
X = df.drop(['filename', 'label'], axis=1)

# Map the string labels to integer class ids; the fitted encoder is kept so
# the ids can be decoded later
label_encoder = LabelEncoder()
label_encoder.fit(df['label'])
y = label_encoder.transform(df['label'])

# Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize the features with statistics learned from the training split
# only, then apply the same transform to the test split
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Creating the classifier: an MLP with two hidden layers (850 and 350 units),
# regularization strength alpha=0.01 and a fixed seed
ann_classifier = MLPClassifier(hidden_layer_sizes=(850, 350), max_iter=1500, random_state=42, alpha=0.01)

# Training
ann_classifier.fit(X_train, y_train)

# Predicting on the test split
y_pred = ann_classifier.predict(X_test)

# Accuracy and per-class precision/recall/F1.
# The report is labelled with the original class names instead of the encoded
# integer ids; `labels` lists every encoded id so the names stay aligned even
# if some class does not occur in the test split.
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(
    y_test,
    y_pred,
    labels=list(range(len(label_encoder.classes_))),
    target_names=[str(name) for name in label_encoder.classes_],
)
print(f'Accuracy: {accuracy}')
# The report was previously computed but never shown
print(report)

# Recover the original string labels from the encoded target
y_original = label_encoder.inverse_transform(y)

# Wrap the decoded labels in a single-column DataFrame
df_original_labels = pd.DataFrame(y_original, columns=['label'])

# Number of samples per class
class_distribution = df_original_labels.loc[:, 'label'].value_counts()

# Draw the class distribution as a bar chart
plt.figure(figsize=(10, 6))
class_distribution.plot.bar()
plt.title('Class Distribution')
plt.xlabel('Class')
plt.ylabel('Count')
plt.xticks(rotation=45)
plt.show()

# Pairwise correlation between the feature columns (X is already a
# DataFrame, so it is used directly instead of being re-wrapped)
correlation_matrix = X.corr()

# Set up the figure size
plt.figure(figsize=(10, 8))

# Heatmap of the correlation matrix. Cell annotations are turned off, so no
# number format is passed (it would have no effect without annotations).
sns.heatmap(correlation_matrix, annot=False, cmap='coolwarm', linewidths=0.5)
plt.title('Correlation Matrix')
plt.show()
