In [None]:
# Feature Importance Map
# Replace 'NAME.csv' in the read_csv call below with the path to your own spectra CSV file.

import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Load the dataframe
df = pd.read_csv('NAME.csv')

# Expected CSV layout: the first column holds the class label of each spectrum,
# and every remaining column is one spectral channel whose *header* is its
# wavenumber (so the wavenumbers come from the column names, not a data row).
classes = df.iloc[:, 0]
wavenumbers = df.columns[1:].astype(float)  # header strings -> numeric wavenumbers
spectra = df.iloc[:, 1:].values

# Hold out 20% of the spectra so the trained model can be sanity-checked below.
# random_state is fixed so the split (and therefore the result) is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    spectra, classes, test_size=0.2, random_state=42
)

# Train a Random Forest Classifier
# You can adjust n_estimators and other parameters
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Feature importances are only worth interpreting if the model actually
# separates the classes, so report accuracy on the held-out spectra first.
test_accuracy = model.score(X_test, y_test)
print(f"Held-out test accuracy: {test_accuracy:.3f}")

# Get feature importances (one value per spectral channel, in column order)
importances = model.feature_importances_

# Create a DataFrame pairing each wavenumber with its importance
importance_df = pd.DataFrame({'Wavenumber': wavenumbers, 'Importance': importances})

# Sort by importance in descending order (used for the top-10 listing below)
importance_df = importance_df.sort_values(by='Importance', ascending=False)

# Plot the feature importances; bars are positioned by wavenumber, so the row
# order of importance_df does not affect the figure.
plt.figure(figsize=(12, 6))
plt.bar(importance_df['Wavenumber'], importance_df['Importance'])
plt.xlabel('Wavenumber ($cm^{-1}$)')
plt.ylabel('Feature Importance')
plt.title('Feature Importance Map for FTIR Spectra')
plt.xticks(rotation=90)  # Rotate x-axis labels if they overlap
plt.tight_layout()
plt.show()

# Print the most important features
print("Top 10 Most Important Wavenumbers:")
print(importance_df.head(10))
