In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.inspection import permutation_importance
from sklearn.metrics import accuracy_score
import numpy as np

# Load the data
# NOTE(review): this is an absolute, machine-specific path; consider reading it
# from a configurable data directory so the notebook runs elsewhere.
data = pd.read_csv('C:/Users/oceanlightai/Desktop/datasets/pet_skin/train/train_skin.csv')

# One-hot encode categorical variables.
# The encoder keeps its default output type and the result is densified
# explicitly: the old `sparse=False` keyword was renamed to `sparse_output` in
# scikit-learn 1.2 and later removed, so passing it fails on current releases.
categorical_columns = ['breed', 'gender', 'region']
feature_encoder = OneHotEncoder()
encoded_features = feature_encoder.fit_transform(data[categorical_columns])
if hasattr(encoded_features, 'toarray'):
    # Sparse matrix -> dense ndarray so it can back a regular DataFrame.
    encoded_features = encoded_features.toarray()

# Create a new DataFrame with encoded features.
# `get_feature_names` was removed in scikit-learn 1.2; `get_feature_names_out`
# (available since 1.0) yields the same "<column>_<category>" labels.
# Reusing `data.index` keeps the column assignments below row-aligned.
data_encoded = pd.DataFrame(
    encoded_features,
    columns=feature_encoder.get_feature_names_out(categorical_columns),
    index=data.index,
)
data_encoded['age'] = data['age']
data_encoded['lesions'] = data['lesions']

# Encode the target variable: class labels -> integer ids -> one-hot rows.
# `encoder` stays bound to the LabelEncoder (as before) so predicted class ids
# can be mapped back to labels with `encoder.inverse_transform`.
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(data_encoded['lesions'])
y_encoded = to_categorical(y_encoded)

In [None]:
# Separate the feature matrix from the target column, then hold out 20% of the
# rows as a test set (fixed seed so the split is repeatable).
X = data_encoded.drop(columns='lesions')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Define the DNN model: two 64-unit ReLU layers, each followed by 50% dropout,
# and a softmax output with one unit per one-hot target column.
model = Sequential([
    Dense(64, input_dim=X_train.shape[1], activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(y_encoded.shape[1], activation='softmax'),
])

# Compile the model for multi-class classification on one-hot targets.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train the model; the held-out split is passed as validation data, so its
# loss/accuracy are reported after every epoch.
model.fit(
    X_train,
    y_train,
    epochs=10,
    batch_size=32,
    validation_data=(X_test, y_test),
)

In [None]:
# Persist the model trained above to "dnn_model.h5". The path is relative, so
# the file is written to the kernel's current working directory.
# NOTE(review): the ".h5" suffix presumably selects Keras' HDF5 format - confirm
# this is the intended format for the installed Keras version.
model.save("dnn_model.h5")

In [None]:
def _accuracy_scorer(estimator, X, y):
    """Accuracy of `estimator` on one-hot targets, in scikit-learn scorer form.

    Args:
        estimator: Object whose `predict(X, verbose=0)` returns one row of
            per-class scores per sample (here: the Keras model).
        X: Feature matrix to predict on.
        y: One-hot encoded true labels, one row per sample.

    Returns:
        Fraction of samples whose highest-scoring class matches the true class.
    """
    # verbose=0: permutation importance predicts once per feature per repeat,
    # so the default progress bars would flood the cell output.
    class_scores = estimator.predict(X, verbose=0)
    return accuracy_score(np.argmax(y, axis=1), np.argmax(class_scores, axis=1))


# Function to get model score
def score(X, y):
    """Return the accuracy of the global `model` on features `X` and one-hot `y`."""
    return _accuracy_scorer(model, X, y)


# Calculate permutation importance.
# The first argument must be an estimator exposing `fit` (a bare scoring
# function is rejected), so the Keras model itself is passed and the metric is
# supplied through `scoring` as a callable with signature (estimator, X, y).
# n_jobs=1: parallel workers would require shipping the Keras model to other
# processes; running serially avoids depending on the model being picklable.
result = permutation_importance(
    model,
    X_test.values,
    y_test,
    scoring=_accuracy_scorer,
    n_repeats=10,
    random_state=42,
    n_jobs=1,
)

# Get importance: mean drop in accuracy over the repeats, one value per column.
importance = result.importances_mean

# Summarize feature importance
for feature_name, mean_importance in zip(X_test.columns, importance):
    print('Feature:', feature_name, ', Score: ', mean_importance)