# Neural Networks


In [98]:
import os

import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix
from sklearn.metrics import precision_score, recall_score, multilabel_confusion_matrix
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.neural_network import MLPClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

## Load Files

In [99]:
# Location of the cleaned dataset. Set the CLEANED_DATASET_PATH environment
# variable to run this notebook on another machine; when it is unset the
# original author's local path is used, so existing behaviour is unchanged.
DATA_PATH = os.environ.get(
    'CLEANED_DATASET_PATH',
    '/Users/hannah-ann/PycharmProjects/cosmetic-ingredient-classifier-app/data/cleaned/cleaned_dataset.csv',
)

# Load the dataset and print its column / dtype / null-count summary.
df = pd.read_csv(DATA_PATH)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 763 entries, 0 to 762
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Ingredients  763 non-null    object
 1   Combination  763 non-null    int64 
 2   Dry          763 non-null    int64 
 3   Normal       763 non-null    int64 
 4   Oily         763 non-null    int64 
 5   Sensitive    763 non-null    int64 
dtypes: int64(5), object(1)
memory usage: 35.9+ KB


## Prepare Data - Feature Extraction (TF-IDF Vectorisation)

In [100]:
# Target matrix: one label column per skin type.
label_columns = ['Combination', 'Dry', 'Normal', 'Oily', 'Sensitive']
y = df[label_columns]

# Feature matrix: TF-IDF weights computed from the raw ingredient text.
# NOTE(review): the vectorizer is fitted on every row before the train/test
# split, so its vocabulary and IDF weights also reflect the held-out rows.
# Consider fitting it on the training rows only (e.g. inside the pipeline).
vectorizer = TfidfVectorizer()
ingredient_text = df['Ingredients']
X = vectorizer.fit_transform(ingredient_text)

## Split Data

In [101]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

## Train Classifier Model

In [102]:
# Neural-network pipeline: scale the TF-IDF features without centring
# (with_mean=False), then classify with a ReLU multi-layer perceptron that has
# two hidden layers of 64 and 32 units.
scaler = StandardScaler(with_mean=False)
mlp = MLPClassifier(
    hidden_layer_sizes=(64, 32),
    activation="relu",
    max_iter=1000,
    random_state=42,
)
model = make_pipeline(scaler, mlp)

# Fit on the training portion only.
model.fit(X_train, y_train)

# 5-fold cross-validation.
# NOTE(review): this is run over the full X / y, i.e. it also includes the
# rows held out as X_test / y_test - confirm that is intended.
scores = cross_val_score(model, X, y, cv=5)
average_accuracy = scores.mean()

print("Cross validation :",scores)
print("Average accuracy :",average_accuracy)

Cross validation : [0.5751634  0.5751634  0.67973856 0.67105263 0.63157895]
Average accuracy : 0.6265393876848986


In [103]:
# Predict the skin-type labels for the held-out rows with the fitted pipeline.
y_pred = model.predict(X_test)

In [104]:
# Accuracy of the model. For multilabel targets accuracy_score is subset
# accuracy: a row only counts as correct when every one of its labels matches,
# which is why it is much lower than the per-label precision/recall below.
accuracy = accuracy_score(y_test, y_pred,normalize=True)
print(f"Accuracy: {accuracy}")

# Precision and recall are reported with the SAME support-weighted averaging so
# the two figures are directly comparable. (Support-weighted recall is
# algebraically equal to micro-averaged recall, so the printed recall value is
# the same as before.)
precision = precision_score(y_test, y_pred, average='weighted')
print(f"Precision: {precision}")

recall = recall_score(y_test, y_pred, average='weighted')
print(f"Recall: {recall}")

# Multilabel confusion matrix: one 2x2 matrix per label, laid out as
# [[TN, FP], [FN, TP]]. Stored under its own name so that the imported
# sklearn `confusion_matrix` function is not overwritten in the kernel.
label_confusion_matrices = multilabel_confusion_matrix(y_test, y_pred)
print(label_confusion_matrices)

# Per-label report; label names are taken from the target columns instead of
# being repeated as a second hard-coded list.
report = classification_report(y_test, y_pred, target_names=list(y_test.columns))
print(report)


Accuracy: 0.6339869281045751
Precision: 0.9277031319836562
Recall: 0.9728958630527818
[[[  0   1]
  [  0 152]]

 [[  2  10]
  [  2 139]]

 [[  0   2]
  [  0 151]]

 [[  0  16]
  [  3 134]]

 [[  7  26]
  [ 14 106]]]
              precision    recall  f1-score   support

 Combination       0.99      1.00      1.00       152
         Dry       0.93      0.99      0.96       141
      Normal       0.99      1.00      0.99       151
        Oily       0.89      0.98      0.93       137
   Sensitive       0.80      0.88      0.84       120

   micro avg       0.93      0.97      0.95       701
   macro avg       0.92      0.97      0.94       701
weighted avg       0.93      0.97      0.95       701
 samples avg       0.93      0.97      0.94       701

