In [1]:
# Importing the libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report, accuracy_score
import os

In [2]:
# Setting the working directory
os.chdir('C:\\Users\\manis\\OneDrive\\Desktop\\545 Data Mining Bus Intell\\Lab 20')

In [3]:
# Importing the dataset into a dataframe
amazon_sale = pd.read_excel('Amazon_Sale_Report.xlsx')

In [4]:
# Creating a new dataframe with only the required columns
amazon_sale = amazon_sale[['Order ID', 'Date', 'Status', 'Style','SKU', 'Qty']]

In [5]:
# Removing the rows with cancelled orders
amazon_sale = amazon_sale[amazon_sale['Status'] != 'Cancelled']

In [6]:
# Creating a new column with the total sale amount
amazon_sale.columns = amazon_sale.columns.str.replace(' ', '_')

In [7]:
# Data preparation
product_transactions = amazon_sale['SKU'].value_counts()
threshold = product_transactions.median()
amazon_sale['Is_Popular'] = amazon_sale['SKU'].apply(lambda x: 1 if product_transactions[x] > threshold else 0)

In [8]:
# display first 5 rows
amazon_sale.head()


Unnamed: 0,Order_ID,Date,Status,Style,SKU,Qty,Is_Popular
1,171-9198151-1101146,2022-04-30,Shipped - Delivered to Buyer,JNE3781,JNE3781-KR-XXXL,1,1
2,404-0687676-7273146,2022-04-30,Shipped,JNE3371,JNE3371-KR-XL,1,1
4,407-1069790-7240320,2022-04-30,Shipped,JNE3671,JNE3671-TU-XXXL,1,1
5,404-1490984-4578765,2022-04-30,Shipped,SET264,SET264-KR-NP-XL,1,1
6,408-5748499-6859555,2022-04-30,Shipped,J0095,J0095-SET-L,1,1


In [9]:
# Feature encoding
le_style = LabelEncoder()
le_sku = LabelEncoder()
amazon_sale['Style_Encoded'] = le_style.fit_transform(amazon_sale['Style'])
amazon_sale['SKU_Encoded'] = le_sku.fit_transform(amazon_sale['SKU'])

In [10]:
# display first 5 rows
amazon_sale.head()

Unnamed: 0,Order_ID,Date,Status,Style,SKU,Qty,Is_Popular,Style_Encoded,SKU_Encoded
1,171-9198151-1101146,2022-04-30,Shipped - Delivered to Buyer,JNE3781,JNE3781-KR-XXXL,1,1,845,4387
2,404-0687676-7273146,2022-04-30,Shipped,JNE3371,JNE3371-KR-XL,1,1,531,2596
4,407-1069790-7240320,2022-04-30,Shipped,JNE3671,JNE3671-TU-XXXL,1,1,752,3790
5,404-1490984-4578765,2022-04-30,Shipped,SET264,SET264-KR-NP-XL,1,1,1230,6207
6,408-5748499-6859555,2022-04-30,Shipped,J0095,J0095-SET-L,1,1,179,650


In [11]:
# Feature selection
features = amazon_sale[['Style_Encoded', 'SKU_Encoded', 'Qty']]
target = amazon_sale['Is_Popular']

In [12]:
# Splitting dataset
X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.3, random_state=42)

In [13]:
# Feature scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [14]:
# Model training
nn_classifier = MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=42)
nn_classifier.fit(X_train_scaled, y_train)

In [15]:
# Model prediction
y_pred_nn = nn_classifier.predict(X_test_scaled)

In [16]:
# Model evaluation
accuracy_nn = accuracy_score(y_test, y_pred_nn)
report_nn = classification_report(y_test, y_pred_nn)

In [20]:
# display results
print('Accuracy of MLPClassifier: ', accuracy_nn)
print('Classification report of MLPClassifier: \n', report_nn)


Accuracy of MLPClassifier:  0.8871750067785377
Classification report of MLPClassifier: 
               precision    recall  f1-score   support

           0       0.66      0.02      0.03      3774
           1       0.89      1.00      0.94     29419

    accuracy                           0.89     33193
   macro avg       0.77      0.51      0.49     33193
weighted avg       0.86      0.89      0.84     33193

