In [1]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense




In [2]:
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error, mean_squared_error


# Classification using a fully connected ANN

In [3]:
df = pd.read_csv('Bike Sales Dashboard.csv')

In [4]:
df.head()

Unnamed: 0,ID,Martial Status,Gender,Income,Children,Education,Occupation,Home Owner,Cars,Commute Distance,Region,Age,Age Brackets,Purchased Bike
0,12496,Married,Female,"$40,000",1,Bachelors,Skilled Manual,Yes,0,0-1 Miles,Europe,42,Middle Age,No
1,24107,Married,Male,"$30,000",3,Partial College,Clerical,Yes,1,0-1 Miles,Europe,43,Middle Age,No
2,14177,Married,Male,"$80,000",5,Partial College,Professional,No,2,2-5 Miles,Europe,60,Old,No
3,24381,Single,Male,"$70,000",0,Bachelors,Professional,Yes,1,5-10 Miles,Pacific,41,Middle Age,Yes
4,25597,Single,Male,"$30,000",0,Bachelors,Clerical,No,0,0-1 Miles,Europe,36,Middle Age,Yes


# Proprocessing the Data

In [5]:
df.isna().sum()

ID                  0
Martial Status      0
Gender              0
Income              0
Children            0
Education           0
Occupation          0
Home Owner          0
Cars                0
Commute Distance    0
Region              0
Age                 0
Age Brackets        0
Purchased Bike      0
dtype: int64

In [6]:
#There are no NA values.

In [7]:
df = df.drop(columns='ID')

In [8]:
# Remove dollar signs and commas and convert to float
df['Income'] = df['Income'].replace({'\$': '', ',': ''}, regex=True).astype(float)

# Verify the changes
print(df['Income'].head())


0    40000.0
1    30000.0
2    80000.0
3    70000.0
4    30000.0
Name: Income, dtype: float64


In [9]:
cat_col = ['Martial Status', 'Gender', 'Education', 'Occupation', 'Home Owner', 'Commute Distance', 'Region', 'Age Brackets', 'Purchased Bike']
# num_col = ['Income', 'Children', 'Cars', 'Age']

In [10]:
# Encoding catagorical variables using a Label Encoder

for col in cat_col:
    df[col] = LabelEncoder().fit_transform(df[col])



In [11]:
df.head()

Unnamed: 0,Martial Status,Gender,Income,Children,Education,Occupation,Home Owner,Cars,Commute Distance,Region,Age,Age Brackets,Purchased Bike
0,0,0,40000.0,1,0,4,1,0,0,0,42,1,0
1,0,1,30000.0,3,3,0,1,1,0,0,43,1,0
2,0,1,80000.0,5,3,3,0,2,2,0,60,2,0
3,1,1,70000.0,0,0,3,1,1,3,2,41,1,1
4,1,1,30000.0,0,0,0,0,0,0,0,36,1,1


# Preparing training and testing data

In [12]:
# Splitting the target variable and features

X = df.drop(columns=['Purchased Bike'])
y = df['Purchased Bike']

In [13]:
X_train, X_test, y_train,y_test = train_test_split(X,y, random_state=42, test_size=0.2)

In [14]:
# Ensure X_train is of type float32
X_train = np.array(X_train, dtype=np.float32)
X_test = np.array(X_test, dtype=np.float32)

# Ensure y_train is of type float32
y_train = np.array(y_train, dtype=np.float32)
y_test = np.array(y_test, dtype=np.float32)

# Building and Training an ANN

In [15]:
model = Sequential()
model.add(Dense(6, activation='relu',input_dim = X_train.shape[1])) # Input layer
model.add(Dense(6, activation='relu')) # Hidden layer
model.add(Dense(1, activation='sigmoid')) #Output layer

# Compiling the model

model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics =['accuracy'])

# Training the model

model.fit(X_train, y_train, batch_size=10, epochs=100)



Epoch 1/100


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100


Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.src.callbacks.History at 0x2239606f820>

# Evaluating the Model

In [16]:
# Predicting the Test set results
y_pred = model.predict(X_test)
y_pred = (y_pred > 0.5)

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix, accuracy_score
cm = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

print(cm)
print(f'Accuracy: {accuracy}')


[[106   0]
 [ 94   0]]
Accuracy: 0.53


# Classification using fully connected ANN

In [61]:
data = pd.read_csv('dress_rental_prices.csv')

In [62]:
data.head()

Unnamed: 0.1,Unnamed: 0,ID,Name,Brand,Colour,Catagories,Price
0,0,74416,Runway stripe dress,Stella McCartney,beige,dresses,111
1,1,73815,Reformation Kourtney Dress,Reformation,beige,dresses,50
2,2,73801,Ivory Viola bridal dress,Ghost,beige,dresses,75
3,3,73718,Pasu Dress - Rhino Tusk,Coucoo,beige,dresses,37
4,4,73605,Ellen,RIXO,beige,dresses,47


# Preprocessing


In [63]:
data = data.drop(columns=['Unnamed: 0','ID','Name','Catagories'])

In [64]:
data['Brand'].nunique()

931

In [65]:
data = pd.get_dummies(data, columns=['Brand', 'Colour'], drop_first=True)

In [66]:
data.head(5)

Unnamed: 0,Price,Brand_& Other Stories,Brand_1017 ALYX 9SM,Brand_12 storeez,Brand_16ARLINGTON,Brand_3.1 Phillip Lim,Brand_A Star is Born,Brand_A.L.C,Brand_A.P.C.,Brand_A.W.A.K.E.,...,Colour_navy,Colour_orange,Colour_phantom,Colour_pink,Colour_purple,Colour_red,Colour_silver,Colour_tan,Colour_white,Colour_yellow
0,111,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,50,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,75,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,37,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,47,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [67]:
X = data.drop('Price', axis=1)
y = data['Price']



# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)



# Building and Training the ANN

In [68]:
model = Sequential()

model.add(Dense(64, activation='relu', input_dim=X_train.shape[1])) #Input layer

model.add(Dense(64, activation='relu')) #Hidden layer

model.add(Dense(32, activation='relu')) #Hidden layer

model.add(Dense(1)) #Output

# Compiling the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mean_absolute_error'])

# Training the model
model.fit(X_train, y_train, batch_size=32, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100


Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.src.callbacks.History at 0x223a10ac190>

In [69]:
# Predicting the test set results
y_pred = model.predict(X_test)

# Evaluate the model
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print(f'Mean Absolute Error: {mae}')
print(f'Mean Squared Error: {mse}')

Mean Absolute Error: 17.0199860514005
Mean Squared Error: 851.5502208468116
