In [67]:
import numpy as np 

In [68]:
data = np.genfromtxt("../../data/Files/DATA/bank_note_data.txt",delimiter=",")
# better if we used csv files 

In [69]:
X = data[:,:-1]
y = np.int64(data[:,-1])

### Split data into training and testing 

In [70]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.333,train_size=0.666,random_state=42)

- Scale data for better results 

In [71]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
# fit the data to be between 0 and 1 
scaler.fit(X_train)
scaler.transform(X_train)

scaled_X_train = scaler.transform(X_train)
scaled_X_test = scaler.transform(X_test) 

In [None]:
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense


model = Sequential()

model.add(Dense(4,input_dim=4,activation='relu'))
model.add(Dense(8,activation='relu'))
model.add(Dense(1,activation='sigmoid'))


In [73]:
model.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [74]:
model.fit(scaled_X_train,y_train,epochs=60,verbose=1)

Epoch 1/60
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.4551 - loss: 0.7496
Epoch 2/60
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.4387 - loss: 0.7132
Epoch 3/60
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5237 - loss: 0.6960
Epoch 4/60
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5755 - loss: 0.6837
Epoch 5/60
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6119 - loss: 0.6740
Epoch 6/60
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5994 - loss: 0.6686 
Epoch 7/60
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6019 - loss: 0.6607
Epoch 8/60
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.6428 - loss: 0.6520
Epoch 9/60
[1m29/29[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

<keras.src.callbacks.history.History at 0x71bbe97fd6c0>

The number of epochs can significantly affect the result and goodness of a machine learning model, particularly in the context of training neural networks.

### What is an Epoch?

An epoch is one complete pass through the entire training dataset. During each epoch, the model processes every training example once and updates its weights based on the computed gradients.

### How the Number of Epochs Affects the Model

1. **Underfitting**:
   - If the number of epochs is too low, the model may not have enough opportunities to learn from the training data. This can result in underfitting, where the model performs poorly on both the training and test datasets because it hasn't captured the underlying patterns in the data.

2. **Optimal Training**:
   - With an appropriate number of epochs, the model can learn the patterns in the training data effectively. This results in good performance on both the training and test datasets. The model has had enough time to learn but not so much that it starts to memorize the training data.

3. **Overfitting**:
   - If the number of epochs is too high, the model may start to overfit the training data. Overfitting occurs when the model learns the noise and details in the training data to the extent that it negatively impacts its performance on new, unseen data. The model performs well on the training data but poorly on the test data.

### Monitoring Training with Validation Data

To find the optimal number of epochs, it's common to use a validation dataset. By monitoring the model's performance on the validation data during training, you can identify the point at which the model starts to overfit. This is typically done using techniques such as:

1. **Early Stopping**:
   - Early stopping is a regularization technique where training is stopped when the performance on the validation dataset starts to degrade. This helps prevent overfitting by stopping training at the optimal point.

2. **Learning Curves**:
   - Plotting learning curves for both the training and validation datasets can help visualize the model's performance over epochs. The goal is to find the point where the validation loss stops decreasing and starts increasing, indicating overfitting.

### Summary

In summary, the number of epochs is a crucial hyperparameter that affects the model's ability to learn from the training data. Too few epochs can lead to underfitting, while too many epochs can lead to overfitting. Using techniques like early stopping and monitoring learning curves can help find the optimal number of epochs, ensuring that the model generalizes well to new, unseen data.

In [75]:
predict_y = model.predict(X_test)
predicted_classes = (predict_y >= 0.5).astype(int)

[1m15/15[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step 


In [76]:
from sklearn.metrics import confusion_matrix,classification_report

In [77]:
print(classification_report(y_test,predicted_classes))

              precision    recall  f1-score   support

           0       0.98      0.93      0.95       258
           1       0.91      0.97      0.94       199

    accuracy                           0.95       457
   macro avg       0.95      0.95      0.95       457
weighted avg       0.95      0.95      0.95       457



In [None]:
confusion_matrix(y_test,predicted_classes)

In [79]:
model.save('Bank-Notes.keras')