# Artificial Neural Network

### We will use TensorFlow to measure the accuracy of an ANN model trained on columns 22–28 (the high-level quantities)

In [1]:
# We import the libraries that we are going to need.
import numpy as np
import pandas as pd
import tensorflow as tf

## Part 1 - Data Preprocessing

### Importing the dataset

In [2]:
import os

import pandas as pd

# Location of the HIGGS sample. The default is the original local path; set the
# HIGGS_CSV environment variable to run the notebook on another machine
# without editing this cell.
DATA_PATH = os.environ.get("HIGGS_CSV", r"C:\Users\Asus\Desktop\HIGGS_8K.csv")

# header=None: no row is consumed as column names, so pandas assigns integer
# column labels (the later cells rely on positional .iloc indexing).
df = pd.read_csv(DATA_PATH, header=None)
df.head()  # View first rows

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,19,20,21,22,23,24,25,26,27,28
0,1.0,0.869293,-0.635082,0.22569,0.32747,-0.689993,0.754202,-0.248573,-1.092064,0.0,...,-0.010455,-0.045767,3.101961,1.35376,0.979563,0.978076,0.920005,0.721657,0.988751,0.876678
1,0.0,1.630428,0.404143,0.401026,2.722409,0.355644,1.362199,0.015818,-1.174111,0.0,...,0.387635,1.596321,1.550981,0.859235,0.827233,0.995416,0.764766,0.520597,0.850413,0.929865
2,1.0,0.326305,-0.797735,0.223471,1.248234,-1.427857,0.923767,-0.27828,-0.470052,0.0,...,-0.426866,1.153501,0.0,0.887944,0.924601,0.987189,0.854094,0.385013,0.789912,0.724627
3,1.0,1.38117,0.521993,0.574698,0.357347,0.037136,0.413057,-0.607036,-1.30827,0.0,...,0.567525,-1.475778,0.0,0.936186,1.107217,0.983808,0.693327,0.562045,0.825387,0.846233
4,1.0,0.304161,-0.736375,0.686225,0.477172,-1.274736,0.907003,0.32477,-0.463954,2.173076,...,0.904818,1.165154,0.0,1.18179,1.033701,0.993943,0.648399,0.936097,0.732592,0.661779


In [3]:
# The first row (position 0) is skipped because it is treated as erroneous,
# consistent with the Random Forest and Decision Tree notebooks.
rows = df.iloc[1:]

# Features: every column from position 22 onward.
X = rows.iloc[:, 22:]
# Target: the column at position 0.
y = rows.iloc[:, 0]

### Splitting the dataset into the Training set and Test set

In [4]:
from sklearn.model_selection import train_test_split

# Fix the shuffle seed so the train/test partition (and every metric computed
# from it) is identical after Restart Kernel -> Run All.
SPLIT_SEED = 42

# Hold out 20% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SPLIT_SEED
)

### Feature Scaling

In [5]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training set only, then apply that
# same fitted transform to both splits so the test set does not influence
# the scaler.
sc = StandardScaler()
sc.fit(X_train)
X_train = sc.transform(X_train)
X_test = sc.transform(X_test)

## Part 2 - Building the ANN

### Initializing the ANN

In [6]:
from keras.models import Sequential
from keras.layers import Dense, Input

In [7]:
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation starts from a fixed state on every run.
# NOTE(review): bit-exact repeatability on GPU may need extra determinism
# settings - confirm if that matters here.
tf.keras.utils.set_random_seed(42)

# Empty sequential container; layers are appended in the cells below.
ann = tf.keras.models.Sequential()

### Adding the input layer and the first hidden layer

In [8]:
# Size the input layer from the training matrix instead of hard-coding 7, so
# it stays correct if the feature column slice chosen for X changes.
n_features = X_train.shape[1]
ann.add(Input(shape=(n_features,)))  # This is the Input layer

In [9]:
# First hidden layer: 10 fully connected units with ReLU activation.
first_hidden = tf.keras.layers.Dense(units=10, activation="relu")
ann.add(first_hidden)

### Adding the second and third hidden layers

In [10]:
# Second and third hidden layers (20 then 8 units), both with ReLU activation.
for hidden_units in (20, 8):
    ann.add(tf.keras.layers.Dense(hidden_units, activation="relu"))

### Adding the output layer

In [11]:
# Binary classification (Signal = 1, Background = 0): a single sigmoid unit
# yields a score in (0, 1) that is thresholded later to pick the class.
output_layer = tf.keras.layers.Dense(units=1, activation="sigmoid")
ann.add(output_layer)

## Part 3 - Training the ANN

### Compiling the ANN

In [12]:
# Adam optimizer with binary cross-entropy loss (matches the single sigmoid
# output); accuracy is tracked as the reported metric.
ann.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [13]:
# Print the layer-by-layer architecture of the compiled model.
ann.summary()

### Training the ANN on the Training set

In [14]:
# Training hyperparameters, named so they are easy to find and tune.
BATCH_SIZE = 16
EPOCHS = 100

# Train on the scaled training split; the call returns a History object,
# which is displayed as this cell's output.
ann.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=EPOCHS)

Epoch 1/100
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - accuracy: 0.5427 - loss: 0.6891
Epoch 2/100
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6139 - loss: 0.6583
Epoch 3/100
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6324 - loss: 0.6415
Epoch 4/100
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6442 - loss: 0.6220
Epoch 5/100
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6629 - loss: 0.6058
Epoch 6/100
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6670 - loss: 0.6100
Epoch 7/100
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6724 - loss: 0.5988
Epoch 8/100
[1m400/400[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.6844 - loss: 0.5889
Epoch 9/100
[1m400/400[0m [32

<keras.src.callbacks.history.History at 0x1e8b5918bb0>

## Part 4 - Making the predictions and evaluating the model

### Predicting the Test set results

In [15]:
# Raw sigmoid outputs for the test split.
y_prob = ann.predict(X_test)

# Threshold at 0.5 to obtain boolean class predictions (True = Signal).
y_pred = y_prob > 0.5

[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


### Making the Confusion Matrix

In [16]:
from sklearn.metrics import accuracy_score, confusion_matrix

# Confusion matrix of true labels against thresholded predictions
# (scikit-learn convention: rows are true classes, columns are predicted).
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

# Overall fraction of correct predictions; shown as the cell's output.
accuracy_score(y_true=y_test, y_pred=y_pred)

[[544 226]
 [287 543]]


0.679375

In [17]:
# Score the trained model on the held-out test split. evaluate() returns the
# loss followed by the compiled metrics (here: accuracy).
test_metrics = ann.evaluate(X_test, y_test)
loss, accuracy = test_metrics
print(f'Test loss: {loss}')
print(f'Test accuracy: {accuracy}')

[1m50/50[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.6685 - loss: 0.5900
Test loss: 0.5833612084388733
Test accuracy: 0.6793749928474426


In [18]:
# Overfitting check: score the model on the data it was trained on and compare
# this accuracy with the test accuracy above; a large gap suggests overfitting.
train_metrics = ann.evaluate(X_train, y_train)
train_loss, train_acc = train_metrics
print("Training Accuracy:", train_acc)


[1m200/200[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.7349 - loss: 0.5216
Training Accuracy: 0.7276562452316284
