In [1]:
# Set the seed value for the notebook so the results are reproducible
from numpy.random import seed
seed(1)

In [2]:
# Dependencies
import numpy as np
import pandas as pd

In [3]:
import tensorflow
tensorflow.keras.__version__

'2.2.4-tf'

In [5]:
Eruptions = pd.read_csv('all_files.csv')
Eruptions.head()

Unnamed: 0,time,latitude,longitude,depth,mag,magType,nst,gap,dmin,rms,...,magError,magNst,status,locationSource,magSource,Name,Year,VEI,Lat_Eruption,Lon_Eruption
0,2001-04-22T13:56:35.450Z,37.719,15.097,10.0,4.1,mb,50.0,,,,...,,1.0,reviewed,rom,us,Etna,2002,3,37.734,15.004
1,2002-08-19T15:03:23.600Z,37.697,14.967,5.0,2.8,md,9.0,,,,...,,,reviewed,rom,rom,Etna,2002,3,37.734,15.004
2,2002-08-19T14:57:59.700Z,37.721,14.963,5.0,2.7,md,8.0,,,,...,,,reviewed,rom,rom,Etna,2002,3,37.734,15.004
3,2002-08-19T12:05:21.700Z,37.708,14.965,5.0,2.8,md,10.0,,,,...,,,reviewed,rom,rom,Etna,2002,3,37.734,15.004
4,2002-08-13T05:29:43.300Z,37.842,14.667,5.0,3.2,md,18.0,,,,...,,,reviewed,rom,rom,Etna,2002,3,37.734,15.004


## Data Pre-Processing

In [6]:
Eruptions.dtypes

time                object
latitude           float64
longitude          float64
depth              float64
mag                float64
magType             object
nst                float64
gap                float64
dmin               float64
rms                float64
net                 object
id                  object
updated             object
place               object
type                object
horizontalError    float64
depthError         float64
magError           float64
magNst             float64
status              object
locationSource      object
magSource           object
Name                object
Year                 int64
VEI                  int64
Lat_Eruption       float64
Lon_Eruption       float64
dtype: object

In [23]:
selected_features = Eruptions[['latitude','longitude','depth','mag','Lat_Eruption','Lon_Eruption','VEI']]
selected_features.head()

Unnamed: 0,latitude,longitude,depth,mag,Lat_Eruption,Lon_Eruption,VEI
0,37.719,15.097,10.0,4.1,37.734,15.004,3
1,37.697,14.967,5.0,2.8,37.734,15.004,3
2,37.721,14.963,5.0,2.7,37.734,15.004,3
3,37.708,14.965,5.0,2.8,37.734,15.004,3
4,37.842,14.667,5.0,3.2,37.734,15.004,3


In [24]:
X = selected_features.drop("VEI", axis=1)
y = selected_features["VEI"]
print(X.shape, y.shape)

(1187, 6) (1187,)


In [25]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical

In [26]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1)

In [27]:
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [28]:
# Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [29]:
# Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

# Create a Deep Learning Model

In [30]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [31]:
# Create model and add layers
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=20))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [32]:
# Compile and fit the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [33]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 100)               2100      
_________________________________________________________________
dense_1 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 202       
Total params: 12,402
Trainable params: 12,402
Non-trainable params: 0
_________________________________________________________________


In [34]:
model.fit(
    X_train_scaled,
    y_train_categorical,
    epochs=60,
    shuffle=True,
    verbose=2
)

ValueError: Error when checking input: expected dense_input to have shape (20,) but got array with shape (6,)

## Quantify our Trained Model

In [16]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=2)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

792/1 - 0s - loss: 0.0328 - accuracy: 0.9798
Normal Neural Network - Loss: 0.05984371190777782, Accuracy: 0.9797979593276978


## Make Predictions

In [17]:
encoded_predictions = model.predict_classes(X_test_scaled[:5])
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [18]:
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test[:5])}")

Predicted classes: ['female' 'female' 'female' 'female' 'female']
Actual Labels: ['female', 'female', 'female', 'female', 'female']
