# Keras Classifier (Experimental)

### Import Libraries

In [1]:
import os
import torch
import torch.nn as nn

import pandas as pd
import matplotlib.pyplot as plt



In [2]:
from google.colab import files
uploaded = files.upload()

csv_file = "Formatted_ETAS_Output.csv"

# Read the CSV file using Pandas
df = pd.read_csv(csv_file, sep=',', lineterminator='\n')

# # Access the data in the DataFrame


ModuleNotFoundError: No module named 'google'

In [4]:
summary_stats = df.describe(include="all")
print(summary_stats)

           Date        Time          Year            X             Y  \
count     32001       32001  32001.000000  32001.00000  32001.000000   
unique    19127       26803           NaN          NaN           NaN   
top     4/26/64  0:31:11.00           NaN          NaN           NaN   
freq         10           5           NaN          NaN           NaN   
mean        NaN         NaN   1991.924790   -117.54676     34.810868   
std         NaN         NaN     18.407271      2.11036      2.388448   
min         NaN         NaN   1960.002196   -123.48700     29.080400   
25%         NaN         NaN   1975.851679   -118.73330     33.343000   
50%         NaN         NaN   1992.311728   -117.20600     34.587100   
75%         NaN         NaN   2007.784510   -116.10520     36.820000   
max         NaN         NaN   2023.487118   -113.24630     39.475100   

           Magnitude           Z\r  
count   32001.000000  32001.000000  
unique           NaN           NaN  
top              NaN    

### Data Table

In [None]:
new_df = df.copy()

# Convert "Date" column to datetime in the new DataFrame
new_df["Date"] = pd.to_datetime(new_df["Date"], errors="coerce", format="%m/%d/%y")

# Adjust two-digit year values to four-digit format in the new DataFrame
new_df.loc[new_df["Date"].dt.year > pd.Timestamp.now().year, "Date"] -= pd.DateOffset(years=100)

# Extract components from the "Date" column in the new DataFrame
new_df["Day"] = new_df["Date"].dt.day
new_df["Month"] = new_df["Date"].dt.month
new_df["Year"] = new_df["Date"].dt.year
new_df["DayOfWeek"] = new_df["Date"].dt.dayofweek
new_df["Quarter"] = new_df["Date"].dt.quarter

# Convert "Time" column to datetime in the new DataFrame
new_df["Time"] = pd.to_datetime(new_df["Time"], format="%H:%M:%S.%f")

# Extract time components in the new DataFrame
new_df["Hour"] = new_df["Time"].dt.hour
new_df["Minute"] = new_df["Time"].dt.minute
new_df["Second"] = new_df["Time"].dt.second
new_df["Millisecond"] = new_df["Time"].dt.microsecond // 1000

# Drop the original "Time" column from the new DataFrame
new_df = new_df.drop("Time", axis=1)

# Drop the "Date" column from the new DataFrame
new_df = new_df.drop("Date", axis=1)
new_df = new_df.drop("Year", axis=1)

new_df.head()

Unnamed: 0,X,Y,Magnitude,Z\r,Day,Month,DayOfWeek,Quarter,Hour,Minute,Second,Millisecond
0,-119.0502,33.979,6.5,8.2474,31,12,3,4,0,3,9,0
1,-115.6222,33.0793,4.25,7.9322,2,1,5,1,0,8,49,0
2,-115.6323,33.122,3.03,8.4015,2,1,5,1,0,10,31,0
3,-115.5851,33.0745,3.03,7.9678,2,1,5,1,0,10,32,0
4,-115.6256,33.029,3.08,7.9737,2,1,5,1,0,11,7,0


### Model Training And Preprocessing

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

X = new_df.drop("Magnitude", axis=1)  # Features
y = new_df["Magnitude"]  # Target variable

# Create a StandardScaler object
scaler = StandardScaler()

# Scale the features
X_scaled = scaler.fit_transform(X)

# Split the scaled data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.33, random_state=42)

# Print the data to verify the changes
print("Scaled X_train:")
print(X_train)

print("y_train:")
print(y_train)

print("Scaled X_test:")
print(X_test)

print("y_test:")
print(y_test)


Scaled X_train:
[[ 1.8470883  -1.55972662 -0.77871094 ...  1.12688298 -0.54222518
   0.        ]
 [ 0.30505242  0.70135846 -0.52148207 ...  1.01160055 -0.54222518
   0.        ]
 [-0.73255927  1.11489704  1.01761294 ...  0.89631811 -1.4696426
   0.        ]
 ...
 [-0.40360552 -0.5749714   1.65289841 ... -0.89055958  1.54446401
   0.        ]
 [ 0.72015396  0.9207512   1.45023324 ... -0.37178863  0.84890095
   0.        ]
 [-1.95810392  1.04719511  1.58157829 ...  1.70329514  1.02279171
   0.        ]]
y_train:
19364    3.02
18995    3.00
12854    3.02
25371    3.86
21813    3.38
         ... 
29802    3.24
5390     3.05
860      3.14
15795    3.29
23654    5.13
Name: Magnitude, Length: 21440, dtype: float64
Scaled X_test:
[[ 1.04958158 -1.03921315  1.44085029 ...  0.95395933  1.19668248
   0.        ]
 [-0.04205018  0.70303321  0.13277997 ... -1.3516893  -0.426298
   0.        ]
 [-0.2884101   1.43117429 -1.3567102  ... -0.48707107  1.25464607
   0.        ]
 ...
 [ 1.93243052 -2.19638

### Keras Classifier

In [None]:
from keras.models import Sequential
from keras.layers import Dense

def create_model(neurons, activation, optimizer, loss):
    model = Sequential()
    model.add(Dense(neurons, activation=activation, input_shape=(11,)))
    model.add(Dense(neurons, activation=activation))
    model.add(Dense(2, activation='softmax'))

    model.compile(optimizer=optimizer, loss=loss, metrics=['accuracy'])

    return model

In [None]:
from keras.wrappers.scikit_learn import KerasClassifier

model = KerasClassifier(build_fn=create_model, verbose=0)

neurons = [16]
batch_size = [10]
epochs = [10]

# activation = ['relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear', 'exponential']
activation = ['sigmoid', 'relu']
# optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']

optimizer = ['SGD', 'Adadelta']
loss = ['squared_hinge']

param_grid = dict(neurons=neurons, batch_size=batch_size, epochs=epochs, activation=activation, optimizer=optimizer, loss=loss)

  model = KerasClassifier(build_fn=create_model, verbose=0)


In [None]:
from sklearn.model_selection import GridSearchCV

grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1)
grid_result = grid.fit(X_train, y_train)

print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.022528 using {'activation': 'relu', 'batch_size': 10, 'epochs': 10, 'loss': 'squared_hinge', 'neurons': 16, 'optimizer': 'SGD'}
0.022015 (0.001222) with: {'activation': 'sigmoid', 'batch_size': 10, 'epochs': 10, 'loss': 'squared_hinge', 'neurons': 16, 'optimizer': 'SGD'}
0.021922 (0.002101) with: {'activation': 'sigmoid', 'batch_size': 10, 'epochs': 10, 'loss': 'squared_hinge', 'neurons': 16, 'optimizer': 'Adadelta'}
0.022528 (0.001316) with: {'activation': 'relu', 'batch_size': 10, 'epochs': 10, 'loss': 'squared_hinge', 'neurons': 16, 'optimizer': 'SGD'}
0.022108 (0.002114) with: {'activation': 'relu', 'batch_size': 10, 'epochs': 10, 'loss': 'squared_hinge', 'neurons': 16, 'optimizer': 'Adadelta'}
