In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras


In [4]:
csv_file = "Output_Files\ETAS_Output.csv"
df = pd.read_csv(csv_file)
df.head()

Unnamed: 0,1/1/1960,12:06:15 AM,1960.004341,-115.9425,33.1175,7.379938023,12.2093,a
0,1/2/1960,12:08:52 AM,1960.006166,-115.949,33.0793,4.250619,12.1828,a
1,1/2/1960,12:11:18 AM,1960.007855,-115.913,33.1003,3.922919,12.1759,a
2,1/2/1960,12:11:35 AM,1960.008053,-115.9316,33.0953,3.714657,12.1618,a
3,1/3/1960,12:11:56 AM,1960.008294,-115.9395,33.1015,3.588929,12.1603,a
4,1/3/1960,12:11:57 AM,1960.008302,-115.9203,33.1382,3.714651,12.1619,a


In [5]:
summary_stats = df.describe(include="all")
print(summary_stats)

         1/1/1960  12:06:15 AM   1960.004341     -115.9425       33.1175  \
count       15798        15798  15798.000000  15798.000000  15798.000000   
unique      10863        14442           NaN           NaN           NaN   
top     3/22/2001  12:33:48 AM           NaN           NaN           NaN   
freq           36            4           NaN           NaN           NaN   
mean          NaN          NaN   1995.089361   -117.671267     34.920364   
std           NaN          NaN     18.528216      2.120964      2.200233   
min           NaN          NaN   1960.006166   -123.216500     29.080400   
25%           NaN          NaN   1979.282607   -118.807725     33.292250   
50%           NaN          NaN   2001.531674   -117.352900     34.533500   
75%           NaN          NaN   2007.674053   -116.249950     36.939300   
max           NaN          NaN   2023.673653   -113.225700     39.050100   

         7.379938023       12.2093      a  
count   15798.000000  15798.000000  15798  

In [6]:
df.columns = ["Date", "Time", "Year", "X", "Y", "Magnitude", "Z", "Temp"]

In [7]:
new_df = df.copy()

new_df["Date"] = pd.to_datetime(new_df["Date"])
new_df.loc[new_df["Date"].dt.year > pd.Timestamp.now().year, "Date"] -= pd.DateOffset(years=100)

new_df.head()

Unnamed: 0,Date,Time,Year,X,Y,Magnitude,Z,Temp
0,1960-01-02,12:08:52 AM,1960.006166,-115.949,33.0793,4.250619,12.1828,a
1,1960-01-02,12:11:18 AM,1960.007855,-115.913,33.1003,3.922919,12.1759,a
2,1960-01-02,12:11:35 AM,1960.008053,-115.9316,33.0953,3.714657,12.1618,a
3,1960-01-03,12:11:56 AM,1960.008294,-115.9395,33.1015,3.588929,12.1603,a
4,1960-01-03,12:11:57 AM,1960.008302,-115.9203,33.1382,3.714651,12.1619,a


In [8]:
#Extract Date column into day, month, year...
new_df["Day"] = new_df["Date"].dt.day
new_df["Month"] = new_df["Date"].dt.month
new_df["Year"] = new_df["Date"].dt.year
new_df["DayOfWeek"] = new_df["Date"].dt.dayofweek
new_df["Quarter"] = new_df["Date"].dt.quarter
new_df.head()

Unnamed: 0,Date,Time,Year,X,Y,Magnitude,Z,Temp,Day,Month,DayOfWeek,Quarter
0,1960-01-02,12:08:52 AM,1960,-115.949,33.0793,4.250619,12.1828,a,2,1,5,1
1,1960-01-02,12:11:18 AM,1960,-115.913,33.1003,3.922919,12.1759,a,2,1,5,1
2,1960-01-02,12:11:35 AM,1960,-115.9316,33.0953,3.714657,12.1618,a,2,1,5,1
3,1960-01-03,12:11:56 AM,1960,-115.9395,33.1015,3.588929,12.1603,a,3,1,6,1
4,1960-01-03,12:11:57 AM,1960,-115.9203,33.1382,3.714651,12.1619,a,3,1,6,1


In [10]:
#Extract the time column into hours, mins, seconds...
new_df["Time"] = pd.to_datetime(new_df["Time"], format="%H:%M:%S.%f")
new_df["Hour"] = new_df["Time"].dt.hour
new_df["Minute"] = new_df["Time"].dt.minute
new_df["Second"] = new_df["Time"].dt.second
new_df["Millisecond"] = new_df["Time"].dt.microsecond // 1000
new_df.head()

ValueError: time data '12:08:52 AM' does not match format '%H:%M:%S.%f' (match)

In [None]:
# Drop the original "Time" column from the new DataFrame
new_df = new_df.drop("Time", axis=1)

# Drop the "Date" column from the new DataFrame
new_df = new_df.drop("Date", axis=1)
new_df = new_df.drop("Year", axis=1)
new_df.head()

Unnamed: 0,X,Y,Magnitude,Z,Temp,Day,Month,DayOfWeek,Quarter,Hour,Minute,Second,Millisecond
0,-121.882,37.0759,3.18,16.8899,a,7,1,3,1,0,28,40,0
1,-121.8449,37.0512,3.22,16.8739,a,8,1,4,1,0,33,48,0
2,-121.8604,37.091,3.13,16.9203,a,8,1,4,1,0,34,10,0
3,-121.8603,37.0607,3.53,16.9199,a,9,1,5,1,0,35,25,0
4,-121.8911,37.0653,3.04,16.9289,a,9,1,5,1,0,36,42,0


In [None]:
def split_dataframe(data, test_size=0.2, random_state=None):
    """
    Split a Pandas DataFrame into a training set and a test set.

    Parameters:
    - data (pd.DataFrame): The DataFrame to be split.
    - test_size (float or int, optional): The proportion of the dataset to include in the test split
      (between 0.0 and 1.0) or the number of samples if an int. Default is 0.2.
    - random_state (int or None, optional): Seed for the random number generator. If None, a random seed
      will be chosen. Default is None.

    Returns:
    - train_data (pd.DataFrame): The training set.
    - test_data (pd.DataFrame): The test set.
    """
    train_data, test_data = train_test_split(data, test_size=test_size, random_state=random_state)
    return train_data, test_data

In [None]:
train_set, test_set = split_dataframe(new_df, test_size=0.2, random_state=42)
train_set.head()

Unnamed: 0,X,Y,Magnitude,Z,Temp,Day,Month,DayOfWeek,Quarter,Hour,Minute,Second,Millisecond
25161,-115.3637,32.2063,3.63,4.641,b,2,3,2,1,4,1,35,0
8676,-118.473,34.31,3.45,1.1517,b,11,1,2,1,0,45,41,0
27364,-115.8189,32.7733,3.35,1.2321,a,25,4,5,2,7,33,56,0
265,-122.0587,37.331,4.11,17.1915,a,22,3,1,1,5,25,14,0
22727,-121.7861,37.2612,3.38,13.1088,a,2,7,6,3,12,2,58,0


In [None]:
# Extract features and labels from the DataFrames
X_train = train_set[['X', 'Y']].values
y_train = train_set['Magnitude'].values  # Replace 'target_column' with your actual target column name

X_test = test_set[['X', 'Y']].values
y_test = test_set['Magnitude'].values  # Replace 'target_column' with your actual target column name


# Define a simple surrogate model
surrogate_model = keras.Sequential([
    keras.layers.Dense(1, input_shape=(1,), activation='linear')  # Linear regression model
])

# Compile the surrogate model
surrogate_model.compile(optimizer='adam', loss='mean_squared_error')

# Train the surrogate model
surrogate_model.fit(X_train, y_train, epochs=100, verbose=2)

# You can now use the surrogate model for predictions
predictions = surrogate_model.predict(X_test)

print("Predictions:")
print(predictions)

Epoch 1/100


ValueError: in user code:

    File "c:\Users\Vishal\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\src\engine\training.py", line 1338, in train_function  *
        return step_function(self, iterator)
    File "c:\Users\Vishal\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\src\engine\training.py", line 1322, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "c:\Users\Vishal\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\src\engine\training.py", line 1303, in run_step  **
        outputs = model.train_step(data)
    File "c:\Users\Vishal\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\src\engine\training.py", line 1080, in train_step
        y_pred = self(x, training=True)
    File "c:\Users\Vishal\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\src\utils\traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "c:\Users\Vishal\AppData\Local\Programs\Python\Python310\lib\site-packages\keras\src\engine\input_spec.py", line 280, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer 'sequential_6' (type Sequential).
    
    Input 0 of layer "dense_8" is incompatible with the layer: expected axis -1 of input shape to have value 1, but received input with shape (32, 2)
    
    Call arguments received by layer 'sequential_6' (type Sequential):
      • inputs=tf.Tensor(shape=(32, 2), dtype=float32)
      • training=True
      • mask=None


In [None]:
#Autoencoder

#build the simple encoder-decoder model. 
#Notice the number of neurons in each Dense layer. 
#The model will contract in the encoder then expand in the decoder.
encoder = keras.models.Sequential([keras.layers.Dense(2, input_shape=[3])])
decoder = keras.models.Sequential([keras.layers.Dense(3, input_shape=[2])])
  
autoencoder = keras.models.Sequential([encoder, decoder])
  
#compile the model
autoencoder.compile(loss="mse", optimizer=keras.optimizers.SGD(lr=0.1))
  
#train the model
history = autoencoder.fit(X_train, X_train, epochs=200)
  
# encode the data
codings = encoder.predict(X_train)
  
# decode the encoder output
decodings = decoder.predict(codings)