In [107]:
import numpy as np
import torch
import requests

In [108]:
data_root_path = '../data'
loaded_data = torch.load(data_root_path + '/sequence_data.pt', weights_only=False)
X_seq = loaded_data['X_seq']
y_seq = loaded_data['y_seq']

In [109]:
import pandas as pd
import numpy as np
from geopy.distance import geodesic
from sklearn.cluster import KMeans

def calculate_distance(current_lat, current_lon, prev_lat, prev_lon):
    if pd.isnull(prev_lat) or pd.isnull(prev_lon):
        return 0
    return geodesic((current_lat, current_lon), (prev_lat, prev_lon)).km


def preprocess(data):
    data = data[['Date', 'Time', 'Latitude', 'Longitude', 'Magnitude']]
    data.loc[:, 'Timestamp'] = pd.to_datetime(data['Date'] + ' ' + data['Time'], format="%m/%d/%Y %H:%M:%S", errors='coerce')
    data = data[data['Timestamp'] >= pd.Timestamp('1970-01-01')]

    data.reset_index(drop=True, inplace=True)
    data = data.drop(['Date', 'Time'], axis=1)

    data['Year'] = data['Timestamp'].dt.year
    data['Month'] = data['Timestamp'].dt.month
    data['Day'] = data['Timestamp'].dt.day
    data['Weekday'] = data['Timestamp'].dt.weekday
    data['Hour'] = data['Timestamp'].dt.hour
 
    kmeans = KMeans(n_clusters=10, random_state=42)
    data['Region_Cluster'] = kmeans.fit_predict(data[['Latitude', 'Longitude']])

    data.drop('Timestamp', axis=1, inplace=True)

    return data

In [110]:
import pandas as pd


dataset = pd.read_csv(data_root_path + '/database.csv')

dataset = preprocess(dataset)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[:, 'Timestamp'] = pd.to_datetime(data['Date'] + ' ' + data['Time'], format="%m/%d/%Y %H:%M:%S", errors='coerce')


In [111]:
dataset['Region_Cluster'].value_counts()

Region_Cluster
0    4043
4    3710
6    2880
1    2727
3    1848
7    1824
2    1671
8    1355
9     983
5     912
Name: count, dtype: int64

In [112]:
dataset = dataset[dataset['Region_Cluster'] == 4]

In [113]:
dataset = dataset[:50]

In [114]:
dataset = dataset[:42]

In [115]:
dataset

Unnamed: 0,Latitude,Longitude,Magnitude,Year,Month,Day,Weekday,Hour,Region_Cluster
6,-9.166,117.235,5.8,1970,1,9,4,23,4
7,6.785,126.682,7.2,1970,1,10,5,12,4
8,6.71,126.863,5.7,1970,1,10,5,13,4
9,6.71,126.765,5.7,1970,1,10,5,14,4
10,7.326,126.857,6.0,1970,1,11,6,5,4
11,-10.169,123.233,6.0,1970,1,14,2,2,4
33,12.584,121.952,6.7,1970,2,5,3,22,4
35,12.687,121.924,6.0,1970,2,6,4,2,4
39,-5.936,130.638,5.8,1970,2,10,1,19,4
40,-5.923,112.997,6.5,1970,2,13,4,15,4


In [123]:
# set magnitude as 0
dataset.iloc[41, 2] = 0.0

In [124]:
dataset.iloc[41]

Latitude            -8.683
Longitude          124.000
Magnitude            0.000
Year              1970.000
Month                6.000
Day                 28.000
Weekday              6.000
Hour                 1.000
Region_Cluster       4.000
Name: 182, dtype: float64

In [127]:
md = dataset.reset_index(drop=True).to_markdown
md

<bound method DataFrame.to_markdown of     Latitude  Longitude  Magnitude  Year  Month  Day  Weekday  Hour  \
0     -9.166    117.235        5.8  1970      1    9        4    23   
1      6.785    126.682        7.2  1970      1   10        5    12   
2      6.710    126.863        5.7  1970      1   10        5    13   
3      6.710    126.765        5.7  1970      1   10        5    14   
4      7.326    126.857        6.0  1970      1   11        6     5   
5    -10.169    123.233        6.0  1970      1   14        2     2   
6     12.584    121.952        6.7  1970      2    5        3    22   
7     12.687    121.924        6.0  1970      2    6        4     2   
8     -5.936    130.638        5.8  1970      2   10        1    19   
9     -5.923    112.997        6.5  1970      2   13        4    15   
10    -0.045    122.999        5.9  1970      2   15        6    12   
11     9.846    125.912        5.6  1970      2   17        1     5   
12    -8.734    124.062        5.7  19

In [128]:
# connecting to ollama : llama3.2:1b

ollama_url = "http://localhost:11434/v1/chat/completions"

payload = {
    "model": "llama3.2:1b",
    "messages": [
        {
            "role": "user",
            "content": f"{md} \nThis is the earthquakes data. You need to output a single value for the predicted earthquake magnitude of the last row. It is currently set as 0.0 as we don't have it's measurements. You need to output only a single value from 0.0 to 10.0."
        }
    ]
}

response = requests.post(ollama_url, json=payload)

KeyboardInterrupt: 

In [22]:
import json

response_json = response.json()
response_json['choices'][0]['message']['content']

"I'm doing well, thanks for asking. I'm a large language model, so I don't have feelings or emotions like humans do, but I'm here and able to assist you with any questions or tasks you have for me. How about you? How's your day going?"

In [129]:
import torch.nn as nn
class EarthquakeMagnitudeLSTM(nn.Module):
    def __init__(self, input_size, hidden_size=128):
        super(EarthquakeMagnitudeLSTM, self).__init__()
        
        self.first_lstm_layers = nn.ModuleList([
            nn.LSTM(
                input_size if i == 0 else hidden_size * 2, 
                hidden_size, 
                batch_first=True, 
                bidirectional=True
            ) for i in range(3)
        ])
        
        self.first_attention = nn.MultiheadAttention(
            embed_dim=hidden_size * 2, 
            num_heads=4, 
            dropout=0.2,
            batch_first=True
        )
        
        self.second_lstm_layers = nn.ModuleList([
            nn.LSTM(
                hidden_size * 2, 
                hidden_size, 
                batch_first=True, 
                bidirectional=True
            ) for _ in range(3)
        ])
        
        self.second_attention = nn.MultiheadAttention(
            embed_dim=hidden_size * 2, 
            num_heads=4, 
            dropout=0.2,
            batch_first=True
        )
        
        self.magnitude_predictor = nn.Sequential(
            nn.Linear(hidden_size * 2, 256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 1)
        )
        
    def forward(self, x):
        for lstm_layer in self.first_lstm_layers:
            x, _ = lstm_layer(x)
        
        x, _ = self.first_attention(x, x, x)
        
        for lstm_layer in self.second_lstm_layers:
            x, _ = lstm_layer(x)
        
        x, _ = self.second_attention(x, x, x)
        
        pooled_features = torch.mean(x, dim=1)
        
        magnitude = self.magnitude_predictor(pooled_features)
        
        return magnitude

In [None]:
from pathlib import Path
from torch.utils.data import DataLoader



hidden_size = 64
n_epochs = 32
lr = 0.005
lr_str = str(lr).replace('.', 'P')
batch_size = 32
window_size = 100
model_path = Path(__file__).parent.parent / 'model' / 'modelfile' / f'model_{n_epochs}_{lr_str}_{batch_size}_{window_size}.pt'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = EarthquakeMagnitudeLSTM(X_seq.shape[-1], hidden_size=hidden_size)
model.load_state_dict(torch.load(model_path, weights_only=True))
model.to(device)
model.eval()

test_dataset = torch.utils.data.TensorDataset(X_test, Y_test)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

all_predictions = []
all_true_labels = []

with torch.no_grad():
    for batch_features, batch_labels in test_dataloader:
        batch_features, batch_labels = batch_features.to(device), batch_labels.to(device)
        
        predictions = model(batch_features).squeeze()
        
        all_predictions.extend(predictions.cpu().numpy())
        all_true_labels.extend(batch_labels.cpu().numpy())

# Convert to numpy arrays
predictions = np.array(all_predictions)
true_labels = np.array(all_true_labels)