In [1]:
import numpy as np
import pandas as pd
import torch
import segyio

In [3]:
filename = "../../data/external/Area2_all_processed_data.sgy"
with segyio.open(filename, "r", endian = 'big', ignore_geometry=True) as segyfile:
    # Читаем заголовки трасс
    headers = segyfile.header
    print(segyfile.trace[0])

[277.43115 298.11084 332.88135 385.84375 482.18896 572.0332  627.917
 630.11035 599.30444 550.3804 ]


In [4]:
model = torch.load("../../mlartifacts/536689916772439481/c76673cf9ade4355b09fb4397d4026d8/artifacts/model/data/model.pth")

In [6]:
from seysmo.models.utils import give_data

X_train, y_train, coord_train, X_val, y_val, coord_val, X_test, y_test, coord_test = give_data(
    '../../data/processed/coord_dict.pkl')

In [7]:
X_train = np.transpose(X_train, (0,2,1))
X_val = np.transpose(X_val, (0,2,1))
X_test = np.transpose(X_test, (0,2,1))
X_train.shape

(11045, 438, 33)

In [8]:
from torch.utils.data import DataLoader
from seysmo.models.utils import SignalSpeedDataset

train_dataset = SignalSpeedDataset(X_train, y_train)
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_dataset = SignalSpeedDataset(X_val, y_val)
val_dataloader = DataLoader(val_dataset, batch_size=32, shuffle=True)
test_dataset = SignalSpeedDataset(X_test, y_test)
test_dataloader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [10]:
from torchmetrics import MeanAbsolutePercentageError
from torch import nn

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_batches = len(test_dataloader)
eval_loss, eval_mape = 0, 0
loss_fn = nn.MSELoss()
metric_fn = MeanAbsolutePercentageError().to(device)
with torch.no_grad():
    for X, y in test_dataloader:
        X, y = X.to(device), y.to(device)
        pred = model(X)
        eval_loss += loss_fn(torch.reshape(pred, (-1, 10)), torch.reshape(y, (-1, 10))).item()
        eval_mape += metric_fn(torch.reshape(pred, (-1, 10)), torch.reshape(y, (-1, 10))).item()

eval_loss /= num_batches
eval_mape /= num_batches
print(f"Test loss: {eval_loss:4f}")
print(f"Test MAPE: {eval_mape:4f}")

Test loss: 1090.713867
Test MAPE: 0.069365


In [12]:
filename = "../../data/raw/second_place/Area2_all_processed_data.sgy"
with segyio.open(filename, "r", endian = 'big', ignore_geometry=True) as segyfile:
    tr_start = 0
    tr_last = 1

    while tr_last != len(segyfile.trace) + 1:
        while segyfile.header[tr_last][segyio.TraceField.FieldRecord] == segyfile.header[tr_start][
            segyio.TraceField.FieldRecord]:
            tr_last += 1
            if tr_last == len(segyfile.trace):
                break
        seysmogramm = pd.DataFrame(segyfile.trace[tr_start:tr_last]).values
        print(seysmogramm.shape)
        break
        # rec_mid_x = floor(
        #     ((segyfile.header[tr_start][segyio.TraceField.GroupX] / 100 + segyfile.header[tr_last - 1][
        #         segyio.TraceField.GroupX] / 100) / 2))
        # rec_mid_y = floor(
        #     ((segyfile.header[tr_start][segyio.TraceField.GroupY] / 100 + segyfile.header[tr_last - 1][
        #         segyio.TraceField.GroupY] / 100) / 2))
        # seysm_dict[(rec_mid_x, rec_mid_y)] = seysmogramm
        # tr_start, tr_last = tr_last, tr_last + 1
        # print(tr_start, '/', len(segyfile.trace))

(28, 313)


In [76]:
filename = "../../data/raw/second_place/Result_TXT/Area1/1003.txt"
df = pd.read_csv(filename, delimiter='\t')

In [77]:
df

Unnamed: 0,Receiver Midpoint,Depth,Velocity,REC_STAT1,REC_STAT2,Vs30
0,533267.2,5.0,115.357,0,0,169.12
1,533267.2,15.0,203.346,0,0,169.12
2,533267.2,25.0,240.804,0,0,169.12
3,533267.2,35.0,280.395,0,0,169.12
4,533267.2,45.0,357.926,0,0,169.12
...,...,...,...,...,...,...
965,545139.3,55.0,528.726,0,0,277.64
966,545139.3,65.0,516.216,0,0,277.64
967,545139.3,75.0,506.550,0,0,277.64
968,545139.3,85.0,485.454,0,0,277.64


In [78]:
df = df.groupby(['Receiver Midpoint'])

In [79]:
def process_group(group):
    # Сортировка по Depth
    sorted_group = group.sort_values(by='Depth')
    # Получаем все значения Velocity в виде списка
    velocity_array = sorted_group['Velocity'].tolist()
    return velocity_array

In [80]:
result = df.apply(process_group)
result_df = pd.DataFrame(result, columns=['Velocity'])

  result = df.apply(process_group)


In [81]:
result_df


Unnamed: 0_level_0,Velocity
Receiver Midpoint,Unnamed: 1_level_1
533267.20,"[115.357, 203.346, 240.804, 280.395, 357.926, ..."
533806.15,"[107.0, 200.0, 199.0, 282.0, 333.0, 372.0, 450..."
534475.30,"[111.462, 170.001, 244.3, 305.004, 260.754, 42..."
534590.20,"[189.692, 190.679, 308.419, 337.618, 381.47, 4..."
534702.90,"[177.322, 208.138, 311.379, 363.679, 404.736, ..."
...,...
544699.65,"[172.091, 249.601, 367.132, 433.192, 498.16, 5..."
544808.30,"[164.407, 252.191, 353.682, 467.105, 525.42, 5..."
544917.45,"[164.426, 266.763, 329.936, 443.107, 508.66, 5..."
545026.05,"[216.068, 236.916, 395.678, 449.924, 443.701, ..."


In [93]:
import pickle

with open('../../data/processed/second_place.pkl', 'rb') as f:
    data_coord = pickle.load(f)

In [94]:
len(data_coord)

6603

In [95]:
data_coord

{(533267,
  7667834): (array([[0.        , 0.        , 0.        , ..., 0.00283979, 0.00300987,
          0.00391184],
         [0.        , 0.        , 0.        , ..., 0.00586825, 0.00547483,
          0.00396711],
         [0.        , 0.        , 0.        , ..., 0.00283979, 0.00300987,
          0.00391184],
         ...,
         [0.        , 0.        , 0.        , ..., 0.00586825, 0.00547483,
          0.00396711],
         [0.        , 0.        , 0.        , ..., 0.00283979, 0.00300987,
          0.00391184],
         [0.        , 0.        , 0.        , ..., 0.00586825, 0.00547483,
          0.00396711]], dtype=float32), Velocity    [115.357, 203.346, 240.804, 280.395, 357.926, ...
  Name: 533267.2, dtype: object),
 (533806,
  7666848): (array([[ 0.        ,  0.        ,  0.        , ..., -0.00694793,
          -0.00512913, -0.00280678],
         [ 0.        ,  0.        ,  0.        , ..., -0.00685175,
          -0.00656113, -0.00630696],
         [ 0.        ,  0.        ,