# Оптимизация Сети Добычи
Модель для предсказания оптимальных связей между нефтяными и газовыми скважинами и центрами сбора или переработки. Здесь узлы — это скважины, а рёбра — логистические или технологические связи. Каждому месторождению соответствует вектор атрибутов: название, страна, регион месторождения, тектонический режим, тип углеводорода, структурные свойства, литология, литологический период, проницаемость, долгота и широта, валовая прибыль.

In [1]:
import torch_geometric.transforms as T
from stable_gnn.model_link_predict import ModelLinkPrediction
from stable_gnn.graph import Graph

## Загружаем данные исходного графа

In [7]:
# Load the source oil-field graph with adjust_flag=False, i.e. without the
# graph refinement that the later adjust_flag=True section switches on.
root = "../data_validation/"
name = "oil_fields"
adjust_flag = False
dataset = Graph(
    root=f"{root}{name}",  # dataset folder: <root><name>
    name=name,
    transform=T.NormalizeFeatures(),
    adjust_flag=adjust_flag,
)

Processing...
Done!


## Обучаем модель link prediction
### Разбиение данных на train и test

In [30]:
# Build the link-prediction model (50 trials requested) and split the graph's
# edges into positive/negative train and test sets.
model = ModelLinkPrediction(number_of_trials=50)
edge_splits = model.train_test_edges(dataset)
train_edges, train_negative, test_edges, test_negative = edge_splits

### Обучаем на тренировочных рёбрах классификатор, который выдаёт 1, если ребро есть, и 0 — если нет

In [31]:
# Fit the edge classifier on the training positives and negatives.
# The cell output shows one log line per tuning trial.
cl = model.train_cl(
    train_edges,
    train_negative,
)

[32m[I 2023-12-18 16:47:05,967][0m A new study created in memory with name: no-name-fcbe11b4-ac46-4185-87e0-6ebb50b34124[0m
[32m[I 2023-12-18 16:47:12,032][0m Trial 0 finished with value: 4.769160747528076 and parameters: {'hidden_layer': 128, 'dropout': 0.1, 'size of network, number of convs': 3, 'lr': 0.006525558158234442, 'out_layer': 64, 'num_negative_samples': 11, 'alpha': 0.7}. Best is trial 0 with value: 4.769160747528076.[0m
[32m[I 2023-12-18 16:47:17,560][0m Trial 1 finished with value: 4.939577102661133 and parameters: {'hidden_layer': 32, 'dropout': 0.5, 'size of network, number of convs': 2, 'lr': 0.007343418027004721, 'out_layer': 64, 'num_negative_samples': 16, 'alpha': 0.8}. Best is trial 0 with value: 4.769160747528076.[0m
[32m[I 2023-12-18 16:47:24,488][0m Trial 2 finished with value: 4.620323181152344 and parameters: {'hidden_layer': 128, 'dropout': 0.4, 'size of network, number of convs': 1, 'lr': 0.009083136508257546, 'out_layer': 64, 'num_negative_sample

### Проведем тестирование и узнаем точность модели

In [32]:
# Score the trained classifier on the held-out edges and report the result.
f1_score = model.test(cl, test_edges, test_negative)
print("f1 measure", f1_score)

f1 measure 0.9718341095586605


In [33]:
import numpy as np

# F1 scores collected by hand from ten separate runs on the original graph
# (the last entry is the score printed by the previous cell).
accs = [
    0.8967798853109836,
    0.8383181621153013,
    0.8717948717948718,
    0.8447637709558022,
    0.8551483420593369,
    0.9113756613756614,
    0.912634114298226,
    0.896445809565598,
    0.955190771960958,
    0.9718341095586605,
]

# Mean and standard deviation over the ten runs.
mean_f1 = np.mean(accs)
std_f1 = np.std(accs)
print(mean_f1, std_f1)

0.89542854989954 0.04239351078946188


## Загружаем данные и уточняем граф с помощью флага adjust_flag = True

In [2]:
# Reload the same oil-field data, this time with adjust_flag=True so that the
# graph is refined while the dataset is built.
root = "../data_validation/"
name = "oil_fields"
adjust_flag = True

dataset = Graph(
    root=f"{root}{name}",  # dataset folder: <root><name>
    name=name,
    transform=T.NormalizeFeatures(),
    adjust_flag=adjust_flag,
)

In [50]:
# Length of the `x` attribute of the dataset's first item
# (presumably the number of nodes — confirm against the Graph class).
first_graph = dataset[0]
len(first_graph.x)

396

### Обучаем классификатор на тренировочных ребрах

In [35]:
# Repeat the full split / train / test cycle ten times on the adjusted graph
# and collect each run's test score. The names `model`, `cl`, `test_edges`
# and `test_negative` keep the values from the final run after the loop.
accs = []
for _run in range(10):
    model = ModelLinkPrediction(number_of_trials=50)
    edge_splits = model.train_test_edges(dataset)
    train_edges, train_negative, test_edges, test_negative = edge_splits
    cl = model.train_cl(train_edges, train_negative)
    run_score = model.test(cl, test_edges, test_negative)
    accs.append(run_score)


[32m[I 2023-12-18 17:05:02,519][0m A new study created in memory with name: no-name-121dc2ea-b1a8-4086-a1f5-0eed4cb4949f[0m
[32m[I 2023-12-18 17:05:07,527][0m Trial 0 finished with value: 3.089560031890869 and parameters: {'hidden_layer': 32, 'dropout': 0.0, 'size of network, number of convs': 1, 'lr': 0.00987370020016234, 'out_layer': 32, 'num_negative_samples': 16, 'alpha': 0.3}. Best is trial 0 with value: 3.089560031890869.[0m
[32m[I 2023-12-18 17:05:14,267][0m Trial 1 finished with value: 2.5521743297576904 and parameters: {'hidden_layer': 64, 'dropout': 0.2, 'size of network, number of convs': 1, 'lr': 0.008548572512505577, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.1}. Best is trial 1 with value: 2.5521743297576904.[0m
[32m[I 2023-12-18 17:05:20,625][0m Trial 2 finished with value: 2.5620169639587402 and parameters: {'hidden_layer': 64, 'dropout': 0.1, 'size of network, number of convs': 1, 'lr': 0.006606238146237999, 'out_layer': 128, 'num_negative_samp

In [36]:
import numpy as np

# Mean and standard deviation of the scores gathered by the repeated runs.
mean_score = np.mean(accs)
std_score = np.std(accs)
print(mean_score, std_score)

0.5118064851774533 0.02222114645791002


### Проведем тестирование и узнаем точность модели для уточненного графа

In [37]:
# Score the classifier left over from the last repeated run on its own
# held-out edges and report the result.
f1_score = model.test(cl, test_edges, test_negative)
print("f1 measure", f1_score)

f1 measure 0.4937655860349127


In [6]:
import numpy as np

# Average of the collected scores; as the last expression of the cell it is
# displayed by the notebook.
np.asarray(accs).mean()

0.46890657795921387

In [8]:
# Ten repeated runs on the graph loaded with adjust_flag=True, with
# number_of_trials=0 (presumably this skips the hyper-parameter search —
# confirm against ModelLinkPrediction).
accs = []
for _run in range(10):
    model = ModelLinkPrediction(number_of_trials=0)
    edge_splits = model.train_test_edges(dataset)
    train_edges, train_negative, test_edges, test_negative = edge_splits
    cl = model.train_cl(train_edges, train_negative)
    run_score = model.test(cl, test_edges, test_negative)
    accs.append(run_score)

# Mean and standard deviation over the ten runs.
mean_score = np.mean(accs)
std_score = np.std(accs)
print(mean_score, std_score)

  emb_norm = torch.nn.functional.normalize(torch.tensor(self.embeddings))
  emb_norm = torch.nn.functional.normalize(torch.tensor(self.embeddings))
  emb_norm = torch.nn.functional.normalize(torch.tensor(self.embeddings))
  emb_norm = torch.nn.functional.normalize(torch.tensor(self.embeddings))
  emb_norm = torch.nn.functional.normalize(torch.tensor(self.embeddings))
  emb_norm = torch.nn.functional.normalize(torch.tensor(self.embeddings))
  emb_norm = torch.nn.functional.normalize(torch.tensor(self.embeddings))
  emb_norm = torch.nn.functional.normalize(torch.tensor(self.embeddings))
  emb_norm = torch.nn.functional.normalize(torch.tensor(self.embeddings))
  emb_norm = torch.nn.functional.normalize(torch.tensor(self.embeddings))
  emb_norm = torch.nn.functional.normalize(torch.tensor(self.embeddings))
  emb_norm = torch.nn.functional.normalize(torch.tensor(self.embeddings))
  emb_norm = torch.nn.functional.normalize(torch.tensor(self.embeddings))
  emb_norm = torch.nn.functional.norma

0.7229472424842118 0.014154189848559215


In [None]:
#make adjust flag false
for _ in range(10):
    model = ModelLinkPrediction(number_of_trials=0)
    train_edges, train_negative, test_edges, test_negative = model.train_test_edges(dataset)
    cl = model.train_cl(train_edges,train_negative)
    accs.append(model.test(cl,test_edges,test_negative))
print(np.mean(accs), np.std(accs))