# Оптимизация Сети Добычи
модель для предсказания оптимальных связей между нефтяными и газовыми скважинами и центрами сбора или переработки. Здесь узлы - это скважины, а рёбра - логистические или технологические связи. Каждому месторождению соответствует вектор атрибутов: название, страна, регион месторождения, тектонический режим, тип углеводорода, структурные свойства, литология, литологический период, проницаемость, долгота и широта, валовая прибыль.

In [None]:
import pandas as pd
import torch_geometric.transforms as T
from stable_gnn.model_link_predict import ModelLinkPrediction
from stable_gnn.graph import Graph

## Загружаем данные исходного графа

In [None]:
root = '../data_validation/'
name = 'oil_fields'
adjust_flag = False
dataset = Graph(root=root + str(name), name=name, transform=T.NormalizeFeatures(),adjust_flag=adjust_flag)

In [29]:
results = pd.DataFrame(columns=['embeddings_learn', 'adjust_flag','test accuracy'])

## Обучаем модель link prediction на атрибутах
### Разбиене данных на train test

In [32]:
def num_of_trials(embeddings_learn):
    if embeddings_learn:
        return 50
    else:
        return 0
    
embeddings_learn = 0
num_trials = num_of_trials(num_of_trials)
    
model = ModelLinkPrediction(number_of_trials=num_trials)
train_edges, train_negative, test_edges, test_negative = model.train_test_edges(dataset)

### Обучаем классификатор на тренировочных ребрах, который выдает 1 если ребро есть и 0 -- если нет

In [None]:
cl = model.train_cl(train_edges,train_negative)

### Проведем тестирование и узнаем точность модели

In [31]:
test_accuracy = model.test(cl,test_edges,test_negative)
print("f1 measure", test_accuracy)
results=pd.concat([results, pd.DataFrame([embeddings_learn,adjust_flag,test_accuracy],index=results.columns).transpose()], ignore_index=True, axis=0)

  emb_norm = torch.nn.functional.normalize(torch.tensor(self.embeddings))


f1 measure 0.7483388704318938


# Теперь проверим точность в случае обучения векторных представлений

In [33]:
embeddings_learn = 50
num_trials = num_of_trials(num_of_trials)
    
model = ModelLinkPrediction(number_of_trials=num_trials)
train_edges, train_negative, test_edges, test_negative = model.train_test_edges(dataset)
cl = model.train_cl(train_edges,train_negative)
test_accuracy = model.test(cl,test_edges,test_negative)
print("f1 measure", test_accuracy)
results=pd.concat([results, pd.DataFrame([embeddings_learn,adjust_flag,test_accuracy],index=results.columns).transpose()], ignore_index=True, axis=0)

[I 2024-01-05 19:22:11,811] A new study created in memory with name: no-name-18e59030-35d6-40b9-bd38-66976b73f816
[I 2024-01-05 19:22:19,731] Trial 0 finished with value: 5.351813793182373 and parameters: {'hidden_layer': 64, 'dropout': 0.2, 'size of network, number of convs': 2, 'lr': 0.005609700116324191, 'out_layer': 32, 'num_negative_samples': 1, 'alpha': 0.9}. Best is trial 0 with value: 5.351813793182373.
[I 2024-01-05 19:22:48,265] Trial 1 finished with value: 3.3733105659484863 and parameters: {'hidden_layer': 64, 'dropout': 0.1, 'size of network, number of convs': 3, 'lr': 0.0069892351678262545, 'out_layer': 128, 'num_negative_samples': 21, 'alpha': 0.1}. Best is trial 1 with value: 3.3733105659484863.
[I 2024-01-05 19:22:55,849] Trial 2 finished with value: 4.6360979080200195 and parameters: {'hidden_layer': 64, 'dropout': 0.5, 'size of network, number of convs': 1, 'lr': 0.009450459868094067, 'out_layer': 32, 'num_negative_samples': 1, 'alpha': 0.5}. Best is trial 1 with val

f1 measure 0.9029783393501806


In [38]:
results

Unnamed: 0,embeddings_learn,adjust_flag,test accuracy
1,0,False,0.748339
2,50,False,0.902978


## Загружаем данные и уточняем граф с помощью флага adjust_flag = True

In [39]:
root = '../data_validation/'
name='oil_fields'
adjust_flag = True

dataset = Graph(root=root + str(name), name=name, transform=T.NormalizeFeatures(), adjust_flag=adjust_flag)

Processing...
Done!


### Обучаем классификатор на тренировочных ребрах сначала обучая на атрибутах

In [46]:
embeddings_learn = 0
num_trials = num_of_trials(embeddings_learn)

model = ModelLinkPrediction(number_of_trials=num_trials)
train_edges, train_negative, test_edges, test_negative = model.train_test_edges(dataset)
cl = model.train_cl(train_edges,train_negative)


  emb_norm = torch.nn.functional.normalize(torch.tensor(self.embeddings))


### Проведем тестирование и узнаем точность модели для уточненного графа

In [47]:
test_accuracy = model.test(cl,test_edges,test_negative)
print("f1 measure", test_accuracy)
results=pd.concat([results, pd.DataFrame([embeddings_learn,adjust_flag,test_accuracy],index=results.columns).transpose()], ignore_index=True, axis=0)

f1 measure 0.4895833333333334


  emb_norm = torch.nn.functional.normalize(torch.tensor(self.embeddings))


### Проверим точность при обучении векторных представлений

In [48]:
embeddings_learn = 50
num_trials = num_of_trials(embeddings_learn)

model = ModelLinkPrediction(number_of_trials=num_trials)
train_edges, train_negative, test_edges, test_negative = model.train_test_edges(dataset)
cl = model.train_cl(train_edges,train_negative)


[I 2024-01-05 19:38:24,761] A new study created in memory with name: no-name-03a197f5-94bb-4662-8240-04a882102189
[I 2024-01-05 19:38:35,862] Trial 0 finished with value: 3.3235795497894287 and parameters: {'hidden_layer': 32, 'dropout': 0.30000000000000004, 'size of network, number of convs': 1, 'lr': 0.009794583675427977, 'out_layer': 64, 'num_negative_samples': 21, 'alpha': 0.3}. Best is trial 0 with value: 3.3235795497894287.
[I 2024-01-05 19:38:44,030] Trial 1 finished with value: 4.4937744140625 and parameters: {'hidden_layer': 128, 'dropout': 0.4, 'size of network, number of convs': 1, 'lr': 0.006936588749113738, 'out_layer': 128, 'num_negative_samples': 11, 'alpha': 0.6}. Best is trial 0 with value: 3.3235795497894287.
[I 2024-01-05 19:38:47,504] Trial 2 finished with value: 4.087934494018555 and parameters: {'hidden_layer': 32, 'dropout': 0.30000000000000004, 'size of network, number of convs': 1, 'lr': 0.007823853508227683, 'out_layer': 128, 'num_negative_samples': 1, 'alpha'

### Проведем тестирование и узнаем точность модели для уточненного графа

In [49]:
test_accuracy = model.test(cl,test_edges,test_negative)
print("f1 measure", test_accuracy)
results=pd.concat([results, pd.DataFrame([embeddings_learn,adjust_flag,test_accuracy],index=results.columns).transpose()], ignore_index=True, axis=0)

f1 measure 0.5714285714285714


# Сводная таблица по точностям

In [50]:
results

Unnamed: 0,embeddings_learn,adjust_flag,test accuracy
0,0,False,0.748339
1,50,False,0.902978
2,0,True,0.489583
3,50,True,0.571429
