In [1]:
import pandas as pd
import numpy as np
import math
import os
import re
import json
import unicodedata
from owlready2 import *
from getpass import getpass
from IPython.display import clear_output
import collections

In [2]:
from urllib.parse import quote

# Corporate proxy credentials: the user key comes from the BG40 environment
# variable and the password is prompted for, so neither is stored in the notebook.
chave  = os.getenv('BG40')
if not chave:
    # Without this guard the proxy URL would silently be built with user "None".
    raise RuntimeError("Environment variable BG40 (proxy user key) is not set")
senha  = getpass('Senha: ')

# Percent-encode both parts: characters such as '@', ':', '/' or '#' in the
# password would otherwise be parsed as URL delimiters and break authentication.
_proxy_url = f"http://{quote(chave, safe='')}:{quote(senha, safe='')}@inet-sys.petrobras.com.br:804"

os.environ['HTTP_PROXY']  = _proxy_url
os.environ['HTTPS_PROXY'] = _proxy_url
os.environ['NO_PROXY']    = '127.0.0.1, localhost, petrobras.com.br, petrobras.biz'

Senha:  ········


In [3]:
def _load_ontology(ontologyPath):
    ontology = get_ontology("file://"+ontologyPath)
    ontology.load()
    return ontology

#### Insert list of basins as individuals

Creates instances for the BASIN entity. Each instance is registered under a URI with a unique code (BASE_CD_BACIA).

In [4]:
def _insert_basins(ontology, basinListPath):
    '''
    Populates the BASIN class of a given ontology with instances.
    Entries: list of basin codes and names (json) and ontology containing a #basin class (.owl)
    Returns the populated ontology
    '''
    with open(basinListPath, 'r') as f:
        instances = json.load(f)
    
    for instance in instances:
        newInstance=ontology.basin(instance)
        newInstance.label.append(instances[instance])
    
    return ontology

#### Insert list of fields as individuals

Creates instances for the FIELD entity. Each instance is registered under a URI with a unique code (CAMP_CD_CAMPO).

In [5]:
def _insert_fields(ontology, fieldListPath):
    '''
    Populates the FIELD class of a given ontology with instances.
    Entries: list of field codes and names (json) and ontology containing a #field class (.owl)
    Returns the populated ontology
    '''
    with open(fieldListPath, 'r') as f:
        instances = json.load(f)
    
    for instance in instances:
        newInstance=ontology.field(instance)
        newInstance.label.append(instances[instance])

    return ontology

#### Insert list of lithostratigraphic units as individuals

Creates instances for the LITHOSTRATIGRAPHIC_UNIT entity. Each instance is registered under a sequential numeric code.

In [6]:
def _insert_lithostratigraphic_units(ontology, lithostratigraphyListPath):
    '''
    Populates the LITHOSTRATIGRAPHIC UNIT class of a given ontology with instances.
    Entries: list of lithostratigraphic codes and names (json) and ontology containing a #lithostratigraphic_unit class (.owl)
    Returns the populated ontology
    '''
    with open(lithostratigraphyListPath, 'r') as f:
        instances = json.load(f)

    for lithostratigraphicUnit in instances.keys():
        newInstance=ontology.lithostratigraphic_unit(lithostratigraphicUnit)
        if lithostratigraphicUnit.startswith('GRUPO_'):
            newInstance.label.append('Grupo '+instances[lithostratigraphicUnit])

        if lithostratigraphicUnit.startswith('FORMACAO_'):
            newInstance.label.append('Formação '+instances[lithostratigraphicUnit])

        if lithostratigraphicUnit.startswith('MEMBRO_'):
            newInstance.label.append('Membro '+instances[lithostratigraphicUnit])
    
    return ontology

#### Insert geological structure instances

Creates instances for the GEOLOGICAL STRUCTURE entity. Each instance is registered under a URI with a unique numeric code.

In [7]:
def _insert_geological_structures(ontology, geologicalStructureListPath, uri='ESTRUTURA_'):
    '''
    Populates the GEOLOGICAL STRUCTURE class of a given ontology with instances.
    Entries: list of geological structure codes and names (json) and ontology containing a #geological_structure class (.owl)
    Returns the populated ontology
    '''
    with open(geologicalStructureListPath, 'r') as f:
        instances = json.load(f)
    
    for instance in instances:
        newInstance=ontology.geological_structure(uri+str(instance).zfill(3))
        newInstance.label.append(instances[instance])

    return ontology

#### Insert texture instances

Creates instances for the TEXTURE entity. Each instance is registered under a URI with a unique numeric code.

In [8]:
def _insert_rock_textures(ontology, textureListPath, uri='TEXTURA_'):
    '''
    Populates the ROCK TEXTURE class of a given ontology with instances.
    Entries: list of rock texture codes and names (json) and ontology containing a #textura class (.owl)
    Returns the populated ontology
    '''
    with open(textureListPath, 'r') as f:
        instances = json.load(f)
    
    for instance in instances:
        newInstance=ontology.textura(uri+str(instance).zfill(3))
        newInstance.label.append(instances[instance])

    return ontology

#### Insert list of chronostratigraphic units as individuals

Creates instances for the CHRONOSTRATIGRAPHIC_UNIT entity. Each instance is registered under a URI that represents the instance name.

In [9]:
def _insert_chronostratigraphic_units(ontology, chronostratigraphyListPath):
    '''
    Populates the GEOLOGICAL TIME INTERVAL class of a given ontology with instances.
    Entries: list of chronostratigraphic units codes and names (json) and ontology containing a #geological_time_interval class (.owl)
    Returns the populated ontology
    '''
    with open(chronostratigraphyListPath, 'r') as f:
        instances = json.load(f)
    
    for chronostratigraphicUnit in instances.keys():
        for instance in instances[chronostratigraphicUnit].keys():
            newInstance=ontology.geological_time_interval(instance)
            for name in instances[chronostratigraphicUnit][instance]:
                newInstance.label.append(name)

    return ontology

#### Insert list of wells as individuals

In [10]:
def _semhifen(name):
    # DERIVANDO TERMO SEM OS HÍFENS 
    # EX. 3-RJS-739  => 3RJS739
    # EX. 7-CRT-17HP-RJS => 7CRT17HPRJS
    return(name.replace("-",""))

def _semhifen_semuf(name):
    # DERIVANDO TERMO SEM OS HÍFENS e sem o identificador de UF ao final  
    # EX. 7-CRT-17HP-RJS => 7CRT17HP
    return(re.sub("-[A-Z]{0,3}$", '', name).replace("-",""))

def _seminicio(name):
    # EX. 7-CRT-17HP-RJS => CRT-17HP-RJS
    return(re.sub("^[0-9]-", '', name))

def _semfim(name):
    # EX. 7-CRT-17HP-RJS => 7-CRT-17HP
    return(re.sub("-[A-Z]{0,3}$", '', name))

def _semtipo(name):
    # retira indicação de tipo -> D H P A B C
    # EX. 7-CRT-17HP-RJS => 7-CRT-17-RJS
    return(re.sub("(?<=[0-9])[A-Z]{0,3}-", '-', name))

def _versaocurta(name):
    # CRIAR ALTERNATIVA AO POCO_NM_COMPLETO COM NOME CURTO MUITO USUAL EM DOCUMENTOS
    # EX. 8-LL-108D-RJS  => LL-108D
    # EX. 3-RJS-739  => RJS-739
    # EX. 3-RJS-739A => RJS-739A
    #^ => identifica	início da linha 	modelo = "^abc" 	Match com = "abcde" 	associa texto que inicia com a sequência "abc", embora não "yabc".
    #$ => identifica	fim da linha 	modelo = "abc$" 	Match com = "yzabc" 	associa texto que termina com a sequência "abc", embora não "abcde". 
    return(re.sub("((^[0-9]-)|(-[A-Z]{0,3}$))", '', name))
    
def _well_names(index, row=None):
    '''
    Generates the set of name variations for a given well of the well DataFrame.

    Accepts both call shapes used in this notebook:
        _well_names(index, row)  -- the original two-argument form
        _well_names(row)         -- the form the instance builders use
    In the one-argument form the single positional argument is the row.

    index: row index (unused), or the row itself in the one-argument form.
    row: mapping/Series with the 'POCO_OPERADOR' and 'POCO' columns.

    Returns a set with each original name plus its derived variants
    (no hyphens, no state suffix, no leading digit, no type letters, short form).
    '''
    if row is None:
        # One-argument call: what arrived as `index` is actually the row.
        row = index

    variantBuilders = (_semhifen, _semhifen_semuf, _seminicio,
                       _semfim, _semtipo, _versaocurta)

    wells = set()
    for column in ('POCO_OPERADOR', 'POCO'):
        name = row[column]
        if not isinstance(name, str):
            # Empty spreadsheet cells arrive as NaN (a float); the string
            # helpers would fail on them, so such cells contribute nothing.
            continue
        wells.add(name)
        wells.update(build(name) for build in variantBuilders)

    return wells

In [11]:
def _well_type(row):
    wellTypeMap = {ontology.special_well: ['Especial'],
                   ontology.explotatory_well: ['Desenvolvimento', 'Injeção'],
                   ontology.adjacent_well: ['Pioneiro Adjacente'],
                   ontology.appraisal_well: ['Extensão'],
                   ontology.deeper_prospect_well: ['Jazida Mais Profunda'],
                   ontology.shallower_prospect_well: ['Jazida Mais Rasa'],
                   ontology.stratigraphic_well: ['Estratigráfico'],
                   ontology.wildcat_well: ['Pioneiro']
                  }
    for key, names in wellTypeMap.items():
        if row['CATEGORIA'] in names:
            newWell = key(row['COD_POCO'])
    return newWell

In [12]:
def _well_quality(row):
    wellQuality = -1
    wellQualityMap = {ontology.abandonned_well: ['ABANDONADO POR ACIDENTE MECÂNICO',
                                                 'ABANDONADO POR ERUPÇÃO',
                                                 'ABANDONADO POR IMPOSSIBILIDADE DE AVALIAÇÃO',
                                                 'ABANDONADO POR OBJETIVO FORA DE PREVISÃO',
                                                 'ABANDONADO POR OBJETIVO/ALVO NÃO ATINGIDO',
                                                 'ABANDONADO POR OUTRAS RAZÕES',
                                                 'ABANDONADO POR PERDA CIRCULAÇÃO'
                                                ] ,
                      ontology.commercial_well: ['PRODUTOR COMERCIAL DE GÁS NATURAL',
                                                 'PRODUTOR COMERCIAL DE GÁS NATURAL E CONDENSADO',
                                                 'PRODUTOR COMERCIAL DE PETRÓLEO',
                                                 'PRODUTOR COMERCIAL DE PETRÓLEO E GÁS NATURAL',
                                                 'PRODUTOR COMERCIAL DE PETRÓLEO, GÁS NATURAL E CONDENSADO'
                                                ],
                      ontology.discovery_well: ['DESCOBRIDOR DE CAMPO COM GÁS NATURAL',
                                                'DESCOBRIDOR DE CAMPO COM GÁS NATURAL E CONDENSADO',
                                                'DESCOBRIDOR DE CAMPO COM PETRÓLEO',
                                                'DESCOBRIDOR DE CAMPO COM PETRÓLEO E GÁS NATURAL',
                                                'DESCOBRIDOR DE CAMPO COM PETRÓLEO, GÁS NATURAL E CONDENSADO',
                                                'DESCOBRIDOR DE NOVA JAZIDA GÁS NATURAL',
                                                'DESCOBRIDOR DE NOVA JAZIDA GÁS NATURAL E CONDENSADO',
                                                'DESCOBRIDOR DE NOVA JAZIDA PETRÓLEO',
                                                'DESCOBRIDOR DE NOVA JAZIDA PETRÓLEO E GÁS NATURAL',
                                                'DESCOBRIDOR DE NOVA JAZIDA PETRÓLEO, GÁS NATURAL E CONDENSADO'
                                               ],
                      ontology.dry_hole_without_traces_of_hydrocarbon: ['SECO SEM INDÍCIOS'
                                                                       ],
                      ontology.dry_hole_with_traces_of_hydrocarbon: ['SECO COM INDÍCIOS GÁS NATURAL',
                                                                     'SECO COM INDÍCIOS DE GÁS NATURAL E CONDENSADO',
                                                                     'SECO COM INDÍCIOS DE PETRÓLEO',
                                                                     'SECO COM INDÍCIOS DE PETRÓLEO E GÁS NATURAL'
                                                                    ],
                      ontology.well_with_hydrocarbon:['PORTADOR DE GÁS NATURAL',
                                                      'PORTADOR DE GÁS NATURAL E CONDENSADO',
                                                      'PORTADOR DE PETRÓLEO',
                                                      'PORTADOR DE PETRÓLEO E GÁS NATURAL',
                                                      'PORTADOR DE PETRÓLEO, GÁS NATURAL E CONDENSADO'
                                                     ],
                      ontology.subcommercial_well: ['PRODUTOR SUBCOMERCIAL DE GÁS NATURAL',
                                                    'PRODUTOR SUBCOMERCIAL DE GÁS NATURAL E CONDENSADO',
                                                    'PRODUTOR SUBCOMERCIAL DE PETRÓLEO',
                                                    'PRODUTOR SUBCOMERCIAL DE PETRÓLEO E GÁS NATURAL',
                                                    'PRODUTOR SUBCOMERCIAL DE PETRÓLEO, GÁS NATURAL E CONDENSADO'
                                                   ]
                     }
    for key, name in wellQualityMap.items():
        if row['RECLASSIFICACAO'] in name:
            wellQuality = key
    return wellQuality

In [13]:
def _create_well_instance(row, generateSynonyms = False):
    '''
    Creates a typed well individual and labels it with the well's names.

    The individual's class is chosen by _well_type from row['CATEGORIA'].

    row: mapping/Series with 'POCO', 'POCO_OPERADOR', 'CATEGORIA',
         'RECLASSIFICACAO' and 'COD_POCO'.
    generateSynonyms: when True, label with every variant from _well_names;
         otherwise only with the two raw names.

    Returns the created individual (the original returned nothing).
    '''
    if generateSynonyms:
        # _well_names is declared as (index, row); the index is not used.
        wells = _well_names(None, row)
    else:
        wells = [row['POCO'], row['POCO_OPERADOR']]

    newWell = _well_type(row)
    # NOTE(review): the quality class is looked up but was never attached to
    # the individual in the original code; confirm the intended use before
    # wiring it in. It is kept here so the intent is not lost.
    well_quality = _well_quality(row)

    # dict.fromkeys de-duplicates while keeping a stable order; missing (NaN)
    # spreadsheet cells are skipped rather than stored as labels.
    for well in dict.fromkeys(name for name in wells if pd.notna(name)):
        newWell.label.append(well)

    return newWell

In [14]:
def _create_simple_well_instance(ontology, row, generateSynonyms = False):
    if generateSynonyms:
        wells = _well_names(row)
    else:
        wells = []
        wells.append(row['POCO'])
        wells.append(row['POCO_OPERADOR'])
        wells = set(wells)
    
    newWell = ontology.well(row['COD_POCO'])
    for well in wells:
        newWell.label.append(well)
    
    return ontology

In [15]:
def _insert_wells(ontology, wellTablePath, generateSynonyms = False):
    '''
    Populates the WELL class of a given ontology with instances.

    ontology: ontology containing a #well class (.owl).
    wellTablePath: path of the Excel file holding the well table; one
        individual is created per row.
    generateSynonyms: forwarded to _create_simple_well_instance to choose
        between raw names only (False) and all derived name variants (True).
        It was previously accepted but ignored.

    Besides the rows, a placeholder individual named 'POCO_GENERICO' is created.
    Returns the populated ontology.
    '''
    df = pd.read_excel(wellTablePath)

    for _, row in df.iterrows():
        ontology = _create_simple_well_instance(ontology, row, generateSynonyms)

    # Generic well individual; created without any label.
    ontology.well('POCO_GENERICO')

    return ontology

#### Insert external URIs

Creates new instances for the BASIN, FIELD, LITHOSTRATIGRAPHIC UNIT, GEOLOGICAL STRUCTURE and ROCK TEXTURE classes.

In [16]:
def _insert_new_instances(ontology, listPath, namedEntity, synonyms = False):
    '''
    Populates all ontology classes with new instances.
    Entries: list of instance codes and names (json), the class to be populated (str), and the and ontology containing the class (.owl)
    Returns the populated ontology
    '''
    ontologyIri = list(ontology.classes())[0].iri.split('#')[0]
    
    classMap = {ontology.basin: 'bacia',
                ontology.field: 'campo',
                ontology.lithostratigraphic_unit: 'litoestratigrafia',
                ontology.geological_structure: 'estrutura',
                ontology.textura: 'textura',
                ontology.geological_time_interval: 'cronoestratigrafia',
                ontology.well: 'poço'
               }
    
    with open(listPath, 'r') as f:
        instances = json.load(f)
    
    for key, name in classMap.items():
        if namedEntity in name:
            entity = key
    
    if synonyms:
        for instance in instances:
            synonymInstance = ontology.search(iri = ontologyIri+'#'+instance)[0]
            for name in instances[instance]:
                synonymInstance.label.append(name)
    else:
        for instance in instances:
            newInstance = key(instance)
            for name in instances[instance]:
                newInstance.label.append(name)
    
    return ontology

In [17]:
# Ontology that receives the instances.
ontologyPath = '../../OntoGeoLogicaEntidadesNomeadasPython.owl'

# Input tables, given relative to the notebook's working directory.
wellTablePath = 'dados_anp.xlsx'
chronostratigraphyListPath = 'unidades_cronoestratigraficas.json'
basinListPath, fieldListPath = 'codigos_bacias.json', 'codigos_campos.json'

# Lithostratigraphy comes in three lists, one per rank.
(lithostratigraphyGroupListPath,
 lithostratigraphyFormationListPath,
 lithostratigraphyMemberListPath) = ('codigos_grupo.json',
                                     'codigos_formacao.json',
                                     'codigos_membro.json')

In [18]:
# Load the base ontology; `ontology` is the module-level object that the
# following cells populate (and that _well_type / _well_quality read directly).
ontology = _load_ontology(ontologyPath)

In [19]:
# Add one basin individual per entry of the basin code list.
ontology = _insert_basins(ontology, basinListPath)

In [20]:
# Add one field individual per entry of the field code list.
ontology = _insert_fields(ontology, fieldListPath)

In [21]:
# Lithostratigraphic units, pass 1 of 3: the group list.
ontology = _insert_lithostratigraphic_units(ontology, lithostratigraphyGroupListPath)

In [22]:
# Lithostratigraphic units, pass 2 of 3: the formation list.
ontology = _insert_lithostratigraphic_units(ontology, lithostratigraphyFormationListPath)

In [23]:
# Lithostratigraphic units, pass 3 of 3: the member list.
ontology = _insert_lithostratigraphic_units(ontology, lithostratigraphyMemberListPath)

In [24]:
# Add the geological time interval individuals from the chronostratigraphy list.
ontology = _insert_chronostratigraphic_units(ontology, chronostratigraphyListPath)

In [25]:
# Add one well individual per row of the well spreadsheet (raw names only,
# since generateSynonyms keeps its default of False).
ontology = _insert_wells(ontology, wellTablePath)

In [33]:
# Write the populated ontology to a separate file name, so the file at
# ontologyPath is not overwritten.
ontology.save(file="OntoGeoLogicaANPInstancias.owl")

In [29]:
# Spot check: fetch the field individual CAMPO_290039 by attribute access.
ontology.CAMPO_290039

OntoGeoLogicaEntidadesNomeadasPython.CAMPO_290039

In [30]:
# Spot check: the labels stored on field CAMPO_290039.
ontology.CAMPO_290039.label

['SÉPIA']

In [28]:
# Spot check: the labels stored on basin BACIA_29.
ontology.BACIA_29.label

['Santos']

In [31]:
# Spot check: a well should carry both of its names as labels.
ontology.POCO_0074316029346.label

['8SEP3RJS', '8-SEP-3-RJS']

In [32]:
# Spot check: list every entity whose IRI contains "GRUPO_" (wildcard search).
ontology.search(iri='*GRUPO_*')

[OntoGeoLogicaEntidadesNomeadasPython.GRUPO_01, OntoGeoLogicaEntidadesNomeadasPython.GRUPO_02, OntoGeoLogicaEntidadesNomeadasPython.GRUPO_03, OntoGeoLogicaEntidadesNomeadasPython.GRUPO_04, OntoGeoLogicaEntidadesNomeadasPython.GRUPO_05, OntoGeoLogicaEntidadesNomeadasPython.GRUPO_06, OntoGeoLogicaEntidadesNomeadasPython.GRUPO_07, OntoGeoLogicaEntidadesNomeadasPython.GRUPO_08, OntoGeoLogicaEntidadesNomeadasPython.GRUPO_09, OntoGeoLogicaEntidadesNomeadasPython.GRUPO_10, OntoGeoLogicaEntidadesNomeadasPython.GRUPO_11, OntoGeoLogicaEntidadesNomeadasPython.GRUPO_12, OntoGeoLogicaEntidadesNomeadasPython.GRUPO_13, OntoGeoLogicaEntidadesNomeadasPython.GRUPO_14, OntoGeoLogicaEntidadesNomeadasPython.GRUPO_15, OntoGeoLogicaEntidadesNomeadasPython.GRUPO_16, OntoGeoLogicaEntidadesNomeadasPython.GRUPO_17, OntoGeoLogicaEntidadesNomeadasPython.GRUPO_18, OntoGeoLogicaEntidadesNomeadasPython.GRUPO_19, OntoGeoLogicaEntidadesNomeadasPython.GRUPO_20, OntoGeoLogicaEntidadesNomeadasPython.GRUPO_21, OntoGeoLogic