In [1]:
import pandas as pd
import numpy as np
import math
import os
import re
import json
import unicodedata
from owlready2 import *
from getpass import getpass
from IPython.display import clear_output
import collections

In [2]:
# Local import: only this cell needs URL quoting.
from urllib.parse import quote

# Corporate proxy credentials: the user key is read from the BG40 environment
# variable and the password is prompted for, so neither is stored in the notebook.
# NOTE(review): if BG40 is not set, os.getenv returns None and the proxy user
# becomes the literal text "None" - confirm BG40 is defined before relying on the proxy.
chave  = os.getenv('BG40')
senha  = getpass('Senha: ')

# Percent-encode both parts so that reserved characters in the password
# (e.g. '@', ':', '/', '#') cannot corrupt the proxy URL.
_proxy_user = quote(str(chave), safe='')
_proxy_pass = quote(senha, safe='')

os.environ['HTTP_PROXY']  = f'http://{_proxy_user}:{_proxy_pass}@inet-sys.petrobras.com.br:804'
os.environ['HTTPS_PROXY'] = f'http://{_proxy_user}:{_proxy_pass}@inet-sys.petrobras.com.br:804'
os.environ['NO_PROXY']    = '127.0.0.1, localhost, petrobras.com.br, petrobras.biz'

Senha:  ········


In [3]:
def _load_ontology(ontologyPath):
    '''
    Loads an ontology from a local file.
    Entries: path of the ontology file (.owl); it is prefixed with "file://" before being opened
    Returns the loaded ontology
    '''
    fileIri = "file://" + ontologyPath
    loadedOntology = get_ontology(fileIri)
    loadedOntology.load()
    return loadedOntology

#### Insert list of basins as individuals

Creates instances for the BASIN entity. Each instance is registered under a URI with a unique code (BASE_CD_BACIA).

In [4]:
def _insert_basins(ontology, basinListPath, uri='BASE_CD_BACIA_'):
    '''
    Populates the BASIN class of a given ontology with instances.
    Entries: list of basin codes and names (json) and ontology containing a #basin class (.owl)
    Returns the populated ontology
    '''
    with open(basinListPath, 'r') as f:
        instances = json.load(f)
    
    for instance in instances:
        newInstance=ontology.basin(uri+str(instance).zfill(3))
        for name in instances[instance]:
            newInstance.label.append(name)
    
    return ontology

#### Insert list of fields as individuals

Creates instances for the FIELD entity. Each instance is registered under a URI with a unique code (CAMP_CD_CAMPO).

In [5]:
def _insert_fields(ontology, fieldListPath, uri='CAMP_CD_CAMPO_'):
    '''
    Populates the FIELD class of a given ontology with instances.
    Entries: list of field codes and names (json) and ontology containing a #field class (.owl)
    Returns the populated ontology
    '''
    with open(fieldListPath, 'r') as f:
        instances = json.load(f)
    
    for instance in instances:
        newInstance=ontology.field(uri+str(instance).zfill(4))
        for name in instances[instance]:
            newInstance.label.append(name)

    return ontology

#### Insert list of lithostratigraphic units as individuals

Creates instances for the LITHOSTRATIGRAPHIC_UNIT entity. Each instance is registered under a sequential numeric code.

In [6]:
def _insert_lithostratigraphic_units(ontology, lithostratigraphyListPath):
    '''
    Populates the LITHOSTRATIGRAPHIC UNIT class of a given ontology with instances.
    Identifies granularity of the ontology, to populate either the lithostratigraphic_unit superclass,
    or the GROUP, FORMATION and MEMBER classes
    Entries: list of lithostratigraphic codes and names (json) and ontology containing a #lithostratigraphic_unit class (.owl)
    Returns the populated ontology
    '''
    classes = []
    for ontoClass in list(ontology.classes()):
        classes.append(ontoClass.iri.split('#')[1])
        
    with open(lithostratigraphyListPath, 'r') as f:
        instances = json.load(f)
    
    if 'group' and 'formation' and 'member' in classes:
        for instance in instances['group'].keys():
            newInstance=ontology.group(instance)
            for name in instances['group'][instance]:
                newInstance.label.append(name)
        for instance in instances['formation'].keys():
            newInstance=ontology.formation(instance)
            for name in instances['formation'][instance]:
                newInstance.label.append(name)
        for instance in instances['member'].keys():
            newInstance=ontology.member(instance)
            for name in instances['member'][instance]:
                newInstance.label.append(name)
    else:
        for lithostratigraphicUnit in instances.keys():
            for instance in instances[lithostratigraphicUnit].keys():
                newInstance=ontology.lithostratigraphic_unit(instance)
                for name in instances[lithostratigraphicUnit][instance]:
                    newInstance.label.append(name)
    
    newInstance=ontology.lithostratigraphic_unit('SD_000')
    newInstance.label.append('Embasamento')
    
    return ontology

#### Insert geological structure instances

Creates instances for the GEOLOGICAL STRUCTURE entity. Each instance is registered under a URI with a unique numeric code.

In [7]:
def _insert_geological_structures(ontology, geologicalStructureListPath, uri='TEFR_CD_TIPO_EST_FISICA_ROCHA_'):
    '''
    Populates the GEOLOGICAL STRUCTURE class of a given ontology with instances.
    Entries: list of geological structure codes and names (json) and ontology containing a #geological_structure class (.owl)
    Returns the populated ontology
    '''
    with open(geologicalStructureListPath, 'r') as f:
        instances = json.load(f)
    
    for instance in instances:
        newInstance=ontology.geological_structure(uri+str(instance).zfill(3))
        newInstance.label.append(instances[instance])

    return ontology

#### Insert texture instances

Creates instances for the TEXTURE entity. Each instance is registered under a URI with a unique numeric code.

In [8]:
def _insert_rock_textures(ontology, textureListPath, uri='textura_'):
    '''
    Populates the ROCK TEXTURE class of a given ontology with instances.
    Entries: list of rock texture codes and names (json) and ontology containing a #textura class (.owl)
    Returns the populated ontology
    '''
    with open(textureListPath, 'r') as f:
        instances = json.load(f)
    
    for instance in instances:
        newInstance=ontology.textura(uri+str(instance).zfill(3))
        newInstance.label.append(instances[instance])

    return ontology

#### Insert list of chronostratigraphic units as individuals

Creates instances for the CHRONOSTRATIGRAPHIC_UNIT entity. Each instance is registered under a URI that represents the instance name.

In [9]:
def _insert_chronostratigraphic_units(ontology, chronostratigraphyListPath):
    '''
    Populates the GEOLOGICAL TIME INTERVAL class of a given ontology with instances.
    Entries: list of chronostratigraphic units codes and names (json) and ontology containing a #geological_time_interval class (.owl)
    Returns the populated ontology
    '''
    with open(chronostratigraphyListPath, 'r') as f:
        instances = json.load(f)
    
    for chronostratigraphicUnit in instances.keys():
        for instance in instances[chronostratigraphicUnit].keys():
            newInstance=ontology.geological_time_interval(instance)
            for name in instances[chronostratigraphicUnit][instance]:
                newInstance.label.append(name)

    return ontology

#### Insert list of wells as individuals

In [10]:
def _semhifen(name):
    # DERIVANDO TERMO SEM OS HÍFENS 
    # EX. 3-RJS-739  => 3RJS739
    # EX. 7-CRT-17HP-RJS => 7CRT17HPRJS
    return(name.replace("-",""))

def _semhifen_semuf(name):
    # DERIVANDO TERMO SEM OS HÍFENS e sem o identificador de UF ao final  
    # EX. 7-CRT-17HP-RJS => 7CRT17HP
    return(re.sub("-[A-Z]{0,3}$", '', name).replace("-",""))

def _seminicio(name):
    # EX. 7-CRT-17HP-RJS => CRT-17HP-RJS
    return(re.sub("^[0-9]-", '', name))

def _semfim(name):
    # EX. 7-CRT-17HP-RJS => 7-CRT-17HP
    return(re.sub("-[A-Z]{0,3}$", '', name))

def _semtipo(name):
    # retira indicação de tipo -> D H P A B C
    # EX. 7-CRT-17HP-RJS => 7-CRT-17-RJS
    return(re.sub("(?<=[0-9])[A-Z]{0,3}-", '-', name))

def _versaocurta(name):
    # CRIAR ALTERNATIVA AO POCO_NM_COMPLETO COM NOME CURTO MUITO USUAL EM DOCUMENTOS
    # EX. 8-LL-108D-RJS  => LL-108D
    # EX. 3-RJS-739  => RJS-739
    # EX. 3-RJS-739A => RJS-739A
    #^ => identifica	início da linha 	modelo = "^abc" 	Match com = "abcde" 	associa texto que inicia com a sequência "abc", embora não "yabc".
    #$ => identifica	fim da linha 	modelo = "abc$" 	Match com = "yzabc" 	associa texto que termina com a sequência "abc", embora não "abcde". 
    return(re.sub("((^[0-9]-)|(-[A-Z]{0,3}$))", '', name))
    
def _well_names(index, row=None):
    '''
    Generates the set of name variations for a given well of the well DataFrame.
    Entries: `index` and `row` as yielded by DataFrame.iterrows(); `index` is not used.
    The row may also be passed as the only argument (`_well_names(row)`), which is how
    the instance-creation functions of this notebook call it.
    The row must provide the DMPO_SG_PRFX_POCO, DMPO_NM_CMPT_POCO, DMPO_SG_PRFX_POCO_ANP
    and DMPO_SG_PREF_POCO_DEPEX values, with '' standing for a missing value.
    Returns a set of strings
    '''
    if row is None:
        # Single-argument call: the only positional argument is the row itself.
        row = index

    # Every hyphenated identifier yields itself plus these derived spellings.
    variations = (_semhifen, _semhifen_semuf, _seminicio, _semfim, _semtipo, _versaocurta)
    wells = []

    def _add_variations(name):
        wells.append(name)
        wells.extend(variation(name) for variation in variations)

    # The prefix is always processed; the other identifiers only when present.
    _add_variations(row['DMPO_SG_PRFX_POCO'])
    for column in ('DMPO_NM_CMPT_POCO', 'DMPO_SG_PRFX_POCO_ANP'):
        if row[column] != '':
            _add_variations(row[column])

    if row['DMPO_SG_PREF_POCO_DEPEX'] != '':
        # Term without spaces.
        # e.g. 1EDA 0001 BA  => 1EDA0001BA
        # e.g. 1EBAN0001 SE  => 1EBAN0001SE
        SG_PREF_POCO_DEPEX_SEMESPACO = row['DMPO_SG_PREF_POCO_DEPEX'].replace(" ","")
        wells.append(SG_PREF_POCO_DEPEX_SEMESPACO)

        # Term without leading zeros.
        # e.g. 1EBAN0001SE  => 1EBAN1SE
        # (?<=[A-Z]) is a positive lookbehind: it requires a letter before the zeros
        # without including that letter in the replaced text.
        wells.append(re.sub(r"(?<=[A-Z])0{1,3}", '', SG_PREF_POCO_DEPEX_SEMESPACO))

        # Term with hyphens between the identifiers, to homogenize with other sources.
        # e.g. 1EDA 0001 BA  => 1-EDA-0001-BA
        # e.g. 1EBAN0001 SE  => 1-EBAN-0001-SE
        SG_PREF_POCO_DEPEX_COMHIFEN = re.sub(r"\s{1,5}", '-', row['DMPO_SG_PREF_POCO_DEPEX']) # replaces whitespace
        # One substitution is enough: once every zero that follows a letter has been
        # prefixed with '-', no zero is directly preceded by a letter any more, so the
        # former extra passes for "00" and "000" could never match.
        SG_PREF_POCO_DEPEX_COMHIFEN = re.sub(r"(?<=[A-Z])0", '-0', SG_PREF_POCO_DEPEX_COMHIFEN)
        wells.append(SG_PREF_POCO_DEPEX_COMHIFEN)

        # Same term with a hyphen inserted after the first character.
        # e.g. 7CB-0017D-SES  => 7-CB-0017D-SES
        s = SG_PREF_POCO_DEPEX_COMHIFEN
        s = s[:1] + "-" +  s[1:]
        wells.append(s)

        # Term without leading zeros.
        # e.g. 7CB-0017D-SES  => 7CB-17D-SES
        # The database has bad records without the reference hyphen, so zeros that
        # follow either a letter or a hyphen are handled.
        SG_PREF_POCO_DEPEX_COMHIFEN_SEMZERONAESQUERDA = re.sub(r"(?<=[A-Z]|-)0{1,3}", '', SG_PREF_POCO_DEPEX_COMHIFEN)
        wells.append(SG_PREF_POCO_DEPEX_COMHIFEN_SEMZERONAESQUERDA)

        # Term without leading zeros and with a hyphen inserted after the first character.
        # e.g. 7CB-17D-SES  => 7-CB-17D-SES
        s = SG_PREF_POCO_DEPEX_COMHIFEN_SEMZERONAESQUERDA
        s = s[:1] + "-" +  s[1:]
        wells.append(s)

    return(set(wells))

In [11]:
def _well_type(row):
    '''
    Creates the individual of a well in the ontology class that matches its classification.
    The ontology is read from the notebook-level `ontology` variable, which must be
    defined before this function is called.
    Entries: row providing DMPO_NM_CLSS_POCO (classification) and DMPO_CD_POCO (well code)
    Returns the new individual, named 'POCO_CD_POCO_' + the code left-padded with zeros
    to 6 characters. A classification that is not listed (including '') falls back to
    the generic well class instead of raising UnboundLocalError.
    '''
    wellTypeMap = {ontology.well: ['NÃO ATRIBUÍDO'],
                   ontology.special_well: ['ESPECIAL'],
                   ontology.explotatory_well: ['INJECAO', 'PRODUCAO'],
                   ontology.adjacent_well: ['PIONEIRO ADJACENTE'],
                   ontology.appraisal_well: ['EXTENSAO'],
                   ontology.deeper_prospect_well: ['JAZIDA MAIS PROFUNDA'],
                   ontology.shallower_prospect_well: ['JAZIDA MAIS RASA'],
                   ontology.stratigraphic_well: ['ESTRATIGRAFICO'],
                   ontology.wildcat_well: ['PIONEIRO']
                  }

    # Generic class unless a more specific one is listed for this classification.
    wellClass = ontology.well
    for key, names in wellTypeMap.items():
        if row['DMPO_NM_CLSS_POCO'] in names:
            wellClass = key
            break

    return wellClass('POCO_CD_POCO_'+str(row['DMPO_CD_POCO']).zfill(6))

In [12]:
def _well_quality(row):
    '''
    Maps the reclassification of a well (DMPO_NM_RCLS_POCO) to an ontology class.
    The ontology is read from the notebook-level `ontology` variable.
    Entries: row providing DMPO_NM_RCLS_POCO
    Returns the matching class, or -1 when the reclassification is not listed
    '''
    wellQualityMap = {
        ontology.abandonned_well: [
            'ABANDONADO POR ACIDENTE MECANICO',
            'ABANDONADO POR BLOW-OUT',
            'ABANDONADO POR IMPOSSIBILIDADE DE AVALIACAO',
            'ABANDONADO POR OBJETIVO FORA DE PREVISAO',
            'ABANDONADO POR OBJETIVO/ALVO NAO ATINGIDO',
            'ABANDONADO POR OUTRAS RAZOES',
            'ABANDONADO POR PERDA DE CIRCULACAO',
        ],
        ontology.commercial_well: [
            'PRODUTOR COMERCIAL DE GAS',
            'PRODUTOR COMERCIAL DE GAS E CONDENSADO',
            'PRODUTOR COMERCIAL DE OLEO',
            'PRODUTOR COMERCIAL DE OLEO E GAS',
            'PRODUTOR COMERCIAL DE OLEO, GAS E CONDENSADO',
        ],
        ontology.discovery_well: [
            'DESCOBRIDOR DE CAMPO COM GAS ',
            'DESCOBRIDOR DE CAMPO COM GAS E CONDENSADO ',
            'DESCOBRIDOR DE CAMPO COM OLEO ',
            'DESCOBRIDOR DE CAMPO COM OLEO E GAS ',
            'DESCOBRIDOR DE CAMPO COM OLEO, GAS E CONDENSADO',
            'DESCOBRIDOR DE NOVA JAZIDA COM GAS',
            'DESCOBRIDOR DE NOVA JAZIDA COM GAS E CONDENSADO',
            'DESCOBRIDOR DE NOVA JAZIDA COM OLEO',
            'DESCOBRIDOR DE NOVA JAZIDA COM OLEO E GAS',
            'DESCOBRIDOR DE NOVA JAZIDA COM OLEO, GAS E CONDENSADO',
        ],
        ontology.dry_hole_without_traces_of_hydrocarbon: [
            'SECO SEM INDICIOS DE PETROLEO',
        ],
        ontology.dry_hole_with_traces_of_hydrocarbon: [
            'SECO COM INDICIOS DE GAS',
            'SECO COM INDICIOS DE GAS E CONDENSADO',
            'SECO COM INDICIOS DE OLEO',
            'SECO COM INDICIOS DE OLEO E GAS',
            'SECO COM INDICIOS DE OLEO, GAS E CONDENSADO',
        ],
        ontology.well_with_hydrocarbon: [
            'PORTADOR DE GAS',
            'PORTADOR DE GAS E CONDENSADO',
            'PORTADOR DE OLEO',
            'PORTADOR DE OLEO E GAS',
            'PORTADOR DE OLEO, GAS E CONDENSADO',
        ],
        ontology.subcommercial_well: [
            'PRODUTOR SUBCOMERCIAL DE GAS ',
            'PRODUTOR SUBCOMERCIAL DE GAS E CONDENSADO ',
            'PRODUTOR SUBCOMERCIAL DE OLEO',
            'PRODUTOR SUBCOMERCIAL DE OLEO E GAS ',
            'PRODUTOR SUBCOMERCIAL DE OLEO,GAS E CONDENSADO',
        ],
    }

    # NOTE(review): some entries end with a blank and one has no blank after the
    # comma; they are compared verbatim - confirm they match the source table.
    matchingClasses = [qualityClass
                       for qualityClass, reclassifications in wellQualityMap.items()
                       if row['DMPO_NM_RCLS_POCO'] in reclassifications]

    # The last listed match wins; -1 signals "no match".
    return matchingClasses[-1] if matchingClasses else -1

In [13]:
def _create_well_instance(row, generateSynonyms = False):
    '''
    Creates the individual of a well in the class given by its classification
    (see _well_type) and labels it with the well names.
    Entries: row of the well table and a flag selecting the labels: when True every
    name variation produced by _well_names is used, otherwise only the non-empty
    values of DMPO_SG_PRFX_POCO, DMPO_NM_CMPT_POCO and DMPO_SG_PRFX_POCO_ANP
    (DMPO_SG_PRFX_POCO is always included)
    Returns nothing; the individual is created in the ontology used by _well_type
    '''
    if generateSynonyms:
        # _well_names takes (index, row) and does not use the index.
        wells = _well_names(None, row)
    else:
        wells = []
        wells.append(row['DMPO_SG_PRFX_POCO'])
        if row['DMPO_NM_CMPT_POCO'] != '':
            wells.append(row['DMPO_NM_CMPT_POCO'])
        if row['DMPO_SG_PRFX_POCO_ANP'] != '':
            wells.append(row['DMPO_SG_PRFX_POCO_ANP'])
        wells = set(wells)

    # NOTE(review): _well_quality(row) used to be evaluated here but its result was
    # never used; decide whether the quality class should be attached to the well.
    newWell = _well_type(row)
    for well in wells:
        newWell.label.append(well)

In [14]:
def _create_simple_well_instance(ontology, row, uri, generateSynonyms = False):
    if generateSynonyms:
        wells = _well_names(row)
    else:
        wells = []
        wells.append(row['DMPO_SG_PRFX_POCO'])
        if row['DMPO_NM_CMPT_POCO'] != '':
            wells.append(row['DMPO_NM_CMPT_POCO'])
        if row['DMPO_SG_PRFX_POCO_ANP'] != '':
            wells.append(row['DMPO_SG_PRFX_POCO_ANP'])
        wells = set(wells)   
        
    newWell = ontology.well('POCO_CD_POCO_'+str(row['DMPO_CD_POCO']).zfill(6))
    for well in wells:
        newWell.label.append(well)

In [15]:
def _insert_wells(ontology, wellTablePath, uri = 'POCO_CD_POCO_', generateSynonyms = False):
    '''
    Creates one individual of the #well class for every row of the well table,
    plus a generic 'POCO_GENERICO' individual.
    Entries: ontology containing a #well class (.owl), path of the well table (Excel file),
    prefix of the individual names and a flag requesting the generated name variations as labels
    Returns the populated ontology
    '''
    # Missing cells become '' because the instance builders test for empty strings.
    wellTable = pd.read_excel(wellTablePath).fillna('')

    for _, wellRow in wellTable.iterrows():
        _create_simple_well_instance(ontology, wellRow, uri, generateSynonyms)

    # Generic well individual, created without labels.
    ontology.well('POCO_GENERICO')

    return ontology

#### Insert external URIs

Creates new instances for the BASIN, FIELD, LITHOSTRATIGRAPHIC UNIT, GEOLOGICAL STRUCTURE and ROCK TEXTURE classes.

In [16]:
def _insert_new_instances(ontology, listPath, namedEntity, synonyms = False):
    '''
    Populates all ontology classes with new instances.
    Entries: list of instance codes and names (json), the class to be populated (str), and the and ontology containing the class (.owl)
    Returns the populated ontology
    '''
    ontologyIri = list(ontology.classes())[0].iri.split('#')[0]
    
    classMap = {ontology.basin: 'bacia',
                ontology.field: 'campo',
                ontology.lithostratigraphic_unit: 'litoestratigrafia',
                ontology.geological_structure: 'estrutura',
                ontology.textura: 'textura',
                ontology.geological_time_interval: 'cronoestratigrafia',
                ontology.well: 'poço'
               }
    
    with open(listPath, 'r') as f:
        instances = json.load(f)
    
    for key, name in classMap.items():
        if namedEntity in name:
            entity = key
    
    if synonyms:
        for instance in instances:
            synonymInstance = ontology.search(iri = ontologyIri+'#'+instance)[0]
            for name in instances[instance]:
                synonymInstance.label.append(name)
    else:
        for instance in instances:
            newInstance = key(instance)
            for name in instances[instance]:
                newInstance.label.append(name)
    
    return ontology

In [17]:
# Input files, given as relative paths: the ontology to be populated and, for each
# entity kind, the resource file (JSON lists, Excel table for wells) with its instances.
ontologyPath = 'OntoGeoLogicaEntidadesNomeadasPython.owl'
basinListPath = 'resources/bacia/bacias.json'
fieldListPath = 'resources/campo/campos.json'
lithostratigraphyListPath = 'resources/litoestratigrafia/unidades_litoestratigraficas.json'
geologicalStructureListPath = 'resources/estrutura_textura_porosidade/estrutura.json'
textureListPath = 'resources/estrutura_textura_porosidade/textura.json'
chronostratigraphyListPath = 'resources/cronoestratigrafia/unidades_cronoestratigraficas.json'
wellTablePath = 'resources/poco/wellData.xlsx'

In [18]:
# Load the ontology that the following cells populate with individuals.
ontology = _load_ontology(ontologyPath)

In [19]:
# Add the basin individuals listed in basinListPath.
ontology = _insert_basins(ontology, basinListPath)

In [20]:
# Add the field individuals listed in fieldListPath.
ontology = _insert_fields(ontology, fieldListPath)

In [21]:
# Add the lithostratigraphic unit individuals listed in lithostratigraphyListPath.
ontology = _insert_lithostratigraphic_units(ontology, lithostratigraphyListPath)

In [22]:
# Add the geological structure individuals listed in geologicalStructureListPath.
ontology = _insert_geological_structures(ontology, geologicalStructureListPath)

In [23]:
# Add the rock texture individuals listed in textureListPath.
ontology = _insert_rock_textures(ontology, textureListPath)

In [24]:
# Add the chronostratigraphic unit individuals listed in chronostratigraphyListPath.
ontology = _insert_chronostratigraphic_units(ontology, chronostratigraphyListPath)

In [25]:
# Add one well individual per row of the well table (default prefix, no generated synonyms).
ontology = _insert_wells(ontology, wellTablePath)

In [None]:
# Additional resource files (relative paths) consumed by _insert_new_instances:
# "external" lists are inserted as new individuals (synonyms = False) and
# "synonym" lists as extra labels for existing individuals (synonyms = True).
externalBasinPath = 'resources/bacia/bacias_externas.json'
synonymBasinPath = 'resources/bacia/bacias_sinonimos.json'
externalFieldPath = 'resources/campo/campos_externos.json'
synonymFieldPath = 'resources/campo/campos_sinonimos.json'
synonymWellPath = 'resources/poco/pocos_sinonimos.json'
externalStructurePath = 'resources/estrutura_textura_porosidade/estruturas_externas.json'
externalTexturePath = 'resources/estrutura_textura_porosidade/texturas_externas.json'
externalLithostratigraphicPath = 'resources/litoestratigrafia/unidades_lito_externas.json'

In [None]:
# Insert the basins of externalBasinPath as new instances.
ontology = _insert_new_instances(ontology, externalBasinPath, 'bacia', synonyms = False)

In [None]:
# Append the labels of synonymBasinPath to the basin instances they name.
ontology = _insert_new_instances(ontology, synonymBasinPath, 'bacia', synonyms = True)

In [None]:
# Insert the fields of externalFieldPath as new instances.
ontology = _insert_new_instances(ontology, externalFieldPath, 'campo', synonyms = False)

In [None]:
# Append the labels of synonymFieldPath to the field instances they name.
ontology = _insert_new_instances(ontology, synonymFieldPath, 'campo', synonyms = True)

In [None]:
# Append the labels of synonymWellPath to the well instances they name.
ontology = _insert_new_instances(ontology, synonymWellPath, 'poço', synonyms = True)

In [None]:
# Insert the geological structures of externalStructurePath as new instances.
ontology = _insert_new_instances(ontology, externalStructurePath, 'estrutura', synonyms = False)

In [None]:
# Insert the rock textures of externalTexturePath as new instances.
ontology = _insert_new_instances(ontology, externalTexturePath, 'textura', synonyms = False)

In [None]:
# Insert the lithostratigraphic units of externalLithostratigraphicPath as new instances.
ontology = _insert_new_instances(ontology, externalLithostratigraphicPath, 'litoestratigrafia', synonyms = False)

In [26]:
# Write the populated ontology to a new file.
ontology.save(file="OntoGeoLogicaInstancias.owl")

In [27]:
# Spot check: display the labels of one inserted field individual.
ontology.CAMP_CD_CAMPO_0066.label

['PIRAMBU']