## TXRRC Oil and Gas Production file testing
This notebook tests reading and parsing of the oil and gas production files published by TXRRC.<br>
It imports the required libraries, opens each file, and checks that the record layouts for each file type work on real blocks.

### Libraries and ensuring all necessary files are read into the notebook

In [1]:
import pandas as pd
import codecs

##Import section to ensure the main directory is in the path
import sys
import os
## Absolute path of the parent directory; appended to sys.path once so the
## project modules imported below can be found
module_path = os.path.abspath(os.pardir)
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)

## Importing from the main directory now
from ebcdic_main import yield_blocks, parse_record
from layouts_oilProd import oilProd_layout
from layouts_gasProd import gasProd_layout
from ebcdic_formats import pic_any

### Opening Oil Production file and block definition

In [2]:
## Oil production file, stored locally
## Origin: ftp://ftpe.rrc.texas.gov/sholed/olf001l.ebc.gz (extracted locally with 7-zip)
file_path = r'D:\python\rrc_Scratch\olf001l.ebc'

## Block size for each record in the file (handed to yield_blocks by the reading loop)
## Not confirmed to hold for every version of this file or for other TXRRC files
block_size = 1200

print('opening', file_path, '...')
file = open(file_path, mode='rb')  ## binary mode: the .ebc content is read as bytes

## ---- Settings and counters for testing the outputs ----
Limiting_Counter = True  ## when True, the reading loop stops early (for testing formatting)
check_stop = 100  ## threshold on wellct that the reading loop uses to stop
API = None  ## placeholder so API exists even if the data read does not begin on an 01 record (presumably the intent -- confirm)
ct = wellct = 0  ## ct: counter for number of records; wellct: counter for number of wells

opening D:\python\rrc_Scratch\olf001l.ebc ...


### Loop for reading Oil Production blocks and writing the results to temporary dataframes

In [3]:
"""Loop section for all records or partial set"""
## Read the oil production file block by block, parse each block with the
## layout selected by its leading record identifier, and print the results.
## With Limiting_Counter set, exactly check_stop blocks are processed.
## The file handle opened in the previous cell is closed when the loop ends
## (normally, early, or on error); re-run the opening cell before re-running this one.
try:
    for block in yield_blocks(file, block_size):

        ## Testing aid: stop once check_stop loop runs have completed.
        ## '>=' (not '>') so the loop does not run one extra time.
        if Limiting_Counter and wellct >= check_stop:
            break

        ## Leading byte of the block decoded by pic_any (a single character,
        ## not two); it identifies which record layout applies
        startval = pic_any(block[0:1])

        print(startval)
        print(type(startval))
        print(pic_any(block))

        ## Select the layout from the leading value, then parse the block with it
        layout = oilProd_layout(startval)['layout']
        print(layout)
        parsed_vals = parse_record(block, layout)  ## formatted record as a dict

        ## One-row dataframe; column order follows the keys of the parsed dict
        temp_df = pd.DataFrame([parsed_vals], columns=parsed_vals.keys())
        #temp_df['api10'] = API ##adds API number to record (might need to move this to first position)

        print(temp_df)

        ## NOTE: incremented once per block processed, so despite its name this
        ## counts loop runs rather than distinct wells
        wellct += 1
finally:
    ## Release the file handle opened in the previous cell
    file.close()

1
<class 'str'>
101 0000200100000000000000WILDCAT                         0000130210270290311972040199999000000000100000200000990100000000000000002000000000000000000000000000000000    n<                     AA                                                                                                                                         0                                                                                                                             0000000000000000000000000000000000000000000201908    000000   000                      201909    000000   000                      201910    000000   000                      201911    000000   000                      201912    000000   000                      202001    000000   000                      202002    000000   000                      202003    000000   000                      202004    000000   000                      202005    000000   000       

## Now for the GAS example

### Opening Gas Production file and block definition

In [4]:
## Gas production file, stored locally
## Origin: ftp://ftpe.rrc.texas.gov/shgled/gsf001l.ebc.gz (extracted locally with 7-zip)
file_path = r'D:\python\rrc_scratch\gsf001l.ebc'

## Block size for each record in the file (handed to yield_blocks by the reading loop)
## Not confirmed to hold for every version of this file or for other TXRRC files
block_size = 2120

print('opening', file_path, '...')
file = open(file_path, mode='rb')  ## binary mode: the .ebc content is read as bytes

## ---- Settings and counters for testing the outputs ----
Limiting_Counter = True  ## when True, the reading loop stops early (for testing formatting)
check_stop = 100  ## threshold on wellct that the reading loop uses to stop
API = None  ## placeholder so API exists even if the data read does not begin on an 01 record (presumably the intent -- confirm)
ct = wellct = 0  ## ct: counter for number of records; wellct: counter for number of wells

opening D:\python\rrc_scratch\gsf001l.ebc ...


In [5]:
"""Loop section for all records or partial set"""
## Read the gas production file block by block, parse each block with the
## layout selected by its leading record identifier, and print the results.
## With Limiting_Counter set, exactly check_stop blocks are processed.
## The file handle opened in the previous cell is closed when the loop ends
## (normally, early, or on error); re-run the opening cell before re-running this one.
try:
    for block in yield_blocks(file, block_size):

        ## Testing aid: stop once check_stop loop runs have completed.
        ## '>=' (not '>') so the loop does not run one extra time.
        if Limiting_Counter and wellct >= check_stop:
            break

        ## Leading byte of the block decoded by pic_any (a single character,
        ## not two); it identifies which record layout applies
        startval = pic_any(block[0:1])

        print(startval)
        print(type(startval))
        print(pic_any(block))

        ## Select the layout from the leading value, then parse the block with it
        layout = gasProd_layout(startval)['layout']
        print(layout)
        parsed_vals = parse_record(block, layout)  ## formatted record as a dict

        ## One-row dataframe; column order follows the keys of the parsed dict
        temp_df = pd.DataFrame([parsed_vals], columns=parsed_vals.keys())
        #temp_df['api10'] = API ##adds API number to record (might need to move this to first position)

        print(temp_df)

        ## NOTE: incremented once per block processed, so despite its name this
        ## counts loop runs rather than distinct wells
        wellct += 1
finally:
    ## Release the file handle opened in the previous cell
    file.close()

1
<class 'str'>
101 000020010000000000000000000WILDCAT                         00001313726531132319580103AS 0000000000005J     K     00000066803500000202790000000000000000000000                                          00364000000000000000000000000000000000101000000000000                                              GAS FIELD REMARKS REMOVED AS OF NOVEMBER, 1984.                                                                                                                                                                                                                                         201908000   00      17     001000                                           201909000   00      17     001000                                           201910000   00      17     001000                                           201911000   00      17     001000                                           201912000   00      17     001000                                   