In [13]:
import argparse
import os
import re
import sys

from Tools.CardClass import Card
from Tools.csv_utils import append_row, create_csv

# Formation names looked for when collecting formation depths.  Each name is
# used verbatim as a regex fragment, so a '.' inside a name (e.g. 'FT. UNION')
# matches any single character.
_FORMATION_NAMES = (
    'SURFACE',
    'WASATCH',
    'FT. UNION',
    'LANCE',
    'FOX HILLS',
    'MESA VRD',
    'PARKMAN',
    'SHANNON',
    'EAGLE',
    'NIOBRARA',
    'FRONTIER',
    'WALL CK',
    'GRINHORN',
    'MOWRY',
    'MUDDY',
    'N.CASTLE',
    'DAKOTA',
    'FALL RVR',
    'LAKOTA',
    'CLOVERLY',
    'MORRISON',
    'SUNDANCE',
    'CHUGWTR',
    'SPEARFISH',
)

# One pattern per formation: the name, an optional '.', optional spaces and a
# 1-4 digit number.  Built once here instead of once per input line.
_FORMATION_PATTERNS = tuple(
    re.compile(name + r'\.?\ *[0-9]{1,4}') for name in _FORMATION_NAMES
)


def parseText(lines, card):
    """Fill ``card`` with the fields found in the text lines of one card.

    Each line is scanned independently with a fixed set of regular
    expressions.  Most fields are simply overwritten whenever a line matches,
    so the last matching line wins.  ``NS_Footage`` (on an ``SL.`` match),
    ``EW_Footage`` (on a ``WL.`` match) and ``TD_Formation`` (on any formation
    match) are appended to, so ``card`` must already hold strings in those
    attributes when such a match occurs.

    Args:
        lines: Iterable of text lines.  Only the part of each line before its
            first newline character is examined.
        card: Object whose attributes receive the extracted values.  It must
            provide ``setLocation()`` and ``setLocationFootage()``; both are
            called once after all lines have been processed.

    Returns:
        The same ``card`` object, updated in place.
    """
    for raw_line in lines:
        line = raw_line.split('\n')[0]

        # Well name/number: everything after the literal 'NO. '.
        well_name = re.search(r'NO\. .*', line)
        if well_name:
            card.well_name = well_name.group()[4:]

        # Operator: the text following 'OPERATOR' / 'OPR' (optional '.' and
        # space).  The value is captured directly; splitting on the keyword
        # and taking element 0 always produced an empty string.
        operator = re.search(r'(?:OPERATOR|OPR)\.?\ ?(.*)', line)
        if operator:
            card.operator = operator.group(1)

        # Township number, stored with a hard-coded 'N' suffix.
        township = re.search(r'TWP\.?\ ?([0-9]{1,2})', line)
        if township:
            card.township = township.group(1) + 'N'

        # Range number, stored with a hard-coded 'W' suffix.
        rng = re.search(r'RGE\.?\ ?([0-9]{1,3})', line)
        if rng:
            card.range = rng.group(1) + 'W'

        # Section number.
        section = re.search(r'SEC\.?\ ?([0-9]{1,2})', line)
        if section:
            card.section = section.group(1)

        # Footages from the section lines.  The matched text, including the
        # 'NL.'/'SL.'/'EL.'/'WL.' label, is stored as-is.
        # NOTE(review): only one or two digits are captured after the label;
        # confirm that longer footages are not being truncated.
        north_line = re.search(r'NL\.\ ?[0-9]{1,2}', line)
        if north_line:
            card.NS_Footage = north_line.group()

        south_line = re.search(r'SL\.\ ?[0-9]{1,2}', line)
        if south_line:
            # Separate from an NL value that is already stored.
            if len(card.NS_Footage) != 0:
                card.NS_Footage += " "
            card.NS_Footage += south_line.group()

        east_line = re.search(r'EL\.\ ?[0-9]{1,2}', line)
        if east_line:
            card.EW_Footage = east_line.group()

        west_line = re.search(r'WL\.\ ?[0-9]{1,2}', line)
        if west_line:
            # Separate from an EL value that is already stored.
            if len(card.EW_Footage) != 0:
                card.EW_Footage += " "
            card.EW_Footage += west_line.group()

        # Quarter-quarter description: optional 'C ' / 'C/2 ' followed by two
        # or three space-terminated tokens such as 'NW' or 'N/2'.  The
        # trailing space is part of the stored value.
        qtr_qtr = re.search(
            r'(C\ |C\/2\ )?((((N|E|S|W)\/2)|(NE|NW|SE|SW))\ ){2,3}', line)
        if qtr_qtr:
            card.Qtr_Qtr = qtr_qtr.group()

        # Elevation: whatever follows 'ELEV' (optional '.' and space).
        elev = re.search(r'ELEV.*', line)
        if elev:
            card.elevation = re.split(r'ELEV\.?\ ?', elev.group())[1]

        # Spud date: 'COMM' followed by a date written with '/' or '-'.
        spud = re.search(
            r'COMM\.?\ ?(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})', line)
        if spud:
            card.spud = spud.group(1)

        # Completion date: 'COMP' followed by a date written with '/' or '-'.
        comp = re.search(
            r'COMP\.?\ ?(\d{1,2}[/-]\d{1,2}[/-]\d{2,4})', line)
        if comp:
            card.completion = comp.group(1)

        # Formation depths: every formation found on this line is appended,
        # each preceded by a single space.
        for formation_pattern in _FORMATION_PATTERNS:
            formation_td = formation_pattern.search(line)
            if formation_td:
                card.TD_Formation += ' ' + formation_td.group()

        # Total depth.  '7.D.' is accepted as well as 'T.D.'
        # (presumably a common OCR misreading of 'T' -- confirm).
        total_depth = re.search(r'(T|7)\.D\.\ ?[0-9]{2,4}', line)
        if total_depth:
            card.total_depth = total_depth.group()

        # Plug back depth.
        plug_back = re.search(r'P\.B\.\ ?[0-9]{2,4}', line)
        if plug_back:
            card.plug_back = plug_back.group()

    # Let the card derive its combined location fields from the parts above.
    card.setLocation()
    card.setLocationFootage()

    return card


# def main():
#     # Parse input and output directory options
#     parser = argparse.ArgumentParser()
#     parser.add_argument("-i", "--input", required=True,
#                         help="Path to directory containing text files")
#     parser.add_argument("-o", "--output", required=True,
#                         help="File path of CSV will be outputted to")
#     args = vars(parser.parse_args())
    
#     if args["output"][-4:] != '.csv':
#         print("ERROR: Output file must be .csv!", file=sys.stderr)
#         return

#     create_csv(args["output"])
# Batch driver: parse every text file in txt_dir into a Card, print it, and
# append one row per file to csv_file.
txt_dir = '/project/arcc-students/csloan5/OilWellCards/test_set/vert_card_text/'
csv_file = '/project/arcc-students/csloan5/OilWellCards/test_output/first_try.csv'

# Create the output file once, before any rows are appended, instead of
# calling create_csv again for every input file.
# NOTE(review): create_csv presumably (re)initialises the file -- confirm in
# Tools.csv_utils.
create_csv(csv_file)

for filename in os.listdir(txt_dir):
    file = os.path.join(txt_dir, filename)
    card = Card()
    # The document ID is the file's base name with its last two extensions
    # removed and '.pdf' appended.
    card.documentID = os.path.basename(os.path.splitext(os.path.splitext(file)[0])[0]+'.pdf')
    card.typeID = '3'

    # Read everything first so the file is closed before parsing starts.
    with open(file, 'r') as text:
        lines = text.readlines()

    card = parseText(lines, card)
    card.printCardInfo()
    arr = card.get_value_list()
    append_row(csv_file, arr)

# if __name__ =='__main__':
#     main()

documentID: 148-0185.pdf
typeID: 3
api: 
well_name: 
operator: 
location: 
township: 
range: 
section: 
NS_Footage: 
EW_Footage: 
Qtr_Qtr: 
locationFootage: 
elevation: 
spud: 
completion: 
TD_Formation: 
total_depth: 
plug_back: 
casing: 
init_prod: 
card_num: 
well_status: 
reissued: 
DSTS_Cores: 
documentID: 145-0177.pdf
typeID: 3
api: 
well_name: 1-L
operator: 
location: 27N 113W sec. 27
township: 27N
range: 113W
section: 27
NS_Footage: 
EW_Footage: WL. 22
Qtr_Qtr: SW SW 
locationFootage:  WL. 22
elevation: 6851
spud: 5-20-41
completion: 7/6/41
TD_Formation: 
total_depth: 
plug_back: 
casing: 
init_prod: 
card_num: 
well_status: 
reissued: 
DSTS_Cores: 
documentID: 144-0286.pdf
typeID: 3
api: 
well_name: 
operator: 
location: 27N 113W sec. 9
township: 27N
range: 113W
section: 9
NS_Footage: 
EW_Footage: 
Qtr_Qtr: NW SW NW 
locationFootage: 
elevation: 
spud: 6-9-45
completion: 
TD_Formation: 
total_depth: 7.D. 2339
plug_back: 
casing: 
init_prod: 
card_num: 
well_status: 
reissued: 

In [6]:
# Smoke test of the CSV helpers: create the output file and append a single
# row built from a Card on which only documentID is set explicitly.
from Tools.csv_utils import append_row

attempt = '/project/arcc-students/csloan5/OilWellCards/test_output/first_try.csv'
create_csv(attempt)

new = Card()
new.documentID = "testingbtif"

# The row is whatever value list the card reports for itself.
arr = new.get_value_list()
append_row(attempt, arr)
# append_row(attempt, arr)