In [1]:
import os

import cx_Oracle
import sqlalchemy

# The URL template must be filled in before it can connect. The values are read
# from environment variables so that no credentials are stored in the notebook;
# a missing variable fails fast with a KeyError naming it.
# NOTE(review): the variable names below are placeholders chosen here, and the
# slot order assumes the usual user:password@host:port/database URL layout —
# adjust both to match your deployment.
engine = sqlalchemy.create_engine(
    'oracle://{}:{}@{}:{}/{}'.format(
        os.environ['ORACLE_USER'],
        os.environ['ORACLE_PASSWORD'],
        os.environ['ORACLE_HOST'],
        os.environ['ORACLE_PORT'],
        os.environ['ORACLE_DATABASE'],
    )
)
conn = engine.connect()
print(conn)

<sqlalchemy.engine.base.Connection object at 0x00000000049FEE80>


In [2]:
import pandas

In [3]:
# get 3 key types of inspections after 01/2014 from CASE_ACTION
# NOTE(review): 'Open Hole Demo Inspection ' has a trailing space inside the
# literal; it is kept exactly as written — confirm it matches the stored value.
# The statement is wrapped in sqlalchemy.text() because newer SQLAlchemy
# releases no longer accept a bare string in Connection.execute().
actions = conn.execute(sqlalchemy.text(
    "select * from CASE_ACTION where CSA_DATE3 > DATE '2014-01-01' and ACTION_DESCRIPTION in ('Final Grade Inspection', 'Open Hole Demo Inspection ', 'Winter Grade Inspection')"
))
# Supplying the column names at construction keeps the header even when the
# query returns no rows (assigning .columns afterwards would raise in that case).
caseaction_df = pandas.DataFrame(actions.fetchall(), columns=list(actions.keys()))

print(caseaction_df.shape)
caseaction_df[:3]

(44760, 37)


Unnamed: 0,csa_id,case_type,actn_menu_id,actn_code,actn_version_no,csm_caseno,action_description,csa_creation_date,csa_date1,csa_date2,...,csa_y_coord,csa_ivr_confirm_no,csa_submitted_from_wireless,csa_start_mileage,csa_end_mileage,csa_total_mileage,csa_vehicle_id,csa_start_time,csa_end_time,csa_total_time
0,201605181601105180,DNG,L,50,1,DNG2012-00302,Open Hole Demo Inspection,2016-05-18 16:01:10,2016-05-18,2016-05-19,...,,,,,,,,,,
1,201605181601297830,BLD,C,150,1,BLD2016-03247,Open Hole Demo Inspection,2016-05-18 16:01:29,2016-05-18,2016-05-19,...,,,,,,,,,,
2,201605181602105850,DNG,L,50,1,DNG2015-00589,Open Hole Demo Inspection,2016-05-18 16:02:10,2016-05-18,2016-05-19,...,,,,,,,,,,


In [4]:
# which case types appear among the selected inspection actions (any EMGs)?
caseaction_df.loc[:, 'case_type'].unique()

array(['DNG', 'BLD', 'MRC'], dtype=object)

In [5]:
# get everything from CASE_PARCEL
# The statement is wrapped in sqlalchemy.text() because newer SQLAlchemy
# releases no longer accept a bare string in Connection.execute().
parcels = conn.execute(sqlalchemy.text("select * from CASE_PARCEL"))
# Supplying the column names at construction keeps the header even when the
# query returns no rows (assigning .columns afterwards would raise in that case).
caseparcel_df = pandas.DataFrame(parcels.fetchall(), columns=list(parcels.keys()))

print(caseparcel_df.shape)
caseparcel_df[:3]

(1080010, 5)


Unnamed: 0,csm_caseno,prc_avp_no,prc_parcel_no,csp_updated,csp_updateby
0,ELE2000-04999,0,21 62539.,2000-10-25,L-SB
1,ELE2000-05000,0,15 2525.003,2000-10-25,L-SB
2,ELE2000-05001,0,0,2000-10-25,L-SB


In [6]:
# attach parcel rows to each inspection action via the shared case number;
# an inner join keeps only actions whose case has at least one parcel row,
# and yields one output row per matching (action, parcel) pair
action_parcels_df = caseaction_df.merge(
    caseparcel_df,
    how='inner',
    on='csm_caseno',
)

print(action_parcels_df.shape)

(44863, 41)


In [7]:
# clean Tidemark's parcel nums to standard format
def clean_pnum(pnum):
    """Normalize a Tidemark parcel number to a zero-padded ward + lot string.

    The raw value is split into a "beginning" (ward and lot) and an "end"
    (suffix). The suffix is kept verbatim and is one of:

    * everything from the first '.' onward (e.g. '.001', or a lone '.'), or
    * everything from the first '-' onward when there is no '.' (e.g. '-48').

    A beginning with a space is read as '<ward> <lot>': the ward is padded to
    2 digits and the lot to 6. A beginning without a space is left-padded
    with zeroes to 8 characters.

    Args:
        pnum: Raw parcel number string. Anything that is not a string
            (None, NaN from a database null, ...) is treated as missing.

    Returns:
        The cleaned parcel number, or None when the input is missing, blank,
        one of '0' / '00' / '000', or a space-less beginning longer than
        8 characters.
    """
    # database nulls arrive as None/NaN; there is nothing to clean
    if not isinstance(pnum, str):
        return None

    # padding around the value would otherwise leak into the suffix
    pnum = pnum.strip()

    # it's blank or zeroes; return nothing
    if pnum in ('', '0', '00', '000'):
        return None

    if '.' in pnum and not pnum.endswith('.'):
        # there's a dot with stuff after it
        beginning, dot, rest = pnum.partition('.')
        end = dot + rest
    elif pnum.endswith('.'):
        # there's a dot at the very end
        beginning, end = pnum[:-1], '.'
    elif '-' in pnum and not pnum.endswith('-'):
        # there's a dash with stuff after it
        beginning, dash, rest = pnum.partition('-')
        end = dash + rest
    else:
        beginning, end = pnum, ''

    # only a few of these cases that don't have a space
    if ' ' not in beginning:
        if len(beginning) > 8:
            # too long to pad into the 8-character layout; no usable value
            return None
        # zfill leaves an already 8-character value untouched
        return beginning.zfill(8) + end

    # most will have a space: '<ward> <lot>'
    ward, _, remainder = beginning.partition(' ')
    # tolerate repeated spaces between ward and lot; the lot is the first
    # token after the ward (empty if nothing follows)
    lot_tokens = remainder.split()
    lot = lot_tokens[0] if lot_tokens else ''
    return ward.zfill(2) + lot.zfill(6) + end

In [8]:
# spot check: run a handful of raw Tidemark parcel numbers through the cleaner
tm_pnums = [
    '22 24234-48', '16 42779.', '1 5272.', '9 11029.',
    '8 8268-70', '20 5882.001', '22 109754.', '14 12936.002L',
    '4 1939.', '22 101006.', '1 4148-54',
]

for raw_pnum in tm_pnums:
    cleaned = clean_pnum(raw_pnum)
    print(cleaned)

22024234-48
16042779.
01005272.
09011029.
08008268-70
20005882.001
22109754.
14012936.002L
04001939.
22101006.
01004148-54


In [9]:
# derive the standardized parcel number for every row from the raw Tidemark value
action_parcels_df['clean_parcel_no'] = action_parcels_df['prc_parcel_no'].apply(clean_pnum)

In [10]:
# build a per-row identifier: the action id, an underscore, then the raw
# (uncleaned) parcel number
action_parcels_df['unique_id'] = (
    action_parcels_df['csa_id']
    + "_"
    + action_parcels_df['prc_parcel_no']
)
action_parcels_df.iloc[:3]

Unnamed: 0,csa_id,case_type,actn_menu_id,actn_code,actn_version_no,csm_caseno,action_description,csa_creation_date,csa_date1,csa_date2,...,csa_vehicle_id,csa_start_time,csa_end_time,csa_total_time,prc_avp_no,prc_parcel_no,csp_updated,csp_updateby,clean_parcel_no,unique_id
0,201605181601105180,DNG,L,50,1,DNG2012-00302,Open Hole Demo Inspection,2016-05-18 16:01:10,2016-05-18,2016-05-19,...,,,,,0,22 24234-48,2012-01-11 11:17:22,W-DS,22024234-48,201605181601105180_22 24234-48
1,201606171540288560,DNG,L,70,1,DNG2012-00302,Final Grade Inspection,2016-06-17 15:40:28,2016-06-17,2016-06-20,...,,,,,0,22 24234-48,2012-01-11 11:17:22,W-DS,22024234-48,201606171540288560_22 24234-48
2,201605240946189630,DNG,L,50,1,DNG2012-00302,Open Hole Demo Inspection,2016-05-24 09:46:18,2016-05-24,2016-05-24,...,,,,,0,22 24234-48,2012-01-11 11:17:22,W-DS,22024234-48,201605240946189630_22 24234-48


In [11]:
# order rows by the csa_date3 column, latest first
action_parcels_df = action_parcels_df.sort_values('csa_date3', ascending=False)

In [13]:
# TODO: export action_parcels_df, e.g. write it to a CSV file or load it into Postgres