## Package Import

In [1]:
import pandas as pd
import openpyxl
import os

## Set Pandas Display Options

In [2]:
# Lift both pandas display limits so long and wide tables are shown without truncation.
for display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(display_option, None)

## Create function to truncate comment field length

In [3]:
def auto_truncate(val, max_length=200):
    """Trim a text value to at most ``max_length`` characters.

    Passed as a ``converters`` callable when the metadata sheet is read, so
    long free-text columns are shortened while loading.

    Parameters
    ----------
    val : object
        Raw cell value handed over by the reader.
    max_length : int, default 200
        Maximum number of characters to keep.

    Returns
    -------
    object
        The first ``max_length`` characters when ``val`` is a string;
        any other value is returned unchanged.
    """
    # Non-string values (e.g. numbers or missing-value markers) cannot be
    # sliced; hand them back untouched instead of raising TypeError.
    if not isinstance(val, str):
        return val
    return val[:max_length]

## Data Inputs
For now, only CARIBOU is handled in the metadata sheet. Codes have not yet been well defined for the other species; further consultation is needed before they can be included.

In [4]:
# Paths to the raw code tables; changing these tables changes how values are coded.
# All three files sit in the same inputs folder, so the folder is written once.
_inputs_dir = r"\\level\s40006\ESD\EI\Wildlife\WSI\Caribou_Program\Projects\BCTW\dataloading_scripting\bctw-migrate\templates\inputs"

# CSV export of the BCTW code table
codetable = _inputs_dir + r"\code_table_20211021.csv"

# CSV export of the BCTW code header table
codeheader = _inputs_dir + r"\codeheader_table_20211021.csv"

# custom table mapping Casylys metadata to BCTW db fields, code headers, and the type of field used for updating
codelookup = _inputs_dir + r"\codefieldlookup.csv"

In [5]:
# Raw Casylys workbook (an .xlsm file) that holds the collar metadata.
metadata = (
    r"\\level\s40006\ESD\EI\Wildlife\WSI\Caribou_Program\Projects\BCTW"
    r"\dataloading_scripting\final_tables_for_prod"
    r"\September_2021_Monthly_Collar_Summary.xlsm"
)

# Name of the worksheet inside the workbook where the data is stored.
metadatasheet = "Itch"

In [6]:
# Translation table from Casylys values to BCTW plain-text descriptions.
casylyscodes = (
    r"\\level\s40006\ESD\EI\Wildlife\WSI\Caribou_Program\Projects\BCTW"
    r"\dataloading_scripting\bctw-migrate\templates\inputs\casylyscodes.csv"
)

In [7]:
# Both blank BCTW templates live in the same outputs folder, written once here.
_templates_dir = r"\\level\s40006\ESD\EI\Wildlife\WSI\Caribou_Program\Projects\BCTW\dataloading_scripting\bctw-migrate\templates\outputs"

# blank copy of the BCTW animal table
animal = _templates_dir + r"\BCTW_bulk_import_animal_template.csv"

# blank copy of the BCTW device table
device = _templates_dir + r"\BCTW_bulk_import_device_template.csv"

## Data Output Location

In [8]:
# Folder that the exported CSV files are written into.
outputloc = (
    r"\\level\s40006\ESD\EI\Wildlife\WSI\Caribou_Program\Projects\BCTW"
    r"\dataloading_scripting\bctw-migrate\templates\outputs"
)

## Create Dataframes For Input Code Tables
These are used to classify data later

In [9]:
# A filled-in "valid_to" marks a code or header as deleted (the same delete-on-valid_to
# logic is used throughout BCTW), so only rows where it is empty are kept.
# The index columns are chosen so the frames can be joined later on; do not change them.
dfcodetable = (
    pd.read_csv(codetable, index_col="code_header_id")
    .loc[lambda codes: codes["valid_to"].isna()]
)

dfcodeheader = (
    pd.read_csv(codeheader, index_col="code_header_id")
    .loc[lambda headers: headers["valid_to"].isna()]
)

# The lookup table is loaded as-is, indexed by its code header id.
dfcodelookup = pd.read_csv(codelookup, index_col="codeheader")

## Build Active Code Table
This table is built automatically: unused codes and code headers are filtered out, and the result is joined to the code map that records which Casylys field goes to which db field.

In [10]:
# Attach the header record to every code, then keep only the columns used downstream.
activecode = (
    dfcodetable
    .join(dfcodeheader, on="code_header_id", rsuffix="_header", sort=True)
    .loc[:, ["code_id", "code_name", "code_description", "code_header_name"]]
)

In [11]:
# Join the lookup onto the active codes (index to index) and keep only the
# rows that ended up with a db field.
finalcodetable = (
    activecode
    .join(dfcodelookup)
    .loc[lambda joined: joined["dbfield"].notna()]
)

In [12]:
# Drop lookup rows whose index (the code header id) is blank.
has_header_id = dfcodelookup.index.notna()
dfcodelookup = dfcodelookup.loc[has_header_id]

## Load Metadata sheet

In [13]:
# Columns to read from the Casylys sheet; this limits the incoming data to the useful fields.
fieldlist = [
    "Region", "Species", "Caribou Population Unit", "WLH ID", "Animal ID",
    "Sex", "Life Stage", "Calf at Heel", "Ear Tag Right", "Ear Tag Left",
    "Device ID", "Radio Frequency", "Re-capture", "Reg_Key", "Trans-location",
    "Collar Type", "Collar Make", "Collar Model", "Satellite Network",
    "Capture Date", "Capture Date Year", "Capture Date Month",
    "Capture UTM Zone", "Capture UTM Easting", "Capture UTM Northing",
    "Capture Latitude", "Capture Longitude", "Release Date",
    "Animal Status", "Deployed", "Collar Status Details", "Deactivated?",
    "Collar Retrieved?", "Mortality Date", "Malfunction Date",
    "Malfunction Type", "Retrieval Date",
    "Mortality UTM Zone", "Mortality UTM Easting", "Mortality UTM Northing",
    "Mortality Latitude", "Mortality Longitude", "Max Transmission Date",
    "Previous Regional Comments", "Mortality Review", "Comments", "CID",
]

In [14]:
# Read the Casylys sheet into a dataframe, loading only the columns named in fieldlist.
# The two free-text columns are run through auto_truncate while loading.
dfmetadata = pd.read_excel(
    metadata,
    sheet_name=metadatasheet,
    engine='openpyxl',
    usecols=fieldlist,
    converters=dict.fromkeys(('Comments', 'Mortality Review'), auto_truncate),
)

  warn(msg)


In [15]:
# Fields whose values must be changed to BCTW coded values.
# To reclass another field, add it here and make sure its code_header entry exists in codelookup.
standardfields = [
    "Animal Status",
    "Calf at Heel",
    "Caribou Population Unit",
    "Collar Make",
    "Deployed",
    "Collar Status Details",
    "Collar Type",
    "Life Stage",
    "Malfunction Type",
    "Region",
    "Satellite Network",
    "Sex",
    "Species",
]

## Load Casylys cleaning table

In [16]:
# Mapping table describing how raw Casylys values translate into BCTW code descriptions.
# Empty cells are replaced with a single space.
dfcasylyscodes = pd.read_csv(
    casylyscodes,
    usecols=[
        "DataField",
        "DataFieldDescription",
        "CodeforinDataField",
        "BCTWCode",
        "Coded",
        "CodeMeaning",
    ],
    index_col='DataField',
).fillna(' ')

## Clean Casylys to BCTW Text

In [17]:
# For every field in standardfields, build a {Casylys value: BCTW description}
# dictionary from dfcasylyscodes and overwrite the column with the mapped values.
# Values that have no entry in the dictionary come out of Series.map as NaN.
for field in standardfields:
    # Double brackets keep the selection a DataFrame even when a field has only
    # one mapping row; a bare .loc[field] returns a Series in that case and
    # zip() would then pair up the characters of the two strings.
    field_codes = dfcasylyscodes.loc[[field]]
    mapdict = dict(zip(field_codes["CodeforinDataField"], field_codes["BCTWCode"]))
    dfmetadata[field] = dfmetadata[field].map(mapdict)

    

## The cell below uses the active codes to reclassify the metadata table; the output has the same format as the input, but with BCTW codes in the applicable fields

In [18]:
# Convert the BCTW description text produced by the previous step into BCTW code ids.
# After this loop the coded fields hold numeric ids (NaN where no description matched).
for header_id, casylys_field in dfcodelookup['Casylys'].items():
    # Selecting with a list always yields a DataFrame, so a header that has a
    # single code is handled the same way as one with many; a bare .loc[key]
    # would return a Series there and zip() over its scalar fields would fail.
    header_codes = finalcodetable.loc[[int(header_id)]]
    mapdict = dict(zip(header_codes["code_description"], header_codes["code_id"]))
    dfmetadata[casylys_field] = dfmetadata[casylys_field].map(mapdict)
    # Report only once the column has actually been rewritten.
    print("{} has been updated!".format(casylys_field))

Region has been updated!
Caribou Population Unit has been updated!
Sex has been updated!
Life Stage has been updated!
Calf at Heel has been updated!
Collar Type has been updated!
Collar Make has been updated!
Satellite Network has been updated!
Animal Status has been updated!
Deployed has been updated!
Collar Status Details has been updated!
Malfunction Type has been updated!


## Convert all boolean fields from y/n to true/false

In [19]:
# Load codelookup again, indexed by field type, to identify the boolean fields.
dfboollookupfields = pd.read_csv(codelookup, index_col="type")
# A list selector keeps the result a DataFrame even when only one row is typed
# 'bool'; .loc['bool'] would return a Series there, and iterating its 'Casylys'
# entry later would walk the characters of a single field name.
dfboollookupfields = dfboollookupfields.loc[['bool']]

In [20]:
# Lookup for the Y/N flags; the targets are the strings 'True' and 'False'.
# Null input stays null when this is applied with Series.map.
booldict = dict(Y='True', N='False')

In [21]:
# Run every boolean field through booldict: 'Y' becomes 'True' and 'N' becomes 'False'.
bool_columns = dfboollookupfields['Casylys']
for bool_column in bool_columns:
    translated = dfmetadata[bool_column].map(booldict)
    dfmetadata[bool_column] = translated
    

## Load BCTW output templates and prepare to have data migrated to them

### Change metadata field names to bctw

In [33]:
# Preview only the first rows: display.max_rows is set to None at the top of the
# notebook, so a bare `dfmetadata` would render the entire table.
# NOTE(review): the saved execution count and output of this cell show the renamed
# BCTW column names, so it was last run after the rename step further down;
# on a fresh top-to-bottom run it displays the Casylys column names instead.
dfmetadata.head(10)

Unnamed: 0,region,species,population_unit,wlh_id,animal_id,sex,life_stage,juvenile_at_heel,ear_tag_right_id,ear_tag_left_id,device_id,frequency,recapture,Reg_Key,translocation,device_type,device_make,device_model,satellite_network,capture_date,Capture Date Year,Capture Date Month,capture_utm_zone,capture_utm_easting,capture_utm_northing,capture_latitude,capture_longitude,release_date,animal_status,device_deployment_status,device_status,activation_status,retrieved,mortality_date,malftunction_date,device_malfunction_type,retrieval_date,mortality_utm_zone,mortality_utm_easting,mortality_utm_northing,mortality_latitude,mortality_longitude,offline_date,Previous Regional Comments,mortality_comment,animal_comment,CID
0,515,M-RATA,1056,17-10748,6,455.0,,,0-1889,,101800,150.14,False,Y,False,957,278,Lifecycle Pro 360 with dropoff,380,2018-03-10,2018.0,3.0,10.0,401428.0,5767390.0,,,,376.0,434,428.0,,,NaT,2020-08-17,946.0,NaT,,,,,,10-Aug-2020,"Last fix came through Aug 17, 2020. No mortali...",,*Moved to Rainbows in May 2018,26
1,515,M-RATA,1056,17-10793,7,455.0,,420.0,0-1861,,101801,150.19,False,Y,False,957,278,Lifecycle Pro 360 with dropoff,380,2018-03-12,2018.0,3.0,10.0,372188.0,5774520.0,,,,373.0,434,428.0,,,NaT,2018-07-21,,NaT,,,,,,19-Jul-2018,,,Last fix 7/21/2019,35
2,515,M-RATA,1056,17-10785,8,455.0,,420.0,0-1873,,101803,150.22,False,Y,False,957,278,Lifecycle Pro 360 with dropoff,380,2018-03-13,2018.0,3.0,10.0,398601.0,5770273.0,,,,376.0,434,428.0,,,NaT,2018-07-01,,NaT,,,,,,30-Jun-2018,,,Last fix 7/18/2018; *Moved to Rainbows in May ...,44
3,515,M-RATA,1056,17-10770,29,455.0,,420.0,0-1792,,101824,150.74,False,Y,False,957,278,Lifecycle Pro 360 with dropoff,380,2018-03-08,2018.0,3.0,10.0,374843.0,5777591.0,,,,376.0,434,428.0,,,NaT,2018-09-23,,NaT,,,,,,21-Sep-2018,,,Last fix 9/24/2019,46
4,515,M-RATA,1056,17-10761,30,455.0,,420.0,0-1892,,101825,150.76,False,Y,False,957,278,Lifecycle Pro 360 with dropoff,380,2018-03-08,2018.0,3.0,10.0,374661.0,5774908.0,,,,376.0,434,428.0,,,NaT,2018-11-07,,NaT,,,,,,25-Oct-2018,,,Last fix 11/08/2018,49
5,515,M-RATA,1056,17-10772,9,455.0,,420.0,0-1857,,101804,150.25,False,Y,False,957,278,Lifecycle Pro 360 with dropoff,380,2018-03-09,2018.0,3.0,10.0,416340.0,5776235.0,,,,374.0,435,429.0,,True,2018-07-27,NaT,,2018-07-28,10.0,406868.0,5837723.0,,,23-Jul-2018,,,Probable wolf Predation,51
6,515,M-RATA,1056,17-10764,50,454.0,,420.0,0-1771,,101888,151.78,False,Y,False,957,278,Lifecycle Pro 360 with dropoff,380,2018-03-11,2018.0,3.0,10.0,412236.0,5781892.0,,,,374.0,435,429.0,,True,2018-08-26,NaT,,2018-08-27,10.0,425006.0,5837108.0,,,23-Aug-2018,,"Email alert Aug 26, 2018.",Mortality site 2.4km from mortality alert loca...,85
7,515,M-RATA,1056,17-10776,10,455.0,,420.0,0-1855,,101805,150.28,False,Y,False,957,278,Lifecycle Pro 360 with dropoff,380,2018-03-11,2018.0,3.0,10.0,412753.0,5781047.0,,,,376.0,434,428.0,,,NaT,2018-11-20,,NaT,,,,,,12-Nov-2018,,,Last fix 11/21/2018,57
8,515,M-RATA,1056,17-10749,46,454.0,,420.0,0-1890,,101884,151.34,False,Y,False,957,278,Lifecycle Pro 360 with dropoff,380,2018-03-11,2018.0,3.0,10.0,400326.0,5774272.0,,,,374.0,435,429.0,,True,2018-03-22,NaT,,2018-03-27,10.0,404158.0,5770389.0,,,15-Mar-2018,,,Cougar Predation,69
9,515,M-RATA,1056,17-10775,47,454.0,,420.0,0-1881,,101885,151.35,False,Y,False,957,278,Lifecycle Pro 360 with dropoff,380,2018-03-11,2018.0,3.0,10.0,404393.0,5776215.0,,,,374.0,435,429.0,,True,2018-08-17,NaT,,2018-08-20,10.0,425430.0,5835746.0,,,16-Aug-2018,,"Email alert Aug 17, 2018.",Likely inexperienced bear per comms Wildlife H...,70


In [22]:
# Read codelookup once more (default index) and keep only the rows that name a
# BCTW db field; these drive the Casylys -> BCTW column renaming.
dfcodelookupfields = (
    pd.read_csv(codelookup)
    .loc[lambda lookup: lookup['dbfield'].notna()]
)

In [23]:
# Field map: Casylys column name -> BCTW db field name.
fielddict = {
    casylys_name: bctw_name
    for casylys_name, bctw_name in zip(
        dfcodelookupfields['Casylys'], dfcodelookupfields['dbfield']
    )
}

In [24]:
# Rename every column that has an entry in fielddict; other columns keep their names.
dfmetadata = dfmetadata.rename(fielddict, axis='columns')

## Load blank csv templates and put matching fields into them and export as csv
The animal output is split into recapture and non-recapture records. Recaptures need to be handled differently because they require the animal uid to be assigned to them; those records may have to be added manually. The solution is still unclear.

In [25]:
# Blank CSV templates (downloaded from the BCTW data import wizard) that define
# the format of the exported dataframes.
dfanimal, dfdevice = (
    pd.read_csv(template_path) for template_path in (animal, device)
)

In [26]:
# Stack the metadata under each blank template; join='inner' keeps only the
# columns that exist in both the template and the metadata.
device_frames = [dfdevice, dfmetadata]
animal_frames = [dfanimal, dfmetadata]
dfdeviceout = pd.concat(device_frames, join='inner')
dfanimalout = pd.concat(animal_frames, join='inner')

In [27]:
# Split the animal rows on the recapture flag. The flag is compared against the
# text 'True' (booldict maps 'Y' to that string); every other value, including
# missing ones, lands in the not-recapture set.
is_recapture = dfanimalout['recapture'] == 'True'
dfanimalrecapture = dfanimalout[is_recapture]
dfanimalnotrecapture = dfanimalout[~is_recapture]

In [28]:
# Export the animal rows that are not recaptures to dfanimal.csv in the output
# folder, without the dataframe index.
a_output = os.path.join(outputloc, 'dfanimal.csv')
dfanimalnotrecapture.to_csv(path_or_buf=a_output, index=False)

In [29]:
# Export the recapture animal rows to recapture_dfanimal.csv in the output
# folder, without the dataframe index.
ar_output = os.path.join(outputloc, 'recapture_dfanimal.csv')
dfanimalrecapture.to_csv(path_or_buf=ar_output, index=False)

In [30]:
# Split the device rows on deployment status: code 434 means deployed, and every
# other status (including a missing one) goes to the not-deployed set.
is_deployed = dfdeviceout['device_deployment_status'] == 434
dfdevicedeployed = dfdeviceout[is_deployed]
dfdevicenotdeployed = dfdeviceout[~is_deployed]

In [31]:
# Export the device rows whose device_deployment_status is not 434 (any status
# other than deployed) to notddfdevice.csv, without the dataframe index.
d_output = os.path.join(outputloc, 'notddfdevice.csv')
dfdevicenotdeployed.to_csv(path_or_buf=d_output, index=False)

In [32]:
# Export the device rows with device_deployment_status 434 (deployed) to
# dfdevice.csv, without the dataframe index. These will have to be loaded
# backwards; this set captures retrieved and redeployed collars.
dd_output = os.path.join(outputloc, 'dfdevice.csv')
dfdevicedeployed.to_csv(path_or_buf=dd_output, index=False)