## Package Import

In [None]:
import pandas as pd
import openpyxl
import os

## Set Pandas Display Options

In [None]:
# Lift the row and column display limits so both long and wide tables are shown in full
for display_option in ("display.max_rows", "display.max_columns"):
    pd.set_option(display_option, None)

## Data Inputs
For now, only CARIBOU is covered in the metadata sheet. Codes have not been well defined for the other species; further consultation is needed for them.

In [None]:
# Locations of the raw code tables; editing these tables changes how values are coded.

# codetable: CSV export of the BCTW code table
codetable = (
    r"\\level\s40006\ESD\EI\Wildlife\WSI\Caribou_Program\Projects\BCTW"
    r"\dataloading_scripting\bctw-migrate\templates\inputs"
    r"\code_table_20211021.csv"
)

# codeheader: CSV export of the BCTW code header table
codeheader = (
    r"\\level\s40006\ESD\EI\Wildlife\WSI\Caribou_Program\Projects\BCTW"
    r"\dataloading_scripting\bctw-migrate\templates\inputs"
    r"\codeheader_table_20211021.csv"
)

# codelookup: custom table mapping Casylys metadata to BCTW db fields and code headers,
# and recording what type of field each one is for updating
codelookup = (
    r"\\level\s40006\ESD\EI\Wildlife\WSI\Caribou_Program\Projects\BCTW"
    r"\dataloading_scripting\bctw-migrate\templates\inputs"
    r"\codefieldlookup.csv"
)

In [None]:
# metadata: the raw Casylys workbook (xlsx)
metadata = (
    r"\\level\s40006\ESD\EI\Wildlife\WSI\Caribou_Program\Projects\BCTW"
    r"\dataloading_scripting\test_data"
    r"\May_2021_Monthly_Collar_Summary.xlsx"
)

# metadatasheet: name of the worksheet inside the workbook that holds the data
metadatasheet = "Master Summary May 2021"

In [None]:
# casylyscodes: table that translates Casylys values into BCTW plain-text descriptions
casylyscodes = (
    r"\\level\s40006\ESD\EI\Wildlife\WSI\Caribou_Program\Projects\BCTW"
    r"\dataloading_scripting\bctw-migrate\templates\inputs"
    r"\casylyscodes.csv"
)

In [None]:
# animal: blank copy of the BCTW animal table
animal = (
    r"\\level\s40006\ESD\EI\Wildlife\WSI\Caribou_Program\Projects\BCTW"
    r"\dataloading_scripting\bctw-migrate\templates\outputs"
    r"\BCTW_bulk_import_animal_template.csv"
)

# device: blank copy of the BCTW device table
device = (
    r"\\level\s40006\ESD\EI\Wildlife\WSI\Caribou_Program\Projects\BCTW"
    r"\dataloading_scripting\bctw-migrate\templates\outputs"
    r"\BCTW_bulk_import_device_template.csv"
)

## Data Output Location

In [None]:
# outputloc: folder that the generated csv files are written to
outputloc = (
    r"\\level\s40006\ESD\EI\Wildlife\WSI\Caribou_Program\Projects\BCTW"
    r"\dataloading_scripting\bctw-migrate\templates\outputs"
)

## Create Dataframes For Input Code Tables
These are used to classify data later

In [None]:
# Load the three code tables. Each one is indexed on its code header id so the tables
# can be joined later on; do not change the index fields.
#
# For dfcodetable and dfcodeheader only rows whose valid_to is empty are kept, which
# drops inactive ("deleted") codes and headers; this delete-on-valid_to logic is used
# throughout BCTW.
dfcodetable = pd.read_csv(codetable, index_col="code_header_id").loc[
    lambda codes: codes["valid_to"].isna()
]

dfcodeheader = pd.read_csv(codeheader, index_col="code_header_id").loc[
    lambda headers: headers["valid_to"].isna()
]

dfcodelookup = pd.read_csv(codelookup, index_col="codeheader")

## Build Active Code Table
This table is built automatically by filtering out unused codes and code headers, then joining the code map that records which Casylys field goes to which db field.

In [None]:
# Attach the header row to every active code (matched on code_header_id; header columns
# whose names clash with code columns get a "_header" suffix), then keep only the
# columns used by the later steps
activecode = dfcodetable.join(
    dfcodeheader, on="code_header_id", rsuffix="_header", sort=True
).loc[:, ["code_id", "code_name", "code_description", "code_header_name"]]

In [None]:
# Join the lookup table onto the active codes by index and keep only the rows that
# ended up with a dbfield value
finalcodetable = activecode.join(dfcodelookup).loc[
    lambda joined: joined["dbfield"].notna()
]

In [None]:
# Drop lookup rows whose index (the code header id) is missing
dfcodelookup = dfcodelookup.loc[dfcodelookup.index.notna()]

## Load Metadata sheet

In [None]:
# fieldlist restricts the incoming Casylys data to the useful columns
fieldlist = [
    "Region", "Species", "Caribou Population Unit",
    "WLH ID", "Animal ID", "Sex", "Life Stage", "Calf at Heel",
    "Ear Tag Right", "Ear Tag Left",
    "Device ID", "Radio Frequency", "Re-capture", "Reg_Key", "Trans-location",
    "Collar Type", "Collar Make", "Collar Model", "Satellite Network",
    "Capture Date", "Capture Date Year", "Capture Date Month",
    "Capture UTM Zone", "Capture UTM Easting", "Capture UTM Northing",
    "Capture Latitude", "Capture Longitude",
    "Release Date", "Animal Status", "Deployed", "Collar Status Details",
    "Deactivated?", "Collar Retrieved?",
    "Mortality Date", "Malfunction Date", "Malfunction Type", "Retrieval Date",
    "Mortality UTM Zone", "Mortality UTM Easting", "Mortality UTM Northing",
    "Mortality Latitude", "Mortality Longitude",
    "Max Transmission Date", "Previous Regional Comments", "Mortality Review",
    "Comments", "CID",
]

In [None]:
# Read the Casylys worksheet into a dataframe, keeping only the columns in fieldlist
dfmetadata = pd.read_excel(
    metadata,
    sheet_name=metadatasheet,
    usecols=fieldlist,
    engine="openpyxl",
)

## Standardize Casylys form into BCTW values

In [None]:
# Fields whose values must be changed to BCTW coded values. To reclass another field,
# add it to this list and make sure its code_header entry has been added to codelookup.
standardfields = [
    "Animal Status",
    "Calf at Heel",
    "Caribou Population Unit",
    "Collar Make",
    "Deployed",
    "Collar Status Details",
    "Collar Type",
    "Life Stage",
    "Malfunction Type",
    "Region",
    "Satellite Network",
    "Sex",
    "Species",
]

## Load Casylys cleaning table

In [None]:
# dfcasylyscodes holds the mapping used to change raw Casylys data into BCTW code
# descriptions. It is indexed on DataField, and every empty cell is filled with a
# single space.
casylys_columns = [
    "DataField",
    "DataFieldDescription",
    "CodeforinDataField",
    "BCTWCode",
    "Coded",
    "CodeMeaning",
]
dfcasylyscodes = pd.read_csv(
    casylyscodes, usecols=casylys_columns, index_col="DataField"
).fillna(" ")

## Clean Casylys to BCTW Text

In [None]:
# For every field in standardfields, build a {raw Casylys value: BCTW description}
# dictionary from dfcasylyscodes and overwrite the metadata column with the mapped
# values. Series.map turns any value that is not a key of the dictionary into NaN.
for field in standardfields:
    # Select with a list so the result is always a DataFrame. A bare label returns a
    # Series when the field has exactly one mapping row, and zip() would then pair up
    # the characters of the two strings instead of the two column values.
    shortcodelist = dfcasylyscodes.loc[[field]]
    mapdict = dict(zip(shortcodelist["CodeforinDataField"], shortcodelist["BCTWCode"]))
    dfmetadata[field] = dfmetadata[field].map(mapdict)

    

## The cell below uses the active codes to reclassify the metadata table; the output has the same format as the input, but with BCTW codes in the applicable fields

In [None]:
# Convert the BCTW description text produced by the previous step into BCTW code ids.
# For every (code header id, Casylys field) pair in dfcodelookup, build a
# {code_description: code_id} dictionary from finalcodetable and map the Casylys column
# through it. Series.map turns descriptions that are not in the dictionary into NaN.
#
# Iterating over the items of the Casylys column handles a code header that is shared
# by several Casylys fields: each field is processed on its own, where a label lookup
# would return all of them at once.
for header_id, casylys_field in dfcodelookup['Casylys'].items():
    # Select with a list so the result is always a DataFrame, even when the header has
    # only one code; a bare label would return a Series and break the zip() below.
    shortcodelist = finalcodetable.loc[[int(header_id)]]
    mapdict = dict(zip(shortcodelist["code_description"], shortcodelist["code_id"]))
    dfmetadata[casylys_field] = dfmetadata[casylys_field].map(mapdict)
    # report only once the column has actually been rewritten
    print(("{} has been updated!").format(casylys_field))

## Convert all boolean fields from y/n to true/false

In [None]:
# Load codelookup again, indexed on its type column, and keep the rows of type 'bool'
# to identify the boolean fields
dfboollookupfields = pd.read_csv(codelookup, index_col="type")
# Select with a list so the result stays a DataFrame even when there is a single bool
# row. A bare label would return a Series there, and dfboollookupfields['Casylys']
# would then be one string whose characters get iterated instead of field names.
dfboollookupfields = dfboollookupfields.loc[['bool']]

In [None]:
# Y/N to True/False translation (as text); nulls are left as null
booldict = dict(Y='True', N='False')

In [None]:
# Run every boolean field through booldict so Y becomes True and N becomes False
bool_columns = dfboollookupfields['Casylys']
for bool_column in bool_columns:
    dfmetadata[bool_column] = dfmetadata[bool_column].map(booldict)
    

## Load BCTW output templates and prepare to have data migrated to them

### Change metadata field names to bctw

In [None]:
# Read codelookup once more, keeping only rows that name a dbfield; these rows are the
# source for the Casylys-to-BCTW field name dictionary
dfcodelookupfields = pd.read_csv(codelookup).loc[
    lambda lookup: lookup["dbfield"].notna()
]

In [None]:
# Field map dictionary: Casylys column name -> BCTW dbfield name
fielddict = {
    casylys_name: bctw_name
    for casylys_name, bctw_name in zip(
        dfcodelookupfields["Casylys"], dfcodelookupfields["dbfield"]
    )
}

In [None]:
# Rename every metadata column that has an entry in fielddict
dfmetadata = dfmetadata.rename(fielddict, axis="columns")

## Load blank csv templates and put matching fields into them and export as csv

In [None]:
# Blank csv templates that define the format of the output dataframes; they were
# downloaded from the BCTW data import wizard
dfanimal, dfdevice = (pd.read_csv(template) for template in (animal, device))

In [None]:
# Stack the metadata under each blank template; join='inner' keeps only the columns
# that exist in both the template and the metadata
dfanimalout = pd.concat(objs=[dfanimal, dfmetadata], join="inner")
dfdeviceout = pd.concat(objs=[dfdevice, dfmetadata], join="inner")

In [None]:
# Write the animal output to dfanimal.csv in the output folder, without the index column
a_output = os.path.join(outputloc, 'dfanimal.csv')
dfanimalout.to_csv(path_or_buf=a_output, index=False)

In [None]:
# Write the device output to dfdevice.csv in the output folder, without the index column
d_output = os.path.join(outputloc, 'dfdevice.csv')
dfdeviceout.to_csv(path_or_buf=d_output, index=False)