# Bighorn Sheep and Mountain Goat ETL

In [1]:
#import libraries
import pandas as pd
import numpy as np
import re

#import functions
from functions import get_sample_id
from functions import panda_stripper


In [2]:
#read the 2021-22 bighorn sheep sample sheet and run it through panda_stripper
sheep_table = (
    pd.read_excel('data/Bighorn sheep_2021_22_Sample sheet.xlsx')
    .pipe(panda_stripper)
)

#relabel the columns by position (the sheet is expected to have exactly these eight)
sheep_columns = [
    'sample_id', 'collar_id', 'species', 'sex',
    'capture_date', 'capture_unit', 'staging_area', 'comments',
]
sheep_table.columns = sheep_columns

#preview the first rows
sheep_table.head()

Unnamed: 0,sample_id,collar_id,species,sex,capture_date,capture_unit,staging_area,comments
0,SRS1101,50750.0,Bighorn Sheep,Female,2021-12-03,North San Rafael,Swinging Bridge,2 tonsillar swabs and 2 nasal swabs (SRS1101)
1,SRS1102,50744.0,Bighorn Sheep,Female,2021-12-04,North San Rafael,Swinging Bridge,2 tonsillar swabs and 2 nasal swabs (SRS1102)
2,SRS1103,50749.0,Bighorn Sheep,Female,2021-12-03,North San Rafael,Swinging Bridge,"Short blood, veins flat, worried about heat. 2..."
3,SRS1104,50758.0,Bighorn Sheep,Female,2021-12-04,North San Rafael,Swinging Bridge,2 tonsillar swabs and 2 nasal swabs (SRS1104)
4,SRS1105,50752.0,Bighorn Sheep,Female,2021-12-03,North San Rafael,Swinging Bridge,"Recapture. Old collar: 75535, 148.307. 2 tonsi..."


In [3]:
#read the 2021-22 mountain goat sample sheet and run it through panda_stripper
goat_table = (
    pd.read_excel('data/Mt. Goat_2021_22_Sample sheet.xlsx')
    .pipe(panda_stripper)
)

#relabel the columns by position (seven columns; this sheet has no staging_area)
goat_columns = [
    'sample_id', 'collar_id', 'species', 'sex',
    'capture_date', 'capture_unit', 'comments',
]
goat_table.columns = goat_columns

In [4]:
#stack the sheep and goat sample sheets into one table
#ignore_index=True renumbers the rows 0..n-1; without it each frame keeps its
#own row labels, so the combined table would contain repeated index values
#columns missing from one sheet (staging_area for goats) are filled with NaN
sheep_goat_table = pd.concat([sheep_table, goat_table], ignore_index=True)

## Lab Results

### M. ovi ELISA

In [5]:
#load the movi_elisa sheet (columns 1-3 only), run it through panda_stripper,
#derive sample_id from the 'Animal' column, then discard 'Animal'
movi_elisa_df = (
    pd.read_excel('data/sheep_goat_tables.xlsx', sheet_name='movi_elisa', usecols=[1, 2, 3])
    .pipe(panda_stripper)
    .pipe(get_sample_id, 'Animal')
    .drop(columns='Animal')
)

#relabel the remaining columns by position
movi_elisa_df.columns = ['movi_elisa_val', 'movi_elisa_result', 'sample_id']

#outer join onto the running sheep/goat table so unmatched rows on either side are kept
sheep_goat_table = pd.merge(sheep_goat_table, movi_elisa_df, how='outer', on='sample_id')

### M. ovi PCR

In [6]:
#load the movi_pcr sheet (columns 0 and 2 only), run it through panda_stripper,
#derive sample_id from the 'Animal' column, then discard 'Animal'
movi_pcr_df = (
    pd.read_excel('data/sheep_goat_tables.xlsx', sheet_name='movi_pcr', usecols=[0, 2])
    .pipe(panda_stripper)
    .pipe(get_sample_id, 'Animal')
    .drop(columns='Animal')
)

#relabel the remaining columns by position
movi_pcr_df.columns = ['movi_pcr_result', 'sample_id']

#outer join onto the running sheep/goat table so unmatched rows on either side are kept
sheep_goat_table = pd.merge(sheep_goat_table, movi_pcr_df, how='outer', on='sample_id')

### Lentivirus

In [7]:
#load the lentivirus sheet (columns 1-3 only), run it through panda_stripper,
#derive sample_id from the 'Animal' column, then discard 'Animal'
lentivirus_df = (
    pd.read_excel('data/sheep_goat_tables.xlsx', sheet_name='lentivirus', usecols=[1, 2, 3])
    .pipe(panda_stripper)
    .pipe(get_sample_id, 'Animal')
    .drop(columns='Animal')
)

#relabel the remaining columns by position
lentivirus_df.columns = ['lentivirus_val', 'lentivirus_result', 'sample_id']

#outer join onto the running sheep/goat table so unmatched rows on either side are kept
sheep_goat_table = pd.merge(sheep_goat_table, lentivirus_df, how='outer', on='sample_id')

### EHDV

In [8]:
#load the ehdv sheet (columns 1-3 only), run it through panda_stripper,
#derive sample_id from the 'Animal' column, then discard 'Animal'
ehdv_df = (
    pd.read_excel('data/sheep_goat_tables.xlsx', sheet_name='ehdv', usecols=[1, 2, 3])
    .pipe(panda_stripper)
    .pipe(get_sample_id, 'Animal')
    .drop(columns='Animal')
)

#relabel the remaining columns by position
ehdv_df.columns = ['ehdv_val', 'ehdv_result', 'sample_id']

#outer join onto the running sheep/goat table so unmatched rows on either side are kept
sheep_goat_table = pd.merge(sheep_goat_table, ehdv_df, how='outer', on='sample_id')

### Bluetongue

In [9]:
#read and strip table
bluetongue_df = pd.read_excel('data/sheep_goat_tables.xlsx', sheet_name='bluetongue', usecols=[1,2])
bluetongue_df = panda_stripper(bluetongue_df)

#get sample ids and delete original col
bluetongue_df = get_sample_id(bluetongue_df, 'Animal')
del bluetongue_df['Animal']

#rename cols
bluetongue_df.columns = ['bluetongue_result', 'sample_id']

#clean results col: anything starting with 'Neg' becomes 'Negative', every other
#reported value becomes 'Positive'
#missing results are left missing; the previous per-row str.startswith call raised
#AttributeError on NaN, and a blank must not be reported as a positive test
raw_result = bluetongue_df['bluetongue_result']
result_label = np.where(raw_result.astype(str).str.startswith('Neg'), 'Negative', 'Positive')
bluetongue_df['bluetongue_result'] = raw_result.where(raw_result.isna(), result_label)

#merge with sheep goats table
sheep_goat_table = sheep_goat_table.merge(bluetongue_df, on='sample_id', how='outer')

### Pasteurella leukotoxin lktA by PCR

In [10]:
#load the lktA_pcr sheet (columns 0 and 2 only), run it through panda_stripper,
#derive sample_id from the 'Animal' column, then discard 'Animal'
lktA_df = (
    pd.read_excel('data/sheep_goat_tables.xlsx', sheet_name='lktA_pcr', usecols=[0, 2])
    .pipe(panda_stripper)
    .pipe(get_sample_id, 'Animal')
    .drop(columns='Animal')
)

#relabel the remaining columns by position
lktA_df.columns = ['leukotoxin_lktA_result', 'sample_id']

#outer join onto the running sheep/goat table so unmatched rows on either side are kept
sheep_goat_table = pd.merge(sheep_goat_table, lktA_df, how='outer', on='sample_id')


### Aerobic Cultures

Results from SOP-BACT-2, which covers various bacteria.

In [11]:
#load the sop_bact_2 sheet (columns 0, 2 and 3 only), run it through panda_stripper,
#derive sample_id from the 'Animal' column, then discard 'Animal'
bact_df = (
    pd.read_excel('data/sheep_goat_tables.xlsx', sheet_name='sop_bact_2', usecols=[0, 2, 3])
    .pipe(panda_stripper)
    .pipe(get_sample_id, 'Animal')
    .drop(columns='Animal')
)

#relabel the remaining columns by position
bact_df.columns = ['tonsular_culture_result', 'tonsular_culture_isolate', 'sample_id']

#outer join onto the running sheep/goat table so unmatched rows on either side are kept
sheep_goat_table = pd.merge(sheep_goat_table, bact_df, how='outer', on='sample_id')

## Load

### Export excel sheet for DWR office

In [12]:
#write the combined sample + lab results table to the workbook for the DWR office
#index=False keeps the pandas row index out of the spreadsheet
output_path = 'data/finals/Big Horn and Mtn Goats 2021-2022 Lab Results.xlsx'
sheep_goat_table.to_excel(output_path, index=False)