In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

sns.set_theme()

# jupyter notebook full-width display
# `display` and `HTML` come from the public IPython.display module; importing
# `display` from IPython.core.display has been deprecated since IPython 7.14.
from IPython.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

# pandas formatting
pd.set_option('display.float_format', '{:.3f}'.format)  # floats rendered with 3 decimals
pd.set_option('display.max_columns', None)  # no limit on displayed columns
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_colwidth', 1000)  # cell text up to 1000 characters is not truncated

In [2]:
import statistics

# from django.contrib.auth.models import User
from django.core.validators import MaxValueValidator, MinValueValidator
from django.db import models
from django.db.models import Q
from django.template.defaultfilters import date
from django.urls import reverse
from django.utils import timezone
from django.utils.safestring import mark_safe
from django.utils.translation import gettext as _, gettext

In [3]:
# Plain-string stand-in for the name `User`; the real Django import
# (django.contrib.auth.models.User) is commented out in the import cell.
User = "User"

# TRAPNET

### Specimen

In [4]:
# class Specimen():
# Attribute-by-attribute transcription of the model: every key is an attribute
# name and every value is the source text of its definition, stored as a plain
# string so that nothing is evaluated.

specimen_dict = {
    "species": 'models.ForeignKey(Species, on_delete=models.DO_NOTHING, related_name="specimens")',
    "life_stage": 'models.ForeignKey(LifeStage, related_name="specimens", on_delete=models.DO_NOTHING, blank=True, null=True)',
    "reproductive_status": 'models.ForeignKey(ReproductiveStatus, related_name="specimens", on_delete=models.DO_NOTHING, blank=True, null=True)',
    "maturity": 'models.ForeignKey(Maturity, related_name="specimens", on_delete=models.DO_NOTHING, blank=True, null=True)',

    "status": 'models.ForeignKey(Status, on_delete=models.DO_NOTHING, related_name="specimens", blank=False, null=True)',
    "sex": 'models.ForeignKey(Sex, on_delete=models.DO_NOTHING, related_name="specimens", blank=True, null=True)',
    "adipose_condition": 'models.IntegerField(blank=True, null=True, verbose_name=_("adipose condition"), choices=model_choices.adipose_condition_choices)',

    "fork_length": 'models.FloatField(blank=True, null=True, verbose_name=_("fork length (mm)"))',
    "fork_length_bin_interval": 'models.FloatField(default=1, verbose_name=_("fork length bin interval (mm)"))',
    "total_length": 'models.FloatField(blank=True, null=True, verbose_name=_("total length (mm)"))',

    "weight": 'models.FloatField(blank=True, null=True, verbose_name=_("weight (g)"))',
    "tag_number": 'models.CharField(max_length=12, blank=True, null=True, verbose_name=_("tag number"))',
    "scale_id_number": 'models.CharField(max_length=50, blank=True, null=True, verbose_name=_("scale ID number"), unique=True)',

    # downstream
    "age_type": 'models.IntegerField(blank=True, null=True, verbose_name=_("age type"), choices=model_choices.age_type_choices)',
    "river_age": 'models.IntegerField(blank=True, null=True, verbose_name=_("river age"))',
    "ocean_age": 'models.IntegerField(blank=True, null=True, verbose_name=_("ocean age"))',

    "notes": 'models.TextField(blank=True, null=True)',

    "sample": 'models.ForeignKey(Sample, on_delete=models.CASCADE, related_name="specimens", blank=True, null=True)',
    "sweep": 'models.ForeignKey(Sweep, on_delete=models.CASCADE, related_name="specimens", blank=True, null=True)',
    "old_id": 'models.CharField(max_length=25, null=True, blank=True, editable=False)',

    # to be deleted eventually
    "origin": 'models.ForeignKey(Origin, on_delete=models.DO_NOTHING, related_name="specimens", blank=True, null=True, editable=False)',

    # PROPERTIES
    # Recorded as one-line summaries rather than field definitions.
    "is_recapture": '@property def is_recapture(self): return self.status and self.status.code.lower() in ["rr", "rrl"]',
    # first_tagging is abbreviated below. In the model it is a nested `if`:
    # when the specimen has a tag_number and is_recapture, it queries
    #     Specimen.objects.filter(~Q(id=self.id)).filter(tag_number=self.tag_number)
    # and returns .first() of that queryset if it .exists().
    "first_tagging": '@property def first_tagging(self): return...',
}

In [5]:
# run_process_fish()
# Keyword arguments recorded from the import script: each keyword is a
# Specimen attribute and each value is the source text of the expression
# assigned to it (kept as a string, never evaluated).

fish_kwargs = dict(
    old_id='f\'GD_{r["GD_ID"]}\'',
    sweep='sweep',
    species='species',
    life_stage='life_stage',
    reproductive_status='None',  # the literal text 'None', not the None object
    status='status',
    adipose_condition='adipose_condition',
    fork_length='r["FORK_LENGTH"]',
    total_length='r["TOTAL_LENGTH"]',
    # NOTE(review): weight is recorded as reading the same column as
    # total_length; confirm whether this is a transcription slip here or the
    # behaviour of the import script itself.
    weight='r["TOTAL_LENGTH"]',
    sex='sex',
    age_type='age_type',
    river_age='r["RIVER_AGE"]',
    scale_id_number='f\'{r["SCALE_SAMPLE_ID"]} {sample.arrival_date.year}\' if r["SCALE_SAMPLE_ID"] else None',
    notes='str(), then some code',
)



In [6]:
# One row per Specimen attribute (index = attribute name), with a single
# 'django model' column holding the definition text.
df_spec = pd.Series(specimen_dict, name='django model').to_frame()
df_spec

Unnamed: 0,django model
species,"models.ForeignKey(Species, on_delete=models.DO_NOTHING, related_name=""specimens"")"
life_stage,"models.ForeignKey(LifeStage, related_name=""specimens"", on_delete=models.DO_NOTHING, blank=True, null=True)"
reproductive_status,"models.ForeignKey(ReproductiveStatus, related_name=""specimens"", on_delete=models.DO_NOTHING, blank=True, null=True)"
maturity,"models.ForeignKey(Maturity, related_name=""specimens"", on_delete=models.DO_NOTHING, blank=True, null=True)"
status,"models.ForeignKey(Status, on_delete=models.DO_NOTHING, related_name=""specimens"", blank=False, null=True)"
sex,"models.ForeignKey(Sex, on_delete=models.DO_NOTHING, related_name=""specimens"", blank=True, null=True)"
adipose_condition,"models.IntegerField(blank=True, null=True, verbose_name=_(""adipose condition""), choices=model_choices.adipose_condition_choices)"
fork_length,"models.FloatField(blank=True, null=True, verbose_name=_(""fork length (mm)""))"
fork_length_bin_interval,"models.FloatField(default=1, verbose_name=_(""fork length bin interval (mm)""))"
total_length,"models.FloatField(blank=True, null=True, verbose_name=_(""total length (mm)""))"


In [7]:
# which of these are included in the import script?

# For every model attribute (the index of df_spec) record the expression the
# import script assigns to it; attributes the script does not set get ''.
# A single dict.get lookup replaces the row-by-row iterrows()/.loc writes and
# the try/except KeyError. dtype=object matches the column the loop produced.
df_spec['IMPORT'] = pd.Series(
    [fish_kwargs.get(field, '') for field in df_spec.index],
    index=df_spec.index,
    dtype=object,
)

df_spec

Unnamed: 0,django model,IMPORT
species,"models.ForeignKey(Species, on_delete=models.DO_NOTHING, related_name=""specimens"")",species
life_stage,"models.ForeignKey(LifeStage, related_name=""specimens"", on_delete=models.DO_NOTHING, blank=True, null=True)",life_stage
reproductive_status,"models.ForeignKey(ReproductiveStatus, related_name=""specimens"", on_delete=models.DO_NOTHING, blank=True, null=True)",
maturity,"models.ForeignKey(Maturity, related_name=""specimens"", on_delete=models.DO_NOTHING, blank=True, null=True)",
status,"models.ForeignKey(Status, on_delete=models.DO_NOTHING, related_name=""specimens"", blank=False, null=True)",status
sex,"models.ForeignKey(Sex, on_delete=models.DO_NOTHING, related_name=""specimens"", blank=True, null=True)",sex
adipose_condition,"models.IntegerField(blank=True, null=True, verbose_name=_(""adipose condition""), choices=model_choices.adipose_condition_choices)",adipose_condition
fork_length,"models.FloatField(blank=True, null=True, verbose_name=_(""fork length (mm)""))","r[""FORK_LENGTH""]"
fork_length_bin_interval,"models.FloatField(default=1, verbose_name=_(""fork length bin interval (mm)""))",
total_length,"models.FloatField(blank=True, null=True, verbose_name=_(""total length (mm)""))","r[""TOTAL_LENGTH""]"


In [8]:
# attributes the import script does not set (IMPORT left as '')
df_spec.loc[df_spec['IMPORT'].eq('')]

Unnamed: 0,django model,IMPORT
maturity,"models.ForeignKey(Maturity, related_name=""specimens"", on_delete=models.DO_NOTHING, blank=True, null=True)",
fork_length_bin_interval,"models.FloatField(default=1, verbose_name=_(""fork length bin interval (mm)""))",
tag_number,"models.CharField(max_length=12, blank=True, null=True, verbose_name=_(""tag number""))",
ocean_age,"models.IntegerField(blank=True, null=True, verbose_name=_(""ocean age""))",
sample,"models.ForeignKey(Sample, on_delete=models.CASCADE, related_name=""specimens"", blank=True, null=True)",
origin,"models.ForeignKey(Origin, on_delete=models.DO_NOTHING, related_name=""specimens"", blank=True, null=True, editable=False)",
is_recapture,"@property def is_recapture(self): return self.status and self.status.code.lower() in [""rr"", ""rrl""]",
first_tagging,@property def first_tagging(self): return...,


In [9]:
# attributes the import script does set (IMPORT is not '')
df_spec.loc[df_spec['IMPORT'].ne('')]

Unnamed: 0,django model,IMPORT
species,"models.ForeignKey(Species, on_delete=models.DO_NOTHING, related_name=""specimens"")",species
life_stage,"models.ForeignKey(LifeStage, related_name=""specimens"", on_delete=models.DO_NOTHING, blank=True, null=True)",life_stage
reproductive_status,"models.ForeignKey(ReproductiveStatus, related_name=""specimens"", on_delete=models.DO_NOTHING, blank=True, null=True)",
status,"models.ForeignKey(Status, on_delete=models.DO_NOTHING, related_name=""specimens"", blank=False, null=True)",status
sex,"models.ForeignKey(Sex, on_delete=models.DO_NOTHING, related_name=""specimens"", blank=True, null=True)",sex
adipose_condition,"models.IntegerField(blank=True, null=True, verbose_name=_(""adipose condition""), choices=model_choices.adipose_condition_choices)",adipose_condition
fork_length,"models.FloatField(blank=True, null=True, verbose_name=_(""fork length (mm)""))","r[""FORK_LENGTH""]"
total_length,"models.FloatField(blank=True, null=True, verbose_name=_(""total length (mm)""))","r[""TOTAL_LENGTH""]"
weight,"models.FloatField(blank=True, null=True, verbose_name=_(""weight (g)""))","r[""TOTAL_LENGTH""]"
scale_id_number,"models.CharField(max_length=50, blank=True, null=True, verbose_name=_(""scale ID number""), unique=True)","f'{r[""SCALE_SAMPLE_ID""]} {sample.arrival_date.year}' if r[""SCALE_SAMPLE_ID""] else None"


### Samples

In [10]:
# class Sample(MetadataFields):
# Attribute-by-attribute transcription of the model: every key is an attribute
# name and every value is the source text of its definition, stored as a plain
# string so that nothing is evaluated. Values ending in '...' were abbreviated
# when they were transcribed.

sample_dict = {
    "site": "models.ForeignKey(RiverSite, related_name='samples', on_delete=models.DO_NOTHING)",
    "sample_type": 'models.IntegerField(choices=model_choices.sample_type_choices)',
    "monitoring_program": 'models.ForeignKey(MonitoringProgram, on_delete=models.DO_NOTHING, verbose_name=_("monitoring program"), help_text=_("The sample was collected under which monitoring program"), related_name="samples", blank=False, null=True)',
    "arrival_date": 'models.DateTimeField(verbose_name="arrival date/time")',
    "departure_date": 'models.DateTimeField(verbose_name="departure date/time")',
    "samplers": 'models.TextField(blank=True, null=True)',
    "notes": 'models.TextField(blank=True, null=True)',

    "age_thresh_0_1": 'models.IntegerField(blank=True, null=True, verbose_name=_("salmon site-specific age threshold (0+ to 1+)"))',
    "age_thresh_1_2": 'models.IntegerField(blank=True, null=True, verbose_name=_("salmon site-specific age threshold (1+ to 2+)"))',

    # electro
    "crew_probe": 'models.CharField(max_length=255, blank=True, null=True, verbose_name=_("crew (probe)"))',
    "crew_seine": 'models.CharField(max_length=255, blank=True, null=True, verbose_name=_("crew (seine)"))',
    "crew_dipnet": 'models.CharField(max_length=255, blank=True, null=True, verbose_name=_("crew (dipnet)"))',
    "crew_extras": 'models.CharField(max_length=255, blank=True, null=True, verbose_name=_("crew (extras)"))',

    # site description
    "percent_riffle": 'models.IntegerField(blank=True, null=True, verbose_name=_("riffle"), validators=(MinValueValidator(0), MaxValueValidator(100)))',
    "percent_run": 'models.IntegerField(blank=True, null=True, verbose_name=_("run"), validators=(MinValueValidator(0), MaxValueValidator(100)))',
    "percent_flat": 'models.IntegerField(blank=True, null=True, verbose_name=_("flat"), validators=(MinValueValidator(0), MaxValueValidator(100)))',
    "percent_pool": 'models.IntegerField(blank=True, null=True, verbose_name=_("pool"), validators=(MinValueValidator(0), MaxValueValidator(100)))',

    "bank_length_left": 'models.FloatField(null=True, blank=True, verbose_name=_("bank length - left (m)"))',
    "bank_length_right": 'models.FloatField(null=True, blank=True, verbose_name=_("bank length - right (m)"))',
    "width_lower": 'models.FloatField(null=True, blank=True, verbose_name=_("width - lower (m)"))',
    "depth_1_lower": 'models.FloatField(null=True, blank=True, verbose_name=_("depth #1 - lower (cm)"))',
    "depth_2_lower": 'models.FloatField(null=True, blank=True, verbose_name=_("depth #2 - lower (cm)"))',
    "depth_3_lower": 'models.FloatField(null=True, blank=True, verbose_name=_("depth #3 - lower (cm)"))',
    "width_middle": 'models.FloatField(null=True, blank=True, verbose_name=_("width - middle (m)"))',
    "depth_1_middle": 'models.FloatField(null=True, blank=True, verbose_name=_("depth #1 - middle (cm)"))',
    "depth_2_middle": 'models.FloatField(null=True, blank=True, verbose_name=_("depth #2 - middle (cm)"))',
    "depth_3_middle": 'models.FloatField(null=True, blank=True, verbose_name=_("depth #3 - middle (cm)"))',
    "width_upper": 'models.FloatField(null=True, blank=True, verbose_name=_("width - upper (m)"))',
    "depth_1_upper": 'models.FloatField(null=True, blank=True, verbose_name=_("depth #1 - upper (cm)"))',
    "depth_2_upper": 'models.FloatField(null=True, blank=True, verbose_name=_("depth #2 - upper (cm)"))',
    "depth_3_upper": 'models.FloatField(null=True, blank=True, verbose_name=_("depth #3 - upper (cm)"))',
    "max_depth": 'models.FloatField(null=True, blank=True, verbose_name=_("max depth (cm)"), help_text=_("max depth found within the whole site"))',

    # temp/climate data
    "air_temp_arrival": 'models.FloatField(null=True, blank=True, verbose_name="air temperature on arrival(°C)")',
    "min_air_temp": 'models.FloatField(null=True, blank=True, verbose_name="minimum air temperature (°C)")',
    "max_air_temp": 'models.FloatField(null=True, blank=True, verbose_name="maximum air temperature (°C)")',
    "percent_cloud_cover": 'models.FloatField(null=True, blank=True, verbose_name="cloud cover", validators=[MinValueValidator(0), MaxValueValidator(1)])',
    "precipitation_category": 'models.IntegerField(blank=True, null=True, choices=model_choices.precipitation_category_choices)',
    "precipitation_comment": 'models.CharField(max_length=255, blank=True, null=True)',
    "wind_speed": 'models.IntegerField(blank=True, null=True, choices=model_choices.wind_speed_choices)',
    "wind_direction": 'models.IntegerField(blank=True, null=True, choices=model_choices.wind_direction_choices)',

    # water data
    "water_depth_m": 'models.FloatField(null=True, blank=True, verbose_name="water depth (m)")',
    "water_level_delta_m": 'models.FloatField(null=True, blank=True, verbose_name="water level delta (m)")',
    "discharge_m3_sec": 'models.FloatField(null=True, blank=True, verbose_name="discharge (m3/s)")',
    "water_temp_c": 'models.FloatField(null=True, blank=True, verbose_name="water temperature (°C)")',
    "water_temp_trap_c": 'models.FloatField(null=True, blank=True, verbose_name="water temperature at trap (°C)")',
    "water_cond": 'models.FloatField(null=True, blank=True, verbose_name="specific conductivity (µS)", ...',
    "water_ph": 'models.FloatField(null=True, blank=True, verbose_name="water acidity (pH)")',
    "overhanging_veg_left": 'models.FloatField(blank=True, null=True, verbose_name=_("Overhanging Vegetation (%) - Left"),...',
    "overhanging_veg_right": 'models.FloatField(blank=True, null=True, verbose_name=_("Overhanging Vegetation (%) - Right"),...',
    "max_overhanging_veg_left": 'models.FloatField(blank=True, null=True, verbose_name=_("Max Overhanging Vegetation (m) - Left"))',
    "max_overhanging_veg_right": 'models.FloatField(blank=True, null=True, verbose_name=_("Max Overhanging Vegetation (m) - Right"))',

    # substrate
    "percent_fine": 'models.FloatField(blank=True, null=True, verbose_name=_("fine silt or clay"), validators=(MinValueValidator(0), MaxValueValidator(100)))',
    "percent_sand": 'models.FloatField(blank=True, null=True, verbose_name=_("sand"), validators=(MinValueValidator(0), MaxValueValidator(100)))',
    "percent_gravel": 'models.FloatField(blank=True, null=True, verbose_name=_("gravel"), validators=(MinValueValidator(0), MaxValueValidator(100)))',
    "percent_pebble": 'models.FloatField(blank=True, null=True, verbose_name=_("pebble"), validators=(MinValueValidator(0), MaxValueValidator(100)))',
    "percent_cobble": 'models.FloatField(blank=True, null=True, verbose_name=_("cobble"), validators=(MinValueValidator(0), MaxValueValidator(100)))',
    "percent_rocks": 'models.FloatField(blank=True, null=True, verbose_name=_("rocks"), validators=(MinValueValidator(0), MaxValueValidator(100)))',
    "percent_boulder": 'models.FloatField(blank=True, null=True, verbose_name=_("boulder"), validators=(MinValueValidator(0), MaxValueValidator(100)))',
    "percent_bedrock": 'models.FloatField(blank=True, null=True, verbose_name=_("bedrock"), validators=(MinValueValidator(0), MaxValueValidator(100)))',

    # rst
    "rpm_arrival": 'models.FloatField(null=True, blank=True, verbose_name="RPM at start")',
    "rpm_departure": 'models.FloatField(null=True, blank=True, verbose_name="RPM at end")',
    "time_released": 'models.DateTimeField(verbose_name="time released", blank=True, null=True)',
    "operating_condition": 'models.IntegerField(blank=True, null=True, choices=model_choices.operating_condition_choices)',
    "operating_condition_comment": 'models.CharField(max_length=255, blank=True, null=True)',

    # ef
    "seine_type": 'models.IntegerField(blank=True, null=True, choices=model_choices.seine_type_choices, verbose_name=_("type of seine"))',
    "site_type": 'models.IntegerField(blank=True, null=True, choices=model_choices.site_type_choices, verbose_name=_("type of site"))',
    "electrofisher": 'models.ForeignKey(Electrofisher, related_name="samples", on_delete=models.DO_NOTHING, verbose_name=_("electrofisher"), null=True)',
    "electrofisher_voltage": 'models.FloatField(null=True, blank=True, verbose_name=_("electrofisher voltage (V)"))',
    "electrofisher_output_low": 'models.FloatField(null=True, blank=True, verbose_name=_("electrofisher output, low (amps)"))',
    "electrofisher_output_high": 'models.FloatField(null=True, blank=True, verbose_name=_("electrofisher output, high (amps)"))',
    "electrofisher_frequency": 'models.FloatField(null=True, blank=True, verbose_name=_("electrofisher frequency (Hz)"))',
    "electrofisher_pulse_type": 'models.IntegerField(blank=True, null=True, choices=model_choices.pulse_type_choices, verbose_name=_("type of pulse"))',
    "duty_cycle": 'models.IntegerField(blank=True, null=True, verbose_name=_("duty cycle (%)"), validators=(MinValueValidator(0), MaxValueValidator(100)))',

    # non-editable
    "created_by": "models.ForeignKey(User, on_delete=models.DO_NOTHING, blank=True, null=True, editable=False, related_name='trapnet_sample_created_by')",
    "updated_by": "models.ForeignKey(User, on_delete=models.DO_NOTHING, blank=True, null=True, editable=False, related_name='trapnet_sample_updated_by')",
    "season": 'models.IntegerField(null=True, blank=True, editable=False)',
    "is_reviewed": 'models.BooleanField(default=False, editable=False, verbose_name=_("Has been reviewed?"))',
    "reviewed_by": "models.ForeignKey(User, on_delete=models.DO_NOTHING, blank=True, null=True, editable=False, related_name='trapnet_reviewed_by')",
    "reviewed_at": 'models.DateTimeField(blank=True, null=True, editable=False)',
    "old_id": 'models.CharField(max_length=25, null=True, blank=True, editable=False, unique=True)',

    # PROPERTIES
    # Listed by name only; the bodies were not transcribed.
    "julian_day": '@property ...',
    "reviewed_status": '@property ...',
    "full_wetted_width": '@property ...',
    "get_full_wetted_width": '@property ...',
    "substrate_profile": '@property ...',
    "site_profile": '@property ...',
    "duration": '@property ...',
    "species_list": '@property ...',
    "tag_list": '@property ...',
    "arrival_departure": '@property ...',
    "air_temp": '@property ...',
    "water_depth_display": '@property ...',
    "rpms": '@property ...',
    "overhanging_veg_display": '@property ...',
    "max_overhanging_veg_display": '@property ...',
    "crew_display": '@property ...',
    "wind": '@property ...',
    "water_temp": '@property ...',
    "electrofisher_params": '@property ...',
}

In [11]:
# run_process_fish()
# Keyword arguments recorded from the import script: each keyword is a Sample
# attribute and each value is the source text of the expression assigned to it
# (kept as a string, never evaluated).

sample_kwargs = dict(
    old_id='old_id',
    site='site',
    sample_type='2',
    # NOTE(review): arrival and departure are both recorded as start_date;
    # confirm against the import script.
    arrival_date='start_date',
    departure_date='start_date',
    percent_riffle='r["TOS1"]',
    percent_run='r["TOS2"]',
    percent_flat='r["TOS3"]',
    percent_pool='r["TOS4"]',
    electrofisher='efisher',
    bank_length_left='r["LENGTH_LEFT_BANK"]',
    bank_length_right='r["LENGTH_RIGHT_BANK"]',
    width_lower='r["WIDTH_LOWER"]',
    width_middle='r["WIDTH_MIDDLE"]',
    width_upper='r["WIDTH_UPPER"]',
    depth_1_lower='r["DEPTHA1"]',
    depth_2_lower='r["DEPTHA2"]',
    depth_3_lower='r["DEPTHA3"]',
    depth_1_middle='r["DEPTHB1"]',
    depth_2_middle='r["DEPTHB2"]',
    depth_3_middle='r["DEPTHB3"]',
    depth_1_upper='r["DEPTHC1"]',
    depth_2_upper='r["DEPTHC2"]',
    depth_3_upper='r["DEPTHC3"]',
    max_depth='r["DEPTH_MAX"]',
    air_temp_arrival='r["AIR_TEMPERATURE"]',
    water_cond='r["WATER_CONDUCTIVITY"]',
    water_ph='r["WATER_PH"]',

    percent_fine='r["SUB_TYPE_FINES"]',
    percent_sand='r["SUB_TYPE_SAND"]',
    percent_gravel='r["SUB_TYPE_GRAVEL"]',
    percent_pebble='r["SUB_TYPE_PEBBLE"]',
    percent_cobble='r["SUB_TYPE_COBBLE"]',
    percent_rocks='r["SUB_TYPE_ROCKS"]',
    percent_boulder='r["SUB_TYPE_BOULDER"]',
    percent_bedrock='r["SUB_TYPE_BEDROCK"]',
    overhanging_veg_left='r["L_BK_OVERHANGING_VEG"]',
    overhanging_veg_right='r["R_BK_OVERHANGING_VEG"]',
    max_overhanging_veg_left='r["MAX_OVERHANG_L_BK"]',
    max_overhanging_veg_right='r["MAX_OVERHANG_R_BK"]',
    # NOTE(review): voltage is recorded as reading the FREQUENCY column and
    # frequency as reading the VOLTAGE column, which looks swapped; confirm
    # whether this is a transcription slip here or the import script itself.
    electrofisher_voltage='r["ELECTROFISHER_FREQUENCY"]',
    electrofisher_frequency='r["ELECTROFISHER_VOLTAGE"]',
    site_type='2 if r["BARRIER_PRESENT"] else 1',
    crew_probe='r["CREW_PROBE"]',
    crew_seine='r["CREW_SEINE"]',
    crew_dipnet='r["CREW_DIPNET"]',
    seine_type='r["APRONSEINE_TYPE"]',
    # This keyword carries an `_id` suffix, so it does not match the
    # `monitoring_program` key used in sample_dict.
    monitoring_program_id='1',

    crew_extras='extras',
    electrofisher_output_low='output',
    electrofisher_output_high='output',
    water_temp_c='if len(temps): ...',
    notes='notes',
)

In [12]:
# One row per Sample attribute (index = attribute name), with a single
# 'django model' column holding the definition text.
df_samp = pd.DataFrame({k: [v] for k, v in sample_dict.items()}, index=['django model']).T

# For every attribute record the expression the import script assigns to it;
# attributes the script does not set get ''. A single dict.get lookup replaces
# the row-by-row iterrows()/.loc writes and the try/except KeyError.
# dtype=object matches the column the loop produced.
# NOTE: the lookup is by exact key, so `monitoring_program` stays '' even
# though sample_kwargs contains `monitoring_program_id`.
df_samp['IMPORT'] = pd.Series(
    [sample_kwargs.get(field, '') for field in df_samp.index],
    index=df_samp.index,
    dtype=object,
)

In [13]:
# NOT SKIPPABLE
# Attributes whose definition text contains none of 'blank=True', 'default'
# or 'property'; these cannot be skipped.
# note: monitoring_program_id was imported
df_samp[~df_samp['django model'].str.contains('blank=True|default|property')]

Unnamed: 0,django model,IMPORT
site,"models.ForeignKey(RiverSite, related_name='samples', on_delete=models.DO_NOTHING)",site
sample_type,models.IntegerField(choices=model_choices.sample_type_choices),2
monitoring_program,"models.ForeignKey(MonitoringProgram, on_delete=models.DO_NOTHING, verbose_name=_(""monitoring program""), help_text=_(""The sample was collected under which monitoring program""), related_name=""samples"", blank=False, null=True)",
arrival_date,"models.DateTimeField(verbose_name=""arrival date/time"")",start_date
departure_date,"models.DateTimeField(verbose_name=""departure date/time"")",start_date
electrofisher,"models.ForeignKey(Electrofisher, related_name=""samples"", on_delete=models.DO_NOTHING, verbose_name=_(""electrofisher""), null=True)",efisher


In [14]:
# IMPORTED: attributes the import script sets (IMPORT is not '')
df_samp.loc[df_samp['IMPORT'].ne('')]

Unnamed: 0,django model,IMPORT
site,"models.ForeignKey(RiverSite, related_name='samples', on_delete=models.DO_NOTHING)",site
sample_type,models.IntegerField(choices=model_choices.sample_type_choices),2
arrival_date,"models.DateTimeField(verbose_name=""arrival date/time"")",start_date
departure_date,"models.DateTimeField(verbose_name=""departure date/time"")",start_date
notes,"models.TextField(blank=True, null=True)",notes
crew_probe,"models.CharField(max_length=255, blank=True, null=True, verbose_name=_(""crew (probe)""))","r[""CREW_PROBE""]"
crew_seine,"models.CharField(max_length=255, blank=True, null=True, verbose_name=_(""crew (seine)""))","r[""CREW_SEINE""]"
crew_dipnet,"models.CharField(max_length=255, blank=True, null=True, verbose_name=_(""crew (dipnet)""))","r[""CREW_DIPNET""]"
crew_extras,"models.CharField(max_length=255, blank=True, null=True, verbose_name=_(""crew (extras)""))",extras
percent_riffle,"models.IntegerField(blank=True, null=True, verbose_name=_(""riffle""), validators=(MinValueValidator(0), MaxValueValidator(100)))","r[""TOS1""]"


In [15]:
# didn't import: attributes the import script leaves unset (IMPORT is '')
df_samp.loc[df_samp['IMPORT'].eq('')]

Unnamed: 0,django model,IMPORT
monitoring_program,"models.ForeignKey(MonitoringProgram, on_delete=models.DO_NOTHING, verbose_name=_(""monitoring program""), help_text=_(""The sample was collected under which monitoring program""), related_name=""samples"", blank=False, null=True)",
samplers,"models.TextField(blank=True, null=True)",
age_thresh_0_1,"models.IntegerField(blank=True, null=True, verbose_name=_(""salmon site-specific age threshold (0+ to 1+)""))",
age_thresh_1_2,"models.IntegerField(blank=True, null=True, verbose_name=_(""salmon site-specific age threshold (1+ to 2+)""))",
min_air_temp,"models.FloatField(null=True, blank=True, verbose_name=""minimum air temperature (°C)"")",
max_air_temp,"models.FloatField(null=True, blank=True, verbose_name=""maximum air temperature (°C)"")",
percent_cloud_cover,"models.FloatField(null=True, blank=True, verbose_name=""cloud cover"", validators=[MinValueValidator(0), MaxValueValidator(1)])",
precipitation_category,"models.IntegerField(blank=True, null=True, choices=model_choices.precipitation_category_choices)",
precipitation_comment,"models.CharField(max_length=255, blank=True, null=True)",
wind_speed,"models.IntegerField(blank=True, null=True, choices=model_choices.wind_speed_choices)",


In [16]:
# ForeignKey: attributes whose definition text mentions 'ForeignKey'
is_foreign_key = df_samp['django model'].str.contains('ForeignKey')
df_samp.loc[is_foreign_key]

Unnamed: 0,django model,IMPORT
site,"models.ForeignKey(RiverSite, related_name='samples', on_delete=models.DO_NOTHING)",site
monitoring_program,"models.ForeignKey(MonitoringProgram, on_delete=models.DO_NOTHING, verbose_name=_(""monitoring program""), help_text=_(""The sample was collected under which monitoring program""), related_name=""samples"", blank=False, null=True)",
electrofisher,"models.ForeignKey(Electrofisher, related_name=""samples"", on_delete=models.DO_NOTHING, verbose_name=_(""electrofisher""), null=True)",efisher
created_by,"models.ForeignKey(User, on_delete=models.DO_NOTHING, blank=True, null=True, editable=False, related_name='trapnet_sample_created_by')",
updated_by,"models.ForeignKey(User, on_delete=models.DO_NOTHING, blank=True, null=True, editable=False, related_name='trapnet_sample_updated_by')",
reviewed_by,"models.ForeignKey(User, on_delete=models.DO_NOTHING, blank=True, null=True, editable=False, related_name='trapnet_reviewed_by')",


In [17]:
# anything imported but not in this table?
# Walk the import keywords in order and report whether each one has a row
# (index label) in df_samp.

table_fields = set(df_samp.index)
for kwarg in sample_kwargs:
    if kwarg not in table_fields:
        print('---------------\nMISSING:     ', kwarg, '\n---------------')
        continue
    print('FOUND:   ', kwarg)

FOUND:    old_id
FOUND:    site
FOUND:    sample_type
FOUND:    arrival_date
FOUND:    departure_date
FOUND:    percent_riffle
FOUND:    percent_run
FOUND:    percent_flat
FOUND:    percent_pool
FOUND:    electrofisher
FOUND:    bank_length_left
FOUND:    bank_length_right
FOUND:    width_lower
FOUND:    width_middle
FOUND:    width_upper
FOUND:    depth_1_lower
FOUND:    depth_2_lower
FOUND:    depth_3_lower
FOUND:    depth_1_middle
FOUND:    depth_2_middle
FOUND:    depth_3_middle
FOUND:    depth_1_upper
FOUND:    depth_2_upper
FOUND:    depth_3_upper
FOUND:    max_depth
FOUND:    air_temp_arrival
FOUND:    water_cond
FOUND:    water_ph
FOUND:    percent_fine
FOUND:    percent_sand
FOUND:    percent_gravel
FOUND:    percent_pebble
FOUND:    percent_cobble
FOUND:    percent_rocks
FOUND:    percent_boulder
FOUND:    percent_bedrock
FOUND:    overhanging_veg_left
FOUND:    overhanging_veg_right
FOUND:    max_overhanging_veg_left
FOUND:    max_overhanging_veg_right
FOUND:    electrofishe