In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
import seaborn as sns

sns.set_theme()

# jupyter notebook full-width display
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))

# no text wrapping
display(HTML("<style>.dataframe td { white-space: nowrap; }</style>"))

# pandas formatting
pd.set_option('display.float_format', '{:.3f}'.format)
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 200)
pd.set_option('display.max_colwidth', 400)

In [2]:
df_SD = pd.read_pickle('df_SD.pickle')
df_FD = pd.read_pickle('df_FD.pickle')
df_LF = pd.read_pickle('df_LF.pickle')
df_LF_grouped = pd.read_pickle('df_LF_grouped.pickle')
df_Site = pd.read_pickle('df_Site.pickle')
df_TrapSupervisors = pd.read_pickle('df_TrapSupervisors.pickle')

# checking stuff

### why don't these match?
* answer: they do, but there is no matching sample to link them

In [3]:
df_LF[(df_LF.yy==2019) & (df_LF.mm==5) & (df_LF.dd==28) & (df_LF.site=="47")]

Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE
11195,2019,5,28,,MARGAREE,5,47,UPPER,PM,80.9,36.696,215,1,215,2019-05-28,47,,,2019052847,21.5,,,,True
11196,2019,5,28,,MARGAREE,5,47,UPPER,PM,80.9,36.696,220,6,220,2019-05-28,47,,,2019052847,22.0,,,,True
11197,2019,5,28,,MARGAREE,5,47,UPPER,PM,80.9,36.696,225,16,225,2019-05-28,47,,,2019052847,22.5,,,,True
11198,2019,5,28,,MARGAREE,5,47,UPPER,PM,80.9,36.696,230,26,230,2019-05-28,47,,,2019052847,23.0,,,,True
11199,2019,5,28,,MARGAREE,5,47,UPPER,PM,80.9,36.696,235,39,235,2019-05-28,47,,,2019052847,23.5,,,,True
11200,2019,5,28,,MARGAREE,5,47,UPPER,PM,80.9,36.696,240,51,240,2019-05-28,47,,,2019052847,24.0,,,,True
11201,2019,5,28,,MARGAREE,5,47,UPPER,PM,80.9,36.696,245,35,245,2019-05-28,47,,,2019052847,24.5,,,,True
11202,2019,5,28,,MARGAREE,5,47,UPPER,PM,80.9,36.696,250,27,250,2019-05-28,47,,,2019052847,25.0,,,,True
11203,2019,5,28,,MARGAREE,5,47,UPPER,PM,80.9,36.696,255,17,255,2019-05-28,47,,,2019052847,25.5,,,,True
11204,2019,5,28,,MARGAREE,5,47,UPPER,PM,80.9,36.696,260,5,260,2019-05-28,47,,,2019052847,26.0,,,,True


In [4]:
df_SD[(df_SD.DATETIME.dt.year==2019) & (df_SD.DATETIME.dt.month==5) & (df_SD.DATETIME.dt.day==28)]

Unnamed: 0,DIST,RIVER,NAME,code,GEAR,SITE_NO,no_nets,YEAR,MM,DD,Week,catch_lbs,catch_kg,hours_fished,zone,last_name,comments,bycatch_sbass,bycatch_shad,bycatch_other,DATETIME,SITE1,SITE2,remarks,id,total_fish_preserved,total_fish_measured,AM_PM_PERIOD,wt_lbs,FLAG_DATETIME,FLAG_HOURS_FISHED,FLAG_SITE,FLAG_AM_PM_PERIOD_DISCREPANCIES
14959,2,SWMARG,Martin E Cameron,,81,12,1.0,2019,5,28,5,350.0,158.8,5,lower,Cameron,,,,,2019-05-28,12,,,2019052812,31.0,195.0,AM,71.9,,,,
14960,2,SWMARG,Robert Peters,,81,25,1.0,2019,5,28,5,25.0,11.3,8,lower,Peters,,,,,2019-05-28,25,,,2019052825,,,,,,,,
14961,2,SWMARG,Pierre Chiasson,,81,26,1.0,2019,5,28,5,200.0,90.7,7,lower,Chiasson,,,,,2019-05-28,26,,,2019052826,,,,,,,,
14962,2,SWMARG,Gerard MacFarlane,,81,28,1.0,2019,5,28,5,200.0,90.7,8,upper,MacFarlane,,,,1 perch,2019-05-28,28,,bycatch_other: 1 perch,2019052828,,,,,,,,
14963,2,SWMARG,Daniel Stewart,,81,33,1.0,2019,5,28,5,150.0,68.0,8,upper,Stewart,,,,,2019-05-28,33,,,2019052833,,,,,,,,
14964,2,SWMARG,Stewart Gillis,,81,41,1.0,2019,5,28,5,30.0,13.6,7,upper,Gillis,,,,,2019-05-28,41,,,2019052841,,,,,,,,
14965,2,SWMARG,Finley Stewart,,81,44,,2019,5,28,5,300.0,136.1,14,upper,Stewart,,,,,2019-05-28,44,,,2019052844,,,,,,,,
14966,2,SWMARG,Mary E Gillis,,81,49,1.0,2019,5,28,5,700.0,317.5,14,upper,Gillis,,,,,2019-05-28,49,,,2019052849,,,,,,,,
14967,2,SWMARG,Bruce MacLellan,,81,51,1.0,2019,5,28,5,15.0,6.8,6,upper,MacLellan,,,,,2019-05-28,51,,,2019052851,,,,,,,,
14968,2,SWMARG,Elizabeth MacKinnon,,81,62,1.0,2019,5,28,5,500.0,226.8,10,upper,MacKinnon,,,,,2019-05-28,62,,,2019052862,36.0,203.0,AM,74.1,,,,


### kevin followup question: how many ambiguous matches don't have a matching sample because there are multiple sample matches

In [5]:
ambiguous = set()
for i in list(df_SD[df_SD.id>2024000000].id):
    while i > 2024000000:
        i -= 1000000000
    ambiguous.add(i)
    
ambiguous = list(ambiguous)

In [6]:
# EXACT MATCHES = NONE

for ambiguous_sample in ambiguous:
    YEAR, MONTH, DAY, SITE = (
        ambiguous_sample//1000000, 
        ambiguous_sample//10000 - 100*(ambiguous_sample//1000000), 
        ambiguous_sample//100 - 100*(ambiguous_sample//10000), 
        ambiguous_sample - 100*(ambiguous_sample//100)
    )
    print()
    print(YEAR, MONTH, DAY, SITE)
    # no exact matches
    display(df_LF[(df_LF.yy==YEAR) & (df_LF.mm==MONTH) & (df_LF.dd==DAY) & (df_LF.site==str(SITE))])
    display(df_FD[(df_FD.YEAR==YEAR) & (df_FD.MM==MONTH) & (df_FD.DD==DAY) & (df_FD.SITE==SITE)])


2004 6 10 1


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE



1997 6 12 58


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE



1988 5 23 48


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE



1988 4 0 37


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


In [7]:
# POTENTIAL MATCHES WITH MESSY SITE NAMES = NONE

for ambiguous_sample in ambiguous:
    YEAR, MONTH, DAY, SITE = (
        ambiguous_sample//1000000, 
        ambiguous_sample//10000 - 100*(ambiguous_sample//1000000), 
        ambiguous_sample//100 - 100*(ambiguous_sample//10000), 
        ambiguous_sample - 100*(ambiguous_sample//100)
    )
    print()
    print(YEAR, MONTH, DAY, SITE)
    # partial matches
    display(df_LF[(df_LF.yy==YEAR) & (df_LF.mm==MONTH) & (df_LF.dd==DAY) & (df_LF.site.str.contains(str(SITE))==True)])
    display(df_FD[(df_FD.YEAR==YEAR) & (df_FD.MM==MONTH) & (df_FD.DD==DAY) & (df_FD.SITE.str.contains(str(SITE))==True)])


2004 6 10 1


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE



1997 6 12 58


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE



1988 5 23 48


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE



1988 4 0 37


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


In [8]:
# A FEW MATCHES WHEN SITE IS NOT CONSIDERED
# maybe there the site number was written down wrong, some partial matches (excluding site)

for ambiguous_sample in ambiguous:
    YEAR, MONTH, DAY, SITE = (
        ambiguous_sample//1000000, 
        ambiguous_sample//10000 - 100*(ambiguous_sample//1000000), 
        ambiguous_sample//100 - 100*(ambiguous_sample//10000), 
        ambiguous_sample - 100*(ambiguous_sample//100)
    )
    print()
    print(YEAR, MONTH, DAY, SITE)
    # very partial matches
    display(df_LF[(df_LF.yy==YEAR) & (df_LF.mm==MONTH) & (df_LF.dd==DAY)].head())
    display(df_FD[(df_FD.YEAR==YEAR) & (df_FD.MM==MONTH) & (df_FD.DD==DAY)].head())


2004 6 10 1


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE



1997 6 12 58


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE
3024,1997,6,12,,SW MARGAREE,7,26,LOWER,AM,118.0,53.5,220,3,220,1997-06-12,26,,,1997061226,22.0,,,,
3025,1997,6,12,,SW MARGAREE,7,26,LOWER,AM,118.0,53.5,225,4,225,1997-06-12,26,,,1997061226,22.5,,,,
3026,1997,6,12,,SW MARGAREE,7,26,LOWER,AM,118.0,53.5,230,19,230,1997-06-12,26,,,1997061226,23.0,,,,
3027,1997,6,12,,SW MARGAREE,7,26,LOWER,AM,118.0,53.5,235,52,235,1997-06-12,26,,,1997061226,23.5,,,,
3028,1997,6,12,,SW MARGAREE,7,26,LOWER,AM,118.0,53.5,240,70,240,1997-06-12,26,,,1997061226,24.0,,,,


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE
15016,1997,6,12,7,26,AM,Frozen,1,,230,238.0,171.0,A,M,4,,,3,3,,,,,,,,,,,1997-06-12,3,3,,,,,26,26,,,230,SITE_notes: 26; AGE_notes_1: 3; FSP_notes_1: 3,1997061226,,,,,,,,,,,,,,
15017,1997,6,12,7,26,AM,Frozen,2,,224,232.0,166.0,A,M,4,,,3,3,,,,,,,,,,,1997-06-12,3,3,,,,,26,26,,,224,SITE_notes: 26; AGE_notes_1: 3; FSP_notes_1: 3,1997061226,,,,,,,,,,,,,,
15018,1997,6,12,7,26,AM,Frozen,3,,272,280.0,307.0,B,F,4,39.9,,7,3,,,,,,,,,,,1997-06-12,7,3,,,,,26,26,,,272,SITE_notes: 26; AGE_notes_1: 7; FSP_notes_1: 3,1997061226,,,,,,,,,,,,,,
15019,1997,6,12,7,26,AM,Frozen,4,,212,220.0,138.0,A,M,4,,,3,3,,,,,,,,,,,1997-06-12,3,3,,,,,26,26,,,212,SITE_notes: 26; AGE_notes_1: 3; FSP_notes_1: 3,1997061226,,,,,,,,,,,,,,
15020,1997,6,12,7,26,AM,Frozen,5,,220,228.0,143.0,A,M,4,,,3,3,,,,,,,,,,,1997-06-12,3,3,,,,,26,26,,,220,SITE_notes: 26; AGE_notes_1: 3; FSP_notes_1: 3,1997061226,,,,,,,,,,,,,,



1988 5 23 48


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE
4503,1988,5,23,,12,,Fresh,1,253,,253.0,208.0,A,M,4,,,4,4,,,,,,,,,,,1988-05-23,4,4,,,,,12,12,,,253,SITE_notes: 12; AGE_notes_1: 4; FSP_notes_1: 4,1988052312,,,,,,,,,,,,,,
4504,1988,5,23,,12,,Fresh,2,258,,258.0,236.0,A,M,4,,,4,4,,,,,,,,,,,1988-05-23,4,4,,,,,12,12,,,258,SITE_notes: 12; AGE_notes_1: 4; FSP_notes_1: 4,1988052312,,,,,,,,,,,,,,
4505,1988,5,23,,12,,Fresh,3,245,,245.0,,A,M,4,,,4,4,,,,,,,,,,,1988-05-23,4,4,,,,,12,12,,,245,SITE_notes: 12; AGE_notes_1: 4; FSP_notes_1: 4,1988052312,,,,,,,,,,,,,,
4506,1988,5,23,,12,,Fresh,4,250,,250.0,234.0,A,M,4,,,4,4,,,,,,,,,,,1988-05-23,4,4,,,,,12,12,,,250,SITE_notes: 12; AGE_notes_1: 4; FSP_notes_1: 4,1988052312,,,,,,,,,,,,,,
4507,1988,5,23,,12,,Fresh,5,245,,245.0,209.0,A,F,4,29.1,,3,3,,,,,,,,,,,1988-05-23,3,3,,,,,12,12,,,245,SITE_notes: 12; AGE_notes_1: 3; FSP_notes_1: 3,1988052312,,,,,,,,,,,,,,



1988 4 0 37


Unnamed: 0,yy,mm,dd,Time,river,week,site,loc,period,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,id,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


Unnamed: 0,YEAR,MM,DD,WEEK,SITE,PERIOD,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE


### how do we flag and leave the potential to merge ghost/ambiguous samples?

In [9]:
id = 1990061312
date = id // 100
date

19900613

In [10]:
from datetime import datetime
datetime.strptime(str(id//100), "%Y%m%d").date()

datetime.date(1990, 6, 13)

In [11]:
id = 4990061312
date = id // 100
while date > 20240000:
    date -= 10000000
datetime.strptime(str(date), "%Y%m%d").date()

datetime.date(1990, 6, 13)

In [12]:
sum(df_LF.id.isna()), sum(df_FD.id.isna())

(0, 0)

In [13]:
id - 100*(id//100)

12

# When making ghost samples, will any of our ambiguous id match between fish details and length frequencies?

In [14]:
ambiguous_FD = set(df_FD[df_FD.id>2024000000].id)
ambiguous_LF = set(df_LF[df_LF.id>2024000000].id)
ambiguous_SD = set(df_SD[df_SD.id>2024000000].id)
# yes, we need to further disambiguate: maybe add 20/40 to months, this number would never occur naturally

In [15]:
# let's make a summary of ambiguous dates

df_A_FD = pd.DataFrame(ambiguous_FD, columns=['id_FD'])
df_A_FD['date'] = df_A_FD['id_FD'] // 100
while df_A_FD.date.max() > 20240000:
    df_A_FD.loc[df_A_FD.date>20240000, 'date'] -= 10000000
    
df_A_LF = pd.DataFrame(ambiguous_LF, columns=['id_LF'])
df_A_LF['date'] = df_A_LF['id_LF'] // 100
while df_A_LF.date.max() > 20240000:
    df_A_LF.loc[df_A_LF.date>20240000, 'date'] -= 10000000
    
df_A_SD = pd.DataFrame(ambiguous_SD, columns=['id_SD'])
df_A_SD['date'] = df_A_SD['id_SD'] // 100
while df_A_SD.date.max() > 20240000:
    df_A_SD.loc[df_A_SD.date>20240000, 'date'] -= 10000000

# group and split into columns
df_A_FD = df_A_FD.groupby('date')['id_FD'].apply(lambda x: pd.Series(x.values)).unstack()
df_A_FD.columns = ['id_FD' for _ in range(df_A_FD.shape[1])]

df_A_LF = df_A_LF.groupby('date')['id_LF'].apply(lambda x: pd.Series(x.values)).unstack()
df_A_LF.columns = ['id_LF' for _ in range(df_A_LF.shape[1])]

df_A_SD = df_A_SD.groupby('date')['id_SD'].apply(lambda x: pd.Series(x.values)).unstack()
df_A_SD.columns = ['id_SD' for _ in range(df_A_SD.shape[1])]

# create a dataframe summarising ambiguous data
df_AMBIGUOUS = pd.merge(
    df_A_FD,
    pd.merge(
        df_A_SD,
        df_A_LF,
        on='date', 
        how='outer'
    ),
    on='date', 
    how='outer'
).sort_index().astype('Int64')

df_AMBIGUOUS.loc[df_AMBIGUOUS['id_FD'].any(axis=1), 'FLAG_FD_AMBIGUOUS'] = True
df_AMBIGUOUS.loc[df_AMBIGUOUS['id_LF'].any(axis=1), 'FLAG_LF_AMBIGUOUS'] = True
df_AMBIGUOUS.loc[df_AMBIGUOUS['id_SD'].any(axis=1), 'FLAG_SD_AMBIGUOUS'] = True

df_AMBIGUOUS['FLAG_AMBIGUITY_OVERLAP'] = df_AMBIGUOUS.FLAG_FD_AMBIGUOUS.notnull()*1 + df_AMBIGUOUS.FLAG_LF_AMBIGUOUS.notnull()*1 + df_AMBIGUOUS.FLAG_SD_AMBIGUOUS.notnull()*1

In [16]:
# these are our problems if we are making ghost samples

df_AMBIGUOUS.loc[df_AMBIGUOUS.FLAG_AMBIGUITY_OVERLAP > 1].dropna(axis=1).T

date,19980501,20000607,20000609,20010516,20090520,20090522,20100511,20100519,20100527
id_FD,2998050105,3000060726,4000060926,3001051605,4009052026,3009052226,3010051126,3010051926,3010052726
id_LF,3998050105,4000060726,3000060926,3001051605,3009052026,3009052226,3010051126,3010051926,3010052726
id_LF,2998050105,3000060726,4000060926,4001051605,4009052026,4009052226,4010051126,4010051926,4010052726
FLAG_FD_AMBIGUOUS,True,True,True,True,True,True,True,True,True
FLAG_LF_AMBIGUOUS,True,True,True,True,True,True,True,True,True
FLAG_AMBIGUITY_OVERLAP,2,2,2,2,2,2,2,2,2


# Potential Sorting Issues? LF? FD?
* Yes, can't disambiguate without making guesses. Left flagged, and will be rejected on import.

In [17]:
for i in ambiguous_FD:
    display(df_FD[df_FD.id==i][['YEAR', 'MM', 'DD', 'SITE', 'PERIOD', 'FISH_NO', 'id']].head(1))

Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
35924,1989,5,14,12,AM,1,2989051412


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36560,1989,5,14,12,PM,1,3989051412


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
35997,1993,5,29,52,AM,1,2993052952


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36730,2009,5,20,26,PM,1,4009052026


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36647,2000,6,9,26,PM,1,4000060926


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36110,2000,6,7,26,AM,1,3000060726


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36484,2014,5,30,47,AM,1,3014053047


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36327,2010,5,11,26,AM,1,3010051126


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36059,1998,5,1,5,PM,1,2998050105


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36607,2000,6,7,26,PM,1,4000060726


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36790,2010,5,11,26,PM,1,4010051126


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36882,2014,5,30,47 or 62,PM,1,4014053047


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36421,2010,5,29,25,PM,1,3010052925


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
35971,1993,5,27,37,AM,1,2993052737


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36264,2009,5,22,26,AM,1,3009052226


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36760,2009,5,22,26,PM,1,4009052226


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36033,1993,6,9,33,AM,1,2993060933


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
35878,1989,5,13,35,AM,1,2989051335


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36514,1989,5,13,35,PM,1,3989051335


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36452,2010,6,11,41,AM,1,3010061141


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36188,2001,5,16,5,PM,1,3001051605


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36355,2010,5,19,26,AM,1,3010051926


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36684,2001,5,16,526,AM,1,4001051605


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36817,2010,5,19,26,PM,1,4010051926


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36294,2010,5,5,41,PM,1,3010050541


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36392,2010,5,27,26,AM,1,3010052726


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36853,2010,5,27,26,PM,1,4010052726


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36234,2009,5,20,26,AM,1,3009052026


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO,id
36151,2000,6,9,26,AM,1,3000060926


In [18]:
# CONFIRM
# some have am / pm that could be used to match with LF manually if that info is there as well???
for i in ambiguous_FD:
    print('\n', i)
    display(df_FD[df_FD.id==i][['YEAR', 'MM', 'DD', 'SITE', 'PERIOD', 'FISH_NO']].head())


 2989051412


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
35924,1989,5,14,12,AM,1
35925,1989,5,14,12,AM,2
35926,1989,5,14,12,AM,3
35927,1989,5,14,12,AM,4
35928,1989,5,14,12,AM,5



 3989051412


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36560,1989,5,14,12,PM,1
36561,1989,5,14,12,PM,2
36562,1989,5,14,12,PM,3
36563,1989,5,14,12,PM,4
36564,1989,5,14,12,PM,5



 2993052952


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
35997,1993,5,29,52,AM,1
35998,1993,5,29,52,AM,2
35999,1993,5,29,52,AM,3
36000,1993,5,29,52,AM,4
36001,1993,5,29,52,AM,5



 4009052026


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36730,2009,5,20,26,PM,1
36731,2009,5,20,26,PM,2
36732,2009,5,20,26,PM,3
36733,2009,5,20,26,PM,4
36734,2009,5,20,26,PM,5



 4000060926


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36647,2000,6,9,26,PM,1
36648,2000,6,9,26,PM,2
36649,2000,6,9,26,PM,3
36650,2000,6,9,26,PM,4
36651,2000,6,9,26,PM,5



 3000060726


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36110,2000,6,7,26,AM,1
36111,2000,6,7,26,AM,2
36112,2000,6,7,26,AM,3
36113,2000,6,7,26,AM,4
36114,2000,6,7,26,AM,5



 3014053047


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36484,2014,5,30,47,AM,1
36485,2014,5,30,47,AM,2
36486,2014,5,30,47,AM,3
36487,2014,5,30,47,AM,4
36488,2014,5,30,47,AM,5



 3010051126


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36327,2010,5,11,26,AM,1
36328,2010,5,11,26,AM,2
36329,2010,5,11,26,AM,3
36330,2010,5,11,26,AM,4
36331,2010,5,11,26,AM,5



 2998050105


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36059,1998,5,1,5,PM,1
36060,1998,5,1,5,PM,1
36061,1998,5,1,5,PM,2
36062,1998,5,1,5,PM,2
36063,1998,5,1,5,PM,3



 4000060726


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36607,2000,6,7,26,PM,1
36608,2000,6,7,26,PM,2
36609,2000,6,7,26,PM,3
36610,2000,6,7,26,PM,4
36611,2000,6,7,26,PM,5



 4010051126


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36790,2010,5,11,26,PM,1
36791,2010,5,11,26,PM,2
36792,2010,5,11,26,PM,3
36793,2010,5,11,26,PM,4
36794,2010,5,11,26,PM,5



 4014053047


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36882,2014,5,30,47 or 62,PM,1
36883,2014,5,30,47 or 62,PM,2
36884,2014,5,30,47 or 62,PM,3
36885,2014,5,30,47 or 62,PM,4
36886,2014,5,30,47 or 62,PM,5



 3010052925


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36421,2010,5,29,25,PM,1
36422,2010,5,29,25,PM,2
36423,2010,5,29,25,PM,3
36424,2010,5,29,25,PM,4
36425,2010,5,29,25,PM,5



 2993052737


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
35971,1993,5,27,37,AM,1
35972,1993,5,27,37,AM,2
35973,1993,5,27,37,AM,3
35974,1993,5,27,37,AM,4
35975,1993,5,27,37,AM,5



 3009052226


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36264,2009,5,22,26,AM,1
36265,2009,5,22,26,AM,2
36266,2009,5,22,26,AM,3
36267,2009,5,22,26,AM,4
36268,2009,5,22,26,AM,5



 4009052226


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36760,2009,5,22,26,PM,1
36761,2009,5,22,26,PM,2
36762,2009,5,22,26,PM,3
36763,2009,5,22,26,PM,4
36764,2009,5,22,26,PM,5



 2993060933


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36033,1993,6,9,33,AM,1
36034,1993,6,9,33,AM,2
36035,1993,6,9,33,AM,3
36036,1993,6,9,33,AM,4
36037,1993,6,9,33,AM,5



 2989051335


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
35878,1989,5,13,35,AM,1
35879,1989,5,13,35,AM,2
35880,1989,5,13,35,AM,3
35881,1989,5,13,35,AM,4
35882,1989,5,13,35,AM,5



 3989051335


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36514,1989,5,13,35,PM,1
36515,1989,5,13,35,PM,2
36516,1989,5,13,35,PM,3
36517,1989,5,13,35,PM,4
36518,1989,5,13,35,PM,5



 3010061141


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36452,2010,6,11,41,AM,1
36453,2010,6,11,41,AM,2
36454,2010,6,11,41,AM,3
36455,2010,6,11,41,AM,4
36456,2010,6,11,41,AM,5



 3001051605


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36188,2001,5,16,5,PM,1
36189,2001,5,16,5,PM,2
36190,2001,5,16,5,PM,3
36191,2001,5,16,5,PM,4
36192,2001,5,16,5,PM,5



 3010051926


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36355,2010,5,19,26,AM,1
36356,2010,5,19,26,AM,2
36357,2010,5,19,26,AM,3
36358,2010,5,19,26,AM,4
36359,2010,5,19,26,AM,5



 4001051605


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36684,2001,5,16,526,AM,1
36685,2001,5,16,526,AM,2
36686,2001,5,16,526,AM,3
36687,2001,5,16,526,AM,4
36688,2001,5,16,526,AM,5



 4010051926


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36817,2010,5,19,26,PM,1
36818,2010,5,19,26,PM,2
36819,2010,5,19,26,PM,3
36820,2010,5,19,26,PM,4
36821,2010,5,19,26,PM,5



 3010050541


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36294,2010,5,5,41,PM,1
36295,2010,5,5,41,PM,2
36296,2010,5,5,41,PM,3
36297,2010,5,5,41,PM,4
36298,2010,5,5,41,PM,5



 3010052726


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36392,2010,5,27,26,AM,1
36393,2010,5,27,26,AM,2
36394,2010,5,27,26,AM,3
36395,2010,5,27,26,AM,4
36396,2010,5,27,26,AM,5



 4010052726


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36853,2010,5,27,26,PM,1
36854,2010,5,27,26,PM,2
36855,2010,5,27,26,PM,3
36856,2010,5,27,26,PM,4
36857,2010,5,27,26,PM,5



 3009052026


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36234,2009,5,20,26,AM,1
36235,2009,5,20,26,AM,2
36236,2009,5,20,26,AM,3
36237,2009,5,20,26,AM,4
36238,2009,5,20,26,AM,5



 3000060926


Unnamed: 0,YEAR,MM,DD,SITE,PERIOD,FISH_NO
36151,2000,6,9,26,AM,1
36152,2000,6,9,26,AM,2
36153,2000,6,9,26,AM,3
36154,2000,6,9,26,AM,4
36155,2000,6,9,26,AM,5


In [19]:
# check ambiguous AM PM for LF and FD and look for extra matches (NOTE, none match samples/logbook)
# [x for x in df_AMBIGUOUS[['id_LF']].values.T.ravel().tolist() if not pd.isna(x)]

period_check = pd.merge(
    df_FD.loc[df_FD.id.isin([x for x in df_AMBIGUOUS[['id_FD']].values.T.ravel().tolist() if not pd.isna(x)])].groupby(['id', 'PERIOD']).count().reset_index()[['id', 'PERIOD']],
    df_LF.loc[df_LF.id.isin([x for x in df_AMBIGUOUS[['id_LF']].values.T.ravel().tolist() if not pd.isna(x)])].groupby(['id', 'period']).count().reset_index()[['id', 'period']],
    on='id',
    how='outer'
)

period_check['id'] -= 1000000000
period_check.loc[period_check.id > 2024000000, 'id'] -= 1000000000

period_check = period_check.sort_values(['id', 'PERIOD']).reset_index(drop=True)

In [20]:
id_period_ambiguity = list(period_check.dropna()[period_check.dropna().duplicated('id', keep=False)].id.unique())

check_ids = [x+1000000000 for x in id_period_ambiguity] + [x+2000000000 for x in id_period_ambiguity]

In [21]:
df_LF[df_LF.id.isin(check_ids)].groupby(['id', 'period']).first().sort_values('DATETIME')

Unnamed: 0_level_0,Unnamed: 1_level_0,yy,mm,dd,Time,river,week,site,loc,wt_lbs,wt_kg,lgth,freq,Flbin,DATETIME,SITE1,SITE2,SITE3,length_bin_id,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE
id,period,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
3000060726,AM,2000,6,7,,SW MARGAREE,6,26,LOWER,158.0,71.7,215,1,215,2000-06-07 00:00:00,26,,,21.5,,,,
4000060726,PM,2000,6,7,,SW MARGAREE,6,26,LOWER,158.0,71.7,215,3,215,2000-06-07 00:00:00,26,,,21.5,,,,
3000060926,PM,2000,6,9,,SW MARGAREE,6,26,LOWER,141.0,64.0,215,3,215,2000-06-09 00:00:00,26,,,21.5,,,,
4000060926,AM,2000,6,9,,SW MARGAREE,6,26,LOWER,146.0,66.2,215,1,215,2000-06-09 00:00:00,26,,,21.5,,,,
3001051605,AM,2001,5,16,,SW MARGAREE,3,526,LOWER,140.0,63.5,220,1,220,2001-05-16 00:00:00,5,26.0,,22.0,True,,,
4001051605,PM,2001,5,16,,SW MARGAREE,3,5,LOWER,154.0,69.9,225,2,225,2001-05-16 00:00:00,5,,,22.5,,,,
3009052026,AM,2009,5,20,1255.0,SW MARGAREE,4,26,LOWER,139.0,63.0,220,1,220,2009-05-20 12:55:00,26,,,22.0,,,,
4009052026,PM,2009,5,20,1405.0,SW MARGAREE,4,26,LOWER,96.0,43.5,225,1,225,2009-05-20 14:05:00,26,,,22.5,,,,
3009052226,AM,2009,5,22,1310.0,SW MARGAREE,4,26,LOWER,128.0,58.1,225,2,225,2009-05-22 13:10:00,26,,,22.5,,,,
4009052226,PM,2009,5,22,1455.0,SW MARGAREE,4,26,LOWER,130.0,59.0,225,3,225,2009-05-22 14:55:00,26,,,22.5,,,,


In [22]:
df_FD[df_FD.id.isin(check_ids)].groupby(['id', 'PERIOD']).first().sort_values('DATETIME')

Unnamed: 0_level_0,Unnamed: 1_level_0,YEAR,MM,DD,WEEK,SITE,CONDITION,FISH_NO,FL_WET,FL_FROZEN,FL_STD,WEIGHT,SPECIES,SEX,MATURITY,GONAD_WEIGHT,Ager_1,AGE_1,FSP_1,Comments_1,Ager_2,AGE_2,FSP_2,Comments_2,Ager_3,AGE_3,FSP_3,Comments_3,Envelop.Comments,DATETIME,AGE_notes_1,FSP_notes_1,AGE_notes_2,FSP_notes_2,AGE_notes_3,FSP_notes_3,SITE_notes,SITE1,SITE2,SITE3,fish_length,remarks,FLAG_SITE,FLAG_AM_PM_PERIOD,FLAG_SEX,FLAG_MATURITY,FLAG_FSP_1,FLAG_FL_STD,FLAG_FL_WET_FROZEN,FLAG_WEIGHT_OUTLIER,FLAG_GONAD_OUTLIER,FLAG_MULTIPLE_SAMPLE_POSSIBILITIES,FLAG_MISNUMBERED_FISH_DETAILS,FLAG_LEN_WT_RATIO_OUTLIER,FLAG_AM_PM_PERIOD_DISCREPANCIES,FLAG_NO_MATCHING_SAMPLE
id,PERIOD,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1
3000060726,AM,2000,6,7,6,26,Frozen,1,,250.0,258.0,220.0,B,M,4,32.1,,3.0,9,,,,,,,,,,,2000-06-07,3.0,9,,,,,26,26,,,250,SITE_notes: 26; FSP_notes_1: 9,,,,,,,,,,True,,,,
3000060726,PM,2000,6,7,6,26,Fresh,37,242.0,,242.0,198.0,B,M,4,52.5,,3.0,9,,,,,,,,,,,2000-06-07,3.0,9,,,,,26,26,,,242,SITE_notes: 26; FSP_notes_1: 9,,,,,,,,,,True,,,,
4000060726,PM,2000,6,7,6,26,Fresh,1,252.0,,252.0,226.0,B,F,4,28.8,,3.0,9,,,,,,,,,,,2000-06-07,3.0,9,,,,,26,26,,,252,SITE_notes: 26; FSP_notes_1: 9,,,,,,,,,,True,,,,
3000060926,AM,2000,6,9,6,26,Fresh,1,239.0,,239.0,168.0,B,M,4,32.7,,3.0,0,,,,,,,,,,,2000-06-09,3.0,0,,,,,26,26,,,239,SITE_notes: 26; FSP_notes_1: 0,,,,,,,,,,True,,,,
3000060926,PM,2000,6,9,6,26,Frozen,35,,228.0,236.0,154.8,A,M,4,35.2,,,9,,,,,,,,,,,2000-06-09,,9,,,,,26,26,,,228,SITE_notes: 26,,,,,,,,,,True,,,,
4000060926,PM,2000,6,9,6,26,Frozen,1,,266.0,274.0,284.5,A,F,4,39.1,,6.0,4,,,,,,,,,,,2000-06-09,6.0,4,,,,,26,26,,,266,SITE_notes: 26; AGE_notes_1: 6; FSP_notes_1: 4,,,,,,,,,,True,,,,
3001051605,AM,2001,5,16,3,526,Frozen,46,,217.0,225.0,128.4,A,M,4,,,2.0,2,,,,,,,,,,,2001-05-16,2.0,2,,,,,526,5,26.0,,217,"SITE_notes: 5,26; AGE_notes_1: 2; FSP_notes_1: 2",True,,,,,,,,,True,,,,
3001051605,PM,2001,5,16,3,5,Frozen,1,,276.0,285.0,312.6,A,F,4,53.8,,6.0,4,,,,,,,,,,,2001-05-16,6.0,4,,,,,5,5,,,276,SITE_notes: 5; AGE_notes_1: 6; FSP_notes_1: 4,,,,,,,,,,True,,,,
4001051605,AM,2001,5,16,3,526,Frozen,1,,271.0,279.0,288.0,A,M,4,48.3,,6.0,3,,,,,,,,,,,2001-05-16,6.0,3,,,,,526,5,26.0,,271,"SITE_notes: 5,26; AGE_notes_1: 6; FSP_notes_1: 3",True,,,,,,,,,True,,,,
3009052026,AM,2009,5,20,4,26,Fresh,1,245.0,269.0,245.0,202.5,A,M,4,30.2,,3.0,3,,,,,,,,,,,2009-05-20,3.0,3,,,,,26,26,,,245,SITE_notes: 26,,,,,,,,,,True,,,,


4 ambiguous samples may be linked:

    3010051926	AM
    3010051926	AM
    
    4010051926	PM
    4010051926	PM
    
    3010052726	AM
    3010052726	AM
    
    4010052726	PM
    4010052726	PM
    
Note: these are matched together correctly.

* Therefore, 

        if id.isin([3010051926, 4010051926, 3010052726, 4010052726]):
            don't add anything to months to disambiguate 
            # these id will match up correctly and link to the same ghost sample

* Other id from this set do not match unambiguously using period. There are some inconsistencies.
* In fact, period may not be entirely useful because of the inconsistencies with within ambiguous sets of fish details.

# More Sorting Issue Calcs and Checks

In [23]:
# loop through duplicated ids and look at these by inspection - make sure they all look doubled (except 2018061364)
# the only one that looks like a typo is 2018061364 (fixed at "CONFIRM: fix typo at 10851" in noteboook 10.1)

# from notebook 10.1
duplicated_ids_LF = [1998050105, 2000060726, 2000060926, 2000061538, 2001051605, 2003052326,
                     2009052026, 2009052226, 2010051126, 2010051926, 2010052726, 2018061364]

temp = df_LF.copy()
temp.loc[temp.id>2024000000, 'id'] -= 1000000000
temp.loc[temp.id>2024000000, 'id'] -= 1000000000

for duplicate_id in duplicated_ids_LF:
    idx = temp[temp.id == duplicate_id].index.values[0]
    
    before = 0
    after = temp[temp.id == duplicate_id].shape[0]-1
    
    print('\n', temp[temp.id == duplicate_id]['id'].values[0])
    display(temp.loc[idx-before:idx+after][['id', 'DATETIME', 'site', 'period', 'lgth', 'wt_lbs']])



 1998050105


Unnamed: 0,id,DATETIME,site,period,lgth,wt_lbs
3102,1998050105,1998-05-01,5,PM,235,10.0
3103,1998050105,1998-05-01,5,PM,240,10.0
3104,1998050105,1998-05-01,5,PM,245,10.0
3105,1998050105,1998-05-01,5,PM,250,10.0
3106,1998050105,1998-05-01,5,PM,255,10.0
3107,1998050105,1998-05-01,5,PM,260,10.0
3108,1998050105,1998-05-01,5,PM,265,10.0
3109,1998050105,1998-05-01,5,PM,270,10.0
3110,1998050105,1998-05-01,5,PM,275,10.0
3111,1998050105,1998-05-01,5,PM,280,10.0



 2000060726


Unnamed: 0,id,DATETIME,site,period,lgth,wt_lbs
4156,2000060726,2000-06-07,26,AM,215,158.0
4157,2000060726,2000-06-07,26,AM,220,158.0
4158,2000060726,2000-06-07,26,AM,225,158.0
4159,2000060726,2000-06-07,26,AM,230,158.0
4160,2000060726,2000-06-07,26,AM,235,158.0
4161,2000060726,2000-06-07,26,AM,240,158.0
4162,2000060726,2000-06-07,26,AM,245,158.0
4163,2000060726,2000-06-07,26,AM,250,158.0
4164,2000060726,2000-06-07,26,AM,255,158.0
4165,2000060726,2000-06-07,26,AM,260,158.0



 2000060926


Unnamed: 0,id,DATETIME,site,period,lgth,wt_lbs
4186,2000060926,2000-06-09,26,PM,215,141.0
4187,2000060926,2000-06-09,26,PM,220,141.0
4188,2000060926,2000-06-09,26,PM,225,141.0
4189,2000060926,2000-06-09,26,PM,230,141.0
4190,2000060926,2000-06-09,26,PM,235,141.0
4191,2000060926,2000-06-09,26,PM,240,141.0
4192,2000060926,2000-06-09,26,PM,245,141.0
4193,2000060926,2000-06-09,26,PM,250,141.0
4194,2000060926,2000-06-09,26,PM,255,141.0
4195,2000060926,2000-06-09,26,PM,260,141.0



 2000061538


Unnamed: 0,id,DATETIME,site,period,lgth,wt_lbs
4239,2000061538,2000-06-15,38,PM,205,139.0
4240,2000061538,2000-06-15,38,PM,210,139.0
4241,2000061538,2000-06-15,38,PM,215,139.0
4242,2000061538,2000-06-15,38,PM,220,139.0
4243,2000061538,2000-06-15,38,PM,225,139.0
4244,2000061538,2000-06-15,38,PM,230,139.0
4245,2000061538,2000-06-15,38,PM,235,139.0
4246,2000061538,2000-06-15,38,PM,240,139.0
4247,2000061538,2000-06-15,38,PM,245,139.0
4248,2000061538,2000-06-15,38,PM,250,139.0



 2001051605


Unnamed: 0,id,DATETIME,site,period,lgth,wt_lbs
4266,2001051605,2001-05-16,526,AM,220,140.0
4267,2001051605,2001-05-16,526,AM,225,140.0
4268,2001051605,2001-05-16,526,AM,230,140.0
4269,2001051605,2001-05-16,526,AM,235,140.0
4270,2001051605,2001-05-16,526,AM,240,140.0
4271,2001051605,2001-05-16,526,AM,245,140.0
4272,2001051605,2001-05-16,526,AM,250,140.0
4273,2001051605,2001-05-16,526,AM,255,140.0
4274,2001051605,2001-05-16,526,AM,260,140.0
4275,2001051605,2001-05-16,526,AM,265,140.0



 2003052326


Unnamed: 0,id,DATETIME,site,period,lgth,wt_lbs
5184,2003052326,2003-05-23,26,AM,230,30.0
5185,2003052326,2003-05-23,26,AM,235,30.0
5186,2003052326,2003-05-23,26,AM,240,30.0
5187,2003052326,2003-05-23,26,AM,245,30.0
5188,2003052326,2003-05-23,26,AM,250,30.0
5189,2003052326,2003-05-23,26,AM,255,30.0
5190,2003052326,2003-05-23,26,AM,260,30.0
5191,2003052326,2003-05-23,26,AM,265,30.0
5192,2003052326,2003-05-23,26,AM,270,30.0
5193,2003052326,2003-05-23,26,AM,275,30.0



 2009052026


Unnamed: 0,id,DATETIME,site,period,lgth,wt_lbs
6332,2009052026,2009-05-20 12:55:00,26,AM,220,139.0
6333,2009052026,2009-05-20 12:55:00,26,AM,225,139.0
6334,2009052026,2009-05-20 12:55:00,26,AM,230,139.0
6335,2009052026,2009-05-20 12:55:00,26,AM,235,139.0
6336,2009052026,2009-05-20 12:55:00,26,AM,240,139.0
6337,2009052026,2009-05-20 12:55:00,26,AM,245,139.0
6338,2009052026,2009-05-20 12:55:00,26,AM,250,139.0
6339,2009052026,2009-05-20 12:55:00,26,AM,255,139.0
6340,2009052026,2009-05-20 12:55:00,26,AM,260,139.0
6341,2009052026,2009-05-20 12:55:00,26,AM,265,139.0



 2009052226


Unnamed: 0,id,DATETIME,site,period,lgth,wt_lbs
6368,2009052226,2009-05-22 13:10:00,26,AM,225,128.0
6369,2009052226,2009-05-22 13:10:00,26,AM,230,128.0
6370,2009052226,2009-05-22 13:10:00,26,AM,235,128.0
6371,2009052226,2009-05-22 13:10:00,26,AM,240,128.0
6372,2009052226,2009-05-22 13:10:00,26,AM,245,128.0
6373,2009052226,2009-05-22 13:10:00,26,AM,250,128.0
6374,2009052226,2009-05-22 13:10:00,26,AM,255,128.0
6375,2009052226,2009-05-22 13:10:00,26,AM,260,128.0
6376,2009052226,2009-05-22 13:10:00,26,AM,265,128.0
6377,2009052226,2009-05-22 13:10:00,26,AM,270,128.0



 2010051126


Unnamed: 0,id,DATETIME,site,period,lgth,wt_lbs
6648,2010051126,2010-05-11 14:50:00,26,AM,230,43.0
6649,2010051126,2010-05-11 14:50:00,26,AM,235,43.0
6650,2010051126,2010-05-11 14:50:00,26,AM,240,43.0
6651,2010051126,2010-05-11 14:50:00,26,AM,245,43.0
6652,2010051126,2010-05-11 14:50:00,26,AM,250,43.0
6653,2010051126,2010-05-11 14:50:00,26,AM,255,43.0
6654,2010051126,2010-05-11 14:50:00,26,AM,260,43.0
6655,2010051126,2010-05-11 14:50:00,26,AM,265,43.0
6656,2010051126,2010-05-11 14:50:00,26,AM,270,43.0
6657,2010051126,2010-05-11 14:50:00,26,AM,275,43.0



 2010051926


Unnamed: 0,id,DATETIME,site,period,lgth,wt_lbs
6719,2010051926,2010-05-19 13:45:00,26,AM,225,129.0
6720,2010051926,2010-05-19 13:45:00,26,AM,230,129.0
6721,2010051926,2010-05-19 13:45:00,26,AM,235,129.0
6722,2010051926,2010-05-19 13:45:00,26,AM,240,129.0
6723,2010051926,2010-05-19 13:45:00,26,AM,245,129.0
6724,2010051926,2010-05-19 13:45:00,26,AM,250,129.0
6725,2010051926,2010-05-19 13:45:00,26,AM,255,129.0
6726,2010051926,2010-05-19 13:45:00,26,AM,260,129.0
6727,2010051926,2010-05-19 13:45:00,26,AM,265,129.0
6728,2010051926,2010-05-19 13:45:00,26,AM,270,129.0



 2010052726


Unnamed: 0,id,DATETIME,site,period,lgth,wt_lbs
6846,2010052726,2010-05-27 08:40:00,26,AM,225,117.0
6847,2010052726,2010-05-27 08:40:00,26,AM,230,117.0
6848,2010052726,2010-05-27 08:40:00,26,AM,235,117.0
6849,2010052726,2010-05-27 08:40:00,26,AM,240,117.0
6850,2010052726,2010-05-27 08:40:00,26,AM,245,117.0
6851,2010052726,2010-05-27 08:40:00,26,AM,250,117.0
6852,2010052726,2010-05-27 08:40:00,26,AM,255,117.0
6853,2010052726,2010-05-27 08:40:00,26,AM,260,117.0
6854,2010052726,2010-05-27 08:40:00,26,AM,265,117.0
6855,2010052726,2010-05-27 08:40:00,26,AM,270,117.0



 2018061364


Unnamed: 0,id,DATETIME,site,period,lgth,wt_lbs
10841,2018061364,2018-06-13,64,PM,215,92.0
10842,2018061364,2018-06-13,64,PM,220,92.0
10843,2018061364,2018-06-13,64,PM,225,92.0
10844,2018061364,2018-06-13,64,PM,230,92.0
10845,2018061364,2018-06-13,64,PM,235,92.0
10846,2018061364,2018-06-13,64,PM,240,92.0
10847,2018061364,2018-06-13,64,PM,245,92.0
10848,2018061364,2018-06-13,64,PM,250,92.0
10849,2018061364,2018-06-13,64,PM,255,92.0
10850,2018061364,2018-06-13,64,PM,260,92.0


In [24]:
# it looks like 4018061364 is our only potential error (fixed earlier, now 2018061364)
df_LF.groupby('id').count().sort_values('yy')['yy'].head()

id
2007060235    1
1991051212    3
1994050848    4
2005050501    6
1991051312    6
Name: yy, dtype: int64

In [25]:
# loop through small counts above and look at them
# these look fine, unless there were multiple combined typos
smallgroups = list(df_LF.groupby('id').count().sort_values('yy')['yy'].head().index)

for group in smallgroups:
    idx = df_LF[df_LF.id == group].index.values[0]
    before = 2
    after = 8
    
    print('\n', df_LF[df_LF.id == group]['id'].values[0])
    display(df_LF.loc[idx-before:idx+after][['DATETIME', 'site', 'period', 'lgth', 'wt_lbs']])



 2007060235


Unnamed: 0,DATETIME,site,period,lgth,wt_lbs
5960,2007-06-01,47,PM,285,133.0
5961,2007-06-01,47,PM,295,133.0
5962,2007-06-02,35,PM,225,122.0
5963,2007-06-02,2,PM,230,122.0
5964,2007-06-02,2,PM,235,122.0
5965,2007-06-02,2,PM,240,122.0
5966,2007-06-02,2,PM,245,122.0
5967,2007-06-02,2,PM,250,122.0
5968,2007-06-02,2,PM,255,122.0
5969,2007-06-02,2,PM,260,122.0



 1991051212


Unnamed: 0,DATETIME,site,period,lgth,wt_lbs
946,1990-06-08 00:00:00,64,AM,285,
947,1990-06-08 00:00:00,64,AM,293,
948,1991-05-12 03:00:00,12,AM,265,1.0
949,1991-05-12 03:00:00,12,AM,270,1.0
950,1991-05-12 03:00:00,12,AM,275,1.0
951,1991-05-13 03:00:00,12,AM,265,11.0
952,1991-05-13 03:00:00,12,AM,270,11.0
953,1991-05-13 03:00:00,12,AM,275,11.0
954,1991-05-13 03:00:00,12,AM,280,11.0
955,1991-05-13 03:00:00,12,AM,285,11.0



 1994050848


Unnamed: 0,DATETIME,site,period,lgth,wt_lbs
10341,1994-06-01,41,AM,280,137.0
10342,1994-06-01,41,AM,300,137.0
10343,1994-05-08,48,AM,250,3.0
10344,1994-05-08,48,AM,255,3.0
10345,1994-05-08,48,AM,260,3.0
10346,1994-05-08,48,AM,265,3.0
10347,1994-06-02,49,PM,235,118.0
10348,1994-06-02,49,PM,240,118.0
10349,1994-06-02,49,PM,245,118.0
10350,1994-06-02,49,PM,250,118.0



 2005050501


Unnamed: 0,DATETIME,site,period,lgth,wt_lbs
5570,2004-06-16,5,PM,260,41.0
5571,2004-06-16,5,PM,265,41.0
5572,2005-05-05,1,PM,240,9.0
5573,2005-05-05,1,PM,250,9.0
5574,2005-05-05,1,PM,255,9.0
5575,2005-05-05,1,PM,260,9.0
5576,2005-05-05,1,PM,265,9.0
5577,2005-05-05,1,PM,270,9.0
5578,2005-05-07,26,PM,240,12.0
5579,2005-05-07,26,PM,245,12.0



 1991051312


Unnamed: 0,DATETIME,site,period,lgth,wt_lbs
949,1991-05-12 03:00:00,12,AM,270,1.0
950,1991-05-12 03:00:00,12,AM,275,1.0
951,1991-05-13 03:00:00,12,AM,265,11.0
952,1991-05-13 03:00:00,12,AM,270,11.0
953,1991-05-13 03:00:00,12,AM,275,11.0
954,1991-05-13 03:00:00,12,AM,280,11.0
955,1991-05-13 03:00:00,12,AM,285,11.0
956,1991-05-13 03:00:00,12,AM,290,11.0
957,1991-05-16 03:00:00,12,PM,255,11.0
958,1991-05-16 03:00:00,12,PM,260,11.0
