<a href="https://colab.research.google.com/github/dakilaledesma/669_Final_Project/blob/main/NN_Graphs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
! mkdir data
! cp "drive/MyDrive/UNC/Classes/BIOL 669/fishbase_iucn.csv" data/fishbase_iucn.csv

In [None]:
import pandas as pd
from collections import defaultdict, Counter
from tqdm.notebook import tqdm

In [None]:
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
import pandas as pd
import numpy as np

def the_nn(coi, fam, incl_lc=True, num_layers=10, e=100, bs=8, vs=0.2, retries=3):
  """Train dense classifiers that predict the red-list category within one family.

  Reads ``data/fishbase_iucn.csv``, keeps the rows whose ``FamCode`` equals
  ``fam`` and that have no missing value in any selected column, then fits
  ``retries`` freshly initialised Keras models on the ``coi`` columns.

  Args:
    coi: Column names used as model inputs. ``"FamCode"`` is ignored if
      present (it is the row filter, not a feature). The caller's list is
      not modified.
    fam: Family code; anything ``int()`` accepts.
    incl_lc: When False, rows labelled "least concern" are dropped first.
    num_layers: Number of hidden ``Dense(50, relu)`` layers.
    e: Number of training epochs per model.
    bs: Batch size.
    vs: Fraction handed to Keras as ``validation_split``.
    retries: Number of models to train.

  Returns:
    A string. Either one of the early-exit messages
    ("Less than ten samples: N", "Only 1 category", "No columns of interest")
    or a summary with the family, sample count, mean of the per-model best
    validation accuracies, per-category counts and the individual accuracies.
  """
  iucn_data = pd.read_csv("data/fishbase_iucn.csv", low_memory=False)

  # One-hot index per lower-cased category, in order of first appearance over
  # the whole table. Missing labels are skipped so NaN does not get its own
  # output unit (rows with missing labels are dropped below anyway).
  labels = iucn_data["redlistCategory"].str.lower()
  label_mapping = {cat: idx for idx, cat in enumerate(labels.dropna().unique())}

  # Work on a copy: removing "FamCode" in place would silently alter the
  # caller's list.
  features = [col for col in coi if col != "FamCode"]

  xy = iucn_data[["FamCode"] + features + ["redlistCategory"]]
  if not incl_lc:
    xy = xy[xy["redlistCategory"].str.lower() != "least concern"]
  xy = xy.dropna()
  xy_gen = xy[xy["FamCode"] == int(fam)]

  if len(xy_gen) < 10:
    return f"Less than ten samples: {len(xy_gen)}"

  # The degenerate-target check must look at the selected family, not at the
  # whole table; otherwise single-class families are trained and trivially
  # report an accuracy of 1.0.
  family_labels = xy_gen["redlistCategory"].str.lower()
  if family_labels.nunique() == 1:
    return "Only 1 category"

  if len(features) == 0:
    return "No columns of interest"

  X = xy_gen[features]

  # Build the one-hot targets and the per-category tally in a single pass.
  counts = defaultdict(int)
  y = np.zeros((len(family_labels), len(label_mapping)))
  for row, label in enumerate(family_labels):
    counts[label] += 1
    y[row, label_mapping[label]] = 1

  num_classes = y.shape[1]
  num_inputs = X.shape[1]

  val_accs = []
  # NOTE(review): the loop index is not used to seed any RNG, so the retries
  # differ only through Keras' own random initialisation and shuffling.
  for _ in range(retries):
    model = Sequential()
    model.add(Dense(num_inputs, input_shape=(num_inputs,), activation='relu'))
    for _layer in range(num_layers):
      model.add(Dense(50, activation='relu'))
    model.add(Dense(num_classes, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
    history = model.fit(X, y, epochs=e, batch_size=bs, verbose=0, validation_split=vs)
    # Keep the best validation accuracy reached during this run.
    val_accs.append(max(history.history['val_accuracy']))

  # Guard the mean so retries=0 reports NaN instead of raising ZeroDivisionError.
  mean_acc = sum(val_accs) / len(val_accs) if val_accs else float("nan")
  return f"Family: {fam} Samples: {y.shape[0]} Acc: {mean_acc} Count: {counts}  Accs: {val_accs}"

In [None]:
# column_analysis.csv has no header row. Without header=None pandas consumes the
# first record ("Family: 2, Samples: 62, SpecCode: ...") as the column names and
# the frame silently loses one row.
result_df = pd.read_csv(
  "drive/MyDrive/UNC/Classes/BIOL 669/column_analysis.csv",
  header=None,
  names=["family", "samples", "column_accuracy"],
)
print(result_df)

        Family: 2   Samples: 62       SpecCode: 0.3076923191547394
0       Family: 2   Samples: 62   SpeciesRefNo: 0.3076923191547394
1       Family: 2   Samples: 62        FamCode: 0.4615384638309479
2       Family: 2   Samples: 62        GenCode: 0.3076923191547394
3       Family: 2   Samples: 62       TaxIssue: 0.3076923191547394
4       Family: 2   Samples: 62          Fresh: 0.3076923191547394
...           ...           ...                                ...
4299  Family: 789   Samples: 33    GoogleImage: 0.7142857313156128
4300  Family: 789   Samples: 33           PD50: 0.7142857313156128
4301  Family: 789   Samples: 33     Emblematic: 0.7142857313156128
4302  Family: 789   Samples: 33        Entered: 0.7142857313156128
4303  Family: 789   Samples: 33       Modified: 0.7142857313156128

[4304 rows x 3 columns]


In [None]:
# Read the per-column accuracy log; the context manager closes the handle
# (the previous bare open() leaked it).
with open("drive/MyDrive/UNC/Classes/BIOL 669/column_analysis.csv") as result_file:
  result_lines = result_file.readlines()

# Group the "<column>: <accuracy>" entries (third CSV field) by family code
# (the value after "Family: " in the first field).
result_dict = defaultdict(list)
for line in result_lines:
  fields = line.strip().split(',')
  if len(fields) < 3:
    # Blank or truncated line (e.g. a trailing newline): nothing to parse.
    continue
  family = fields[0].split(": ")[1]
  result_dict[family].append(fields[2])

for family, entries in tqdm(result_dict.items(), total=len(result_dict)):
  parsed = [entry.split(": ") for entry in entries]

  # The most frequent accuracy is treated as the family's baseline; columns
  # whose accuracy differs from it are the "columns of interest".
  acc_counts = Counter(parts[1] for parts in parsed)
  common = max(acc_counts, key=acc_counts.get)

  # "FamCode" is the family filter inside the_nn, not a feature, so it is
  # excluded here explicitly instead of relying on the_nn to strip it from
  # the list as a side effect.
  coi = [
    parts[0].replace(" ", '')
    for parts in parsed
    if parts[1] != common and parts[0].replace(" ", '') != "FamCode"
  ]

  print(family, the_nn(coi, family), coi)



  0%|          | 0/132 [00:00<?, ?it/s]

2 Family: 2 Samples: 21 Acc: 0.800000011920929 Count: defaultdict(<class 'int'>, {'critically endangered': 1, 'least concern': 13, 'data deficient': 3, 'near threatened': 1, 'vulnerable': 2, 'endangered': 1})  Accs: [0.800000011920929, 0.800000011920929, 0.800000011920929] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MTraps', 'PD50', 'Entered', 'Modified', 'Expert']
10 Family: 10 Samples: 13 Acc: 0.5555555721124014 Count: defaultdict(<class 'int'>, {'near threatened': 1, 'data deficient': 2, 'critically endangered': 1, 'vulnerable': 2, 'least concern': 5, 'endangered': 2})  Accs: [0.6666666865348816, 0.3333333432674408, 0.6666666865348816] ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'LengthFemale', 'MaxLengthRef', 'ImportanceRef', 'ElectroRef', 'Modified', 'Expert']
11 Less than ten samples: 4 ['TaxIssue', 'Brack', 'MigratRef', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', '

In [None]:
out = """
2 Family: 2 Samples: 21 Acc: 0.800000011920929 Count: defaultdict(<class 'int'>, {'critically endangered': 1, 'least concern': 13, 'data deficient': 3, 'near threatened': 1, 'vulnerable': 2, 'endangered': 1})  Accs: [0.800000011920929, 0.800000011920929, 0.800000011920929] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MTraps', 'PD50', 'Entered', 'Modified', 'Expert']
10 Family: 10 Samples: 13 Acc: 0.5555555721124014 Count: defaultdict(<class 'int'>, {'near threatened': 1, 'data deficient': 2, 'critically endangered': 1, 'vulnerable': 2, 'least concern': 5, 'endangered': 2})  Accs: [0.6666666865348816, 0.3333333432674408, 0.6666666865348816] ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'LengthFemale', 'MaxLengthRef', 'ImportanceRef', 'ElectroRef', 'Modified', 'Expert']
11 Less than ten samples: 4 ['TaxIssue', 'Brack', 'MigratRef', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', 'DepthComRef', 'LongevityWild', 'LongevityWildRef', 'Vulnerability', 'Length', 'LengthFemale', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'Weight', 'MaxWeightRef', 'ImportanceRef', 'MSpears', 'MDredges', 'GameRef', 'ElectroRef', 'GoogleImage', 'PD50', 'Modified', 'Expert']
13 Family: 13 Samples: 11 Acc: 0.5555555721124014 Count: defaultdict(<class 'int'>, {'data deficient': 1, 'endangered': 4, 'vulnerable': 3, 'least concern': 2, 'near threatened': 1})  Accs: [0.6666666865348816, 0.6666666865348816, 0.3333333432674408] ['SpecCode', 'SpeciesRefNo', 'GenCode', 'TaxIssue', 'Brack', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'LengthFemale', 'MaxLengthRef', 'MSeines', 'MTrawls', 'MHooksLines', 'GoogleImage', 'Modified']
15 Family: 15 Samples: 18 Acc: 0.6666666666666666 Count: defaultdict(<class 'int'>, {'critically endangered': 9, 'near threatened': 2, 'vulnerable': 1, 'least concern': 4, 'endangered': 2})  Accs: [0.75, 0.5, 0.75] ['GenCode', 'TaxIssue', 'DepthRangeDeep', 'DepthRangeRef', 'Vulnerability', 'Length', 'MaxLengthRef', 'MTrawls', 'Modified']
17 Family: 17 Samples: 20 Acc: 0.4166666666666667 Count: defaultdict(<class 'int'>, {'endangered': 4, 'data deficient': 3, 'critically endangered': 8, 'vulnerable': 3, 'near threatened': 2})  Accs: [0.5, 0.25, 0.5] ['TaxIssue', 'DepthRangeDeep', 'DepthRangeRef', 'MSeines', 'MGillnets', 'MSpears', 'PD50', 'Modified']
18 Family: 18 Samples: 20 Acc: 0.5 Count: defaultdict(<class 'int'>, {'least concern': 11, 'near threatened': 1, 'critically endangered': 1, 'endangered': 3, 'data deficient': 2, 'vulnerable': 2})  Accs: [0.5, 0.5, 0.5] ['GenCode']
19 Less than ten samples: 7 ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComDeep', 'DepthComRef', 'Length', 'LengthFemale', 'MaxLengthRef', 'ImportanceRef', 'GameRef', 'ElectroRef', 'PD50', 'Modified', 'Expert']
20 Family: 20 Samples: 12 Acc: 0.4444444576899211 Count: defaultdict(<class 'int'>, {'vulnerable': 5, 'endangered': 1, 'near threatened': 1, 'data deficient': 1, 'least concern': 4})  Accs: [0.6666666865348816, 0.6666666865348816, 0.0] ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'ImportanceRef', 'DangerousRef', 'Entered', 'Modified', 'Expert']
22 Family: 22 Samples: 21 Acc: 0.6000000238418579 Count: defaultdict(<class 'int'>, {'data deficient': 1, 'critically endangered': 5, 'least concern': 2, 'endangered': 3, 'vulnerable': 9, 'near threatened': 1})  Accs: [0.6000000238418579, 0.6000000238418579, 0.6000000238418579] ['SpecCode', 'MSeines', 'MOther', 'GameFish']
25 Family: 25 Samples: 12 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 11, 'vulnerable': 1})  Accs: [1.0, 1.0, 1.0] ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'LengthFemale', 'MaxLengthRef', 'Modified']
32 Family: 32 Samples: 18 Acc: 0.75 Count: defaultdict(<class 'int'>, {'endangered': 1, 'vulnerable': 1, 'critically endangered': 13, 'near threatened': 2, 'least concern': 1})  Accs: [0.75, 0.75, 0.75] ['MaxWeightRef', 'MHooksLines', 'ElectroRef', 'Expert']
43 Family: 43 Samples: 11 Acc: 0.6666666865348816 Count: defaultdict(<class 'int'>, {'least concern': 10, 'data deficient': 1})  Accs: [0.6666666865348816, 0.6666666865348816, 0.6666666865348816] ['SpeciesRefNo', 'TaxIssue', 'MigratRef', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'LongevityWild', 'LongevityWildRef', 'ImportanceRef', 'BaitRef', 'PD50', 'Modified', 'Expert']
49 Family: 49 Samples: 20 Acc: 0.4166666666666667 Count: defaultdict(<class 'int'>, {'endangered': 4, 'critically endangered': 2, 'near threatened': 4, 'data deficient': 3, 'least concern': 7})  Accs: [0.5, 0.25, 0.5] ['MaxLengthRef', 'MTrawls', 'Expert']
56 Less than ten samples: 7 ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', 'DepthComRef', 'Length', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'Weight', 'MaxWeightRef', 'ImportanceRef', 'AquariumRef', 'DangerousRef', 'Modified', 'Expert']
57 Family: 57 Samples: 20 Acc: 0.75 Count: defaultdict(<class 'int'>, {'least concern': 15, 'data deficient': 2, 'not applicable': 3})  Accs: [0.75, 0.75, 0.75] ['SpecCode', 'DepthRangeDeep', 'Vulnerability', 'MHooksLines']
62 Family: 62 Samples: 11 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 11})  Accs: [1.0, 1.0, 1.0] ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'ImportanceRef', 'Modified', 'Expert']
66 Family: 66 Samples: 11 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 11})  Accs: [1.0, 1.0, 1.0] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'ImportanceRef', 'Modified', 'Expert']
76 Family: 76 Samples: 19 Acc: 0.75 Count: defaultdict(<class 'int'>, {'vulnerable': 2, 'least concern': 11, 'lower risk/least concern': 2, 'not applicable': 2, 'extinct': 1, 'extinct in the wild': 1})  Accs: [0.75, 0.75, 0.75] ['MigratRef', 'Expert']
86 Family: 86 Samples: 30 Acc: 0.8333333134651184 Count: defaultdict(<class 'int'>, {'least concern': 25, 'data deficient': 5})  Accs: [0.8333333134651184, 0.8333333134651184, 0.8333333134651184] ['DepthRangeShallow', 'Length', 'MaxLengthRef', 'ImportanceRef', 'Modified', 'Expert']
87 No columns of interest []
88 Family: 88 Samples: 21 Acc: 0.6000000238418579 Count: defaultdict(<class 'int'>, {'least concern': 18, 'not applicable': 2, 'data deficient': 1})  Accs: [0.6000000238418579, 0.6000000238418579, 0.6000000238418579] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', 'DepthComRef', 'Length', 'MaxLengthRef', 'ImportanceRef', 'Expert']
89 Family: 89 Samples: 21 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 19, 'not applicable': 2})  Accs: [1.0, 1.0, 1.0] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'ImportanceRef', 'Modified', 'Expert']
91 Family: 91 Samples: 14 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 11, 'not applicable': 3})  Accs: [1.0, 1.0, 1.0] ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'LengthFemale', 'MaxLengthRef', 'PD50', 'Modified', 'Expert']
145 Family: 145 Samples: 25 Acc: 0.6000000238418579 Count: defaultdict(<class 'int'>, {'least concern': 21, 'data deficient': 3, 'vulnerable': 1})  Accs: [0.6000000238418579, 0.6000000238418579, 0.6000000238418579] ['TaxIssue', 'CommonLength', 'CommonLengthRef', 'ImportanceRef', 'DangerousRef', 'Expert']
160 Family: 160 Samples: 14 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 14})  Accs: [1.0, 1.0, 1.0] ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComDeep', 'DepthComRef', 'Length', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'ImportanceRef', 'PD50', 'Modified', 'Expert']
167 Family: 167 Samples: 11 Acc: 0.6666666666666666 Count: defaultdict(<class 'int'>, {'least concern': 7, 'not applicable': 3, 'data deficient': 1})  Accs: [0.0, 1.0, 1.0] ['MigratRef', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', 'DepthComRef', 'Length', 'MaxLengthRef', 'Entered', 'Expert']
169 Family: 169 Samples: 28 Acc: 0.6111111243565878 Count: defaultdict(<class 'int'>, {'least concern': 21, 'data deficient': 7})  Accs: [0.6666666865348816, 0.6666666865348816, 0.5] ['SpecCode', 'Saltwater', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'Modified']
174 No columns of interest []
180 Family: 180 Samples: 18 Acc: 1.0 Count: defaultdict(<class 'int'>, {'data deficient': 1, 'least concern': 17})  Accs: [1.0, 1.0, 1.0] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'ImportanceRef', 'Modified']
185 Family: 185 Samples: 20 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 17, 'data deficient': 1, 'critically endangered': 1, 'endangered': 1})  Accs: [1.0, 1.0, 1.0] ['DepthRangeShallow', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', 'DepthComRef', 'Length', 'MaxLengthRef', 'ImportanceRef', 'Modified']
186 Less than ten samples: 2 ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', 'DepthComRef', 'Length', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'ImportanceRef', 'PD50', 'Modified', 'Expert']
187 Family: 187 Samples: 22 Acc: 1.0 Count: defaultdict(<class 'int'>, {'data deficient': 2, 'least concern': 20})  Accs: [1.0, 1.0, 1.0] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'ImportanceRef']
188 Family: 188 Samples: 29 Acc: 0.6666666865348816 Count: defaultdict(<class 'int'>, {'least concern': 23, 'data deficient': 6})  Accs: [0.6666666865348816, 0.6666666865348816, 0.6666666865348816] ['DepthRangeShallow', 'MTraps']
189 Family: 189 Samples: 11 Acc: 0.6666666865348816 Count: defaultdict(<class 'int'>, {'least concern': 9, 'data deficient': 1, 'endangered': 1})  Accs: [0.6666666865348816, 0.6666666865348816, 0.6666666865348816] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'Modified', 'Expert']
190 Family: 190 Samples: 29 Acc: 0.8333333134651184 Count: defaultdict(<class 'int'>, {'least concern': 25, 'data deficient': 3, 'near threatened': 1})  Accs: [0.8333333134651184, 0.8333333134651184, 0.8333333134651184] ['TaxIssue', 'DepthRangeShallow']
192 No columns of interest []
193 Family: 193 Samples: 29 Acc: 0.8333333134651184 Count: defaultdict(<class 'int'>, {'least concern': 24, 'data deficient': 5})  Accs: [0.8333333134651184, 0.8333333134651184, 0.8333333134651184] ['DepthRangeRef']
194 Family: 194 Samples: 18 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 18})  Accs: [1.0, 1.0, 1.0] ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'MOther', 'Modified', 'Expert']
198 Family: 198 Samples: 18 Acc: 0.8333333333333334 Count: defaultdict(<class 'int'>, {'data deficient': 7, 'not applicable': 2, 'least concern': 9})  Accs: [1.0, 0.75, 0.75] ['SpecCode', 'SpeciesRefNo', 'TaxIssue', 'Saltwater', 'DepthRangeDeep', 'DepthRangeRef', 'Vulnerability', 'MaxLengthRef', 'MGillnets', 'MDredges', 'MLiftnets', 'Modified']
199 Less than ten samples: 6 ['TaxIssue', 'DepthRangeShallow', 'Length', 'LengthFemale', 'MaxLengthRef', 'PD50', 'Entered', 'Modified']
200 Family: 200 Samples: 20 Acc: 0.75 Count: defaultdict(<class 'int'>, {'data deficient': 6, 'least concern': 13, 'not applicable': 1})  Accs: [0.75, 0.75, 0.75] ['GenCode', 'Saltwater', 'LengthFemale', 'MaxLengthRef', 'Entered']
204 Family: 204 Samples: 19 Acc: 0.5 Count: defaultdict(<class 'int'>, {'data deficient': 6, 'least concern': 11, 'not applicable': 2})  Accs: [0.5, 0.5, 0.5] ['SpeciesRefNo', 'GenCode', 'TaxIssue', 'DepthRangeShallow', 'DepthRangeRef', 'Vulnerability', 'LengthFemale', 'MaxLengthRef', 'MTraps', 'PD50']
206 Family: 206 Samples: 42 Acc: 0.8888888955116272 Count: defaultdict(<class 'int'>, {'not applicable': 1, 'least concern': 35, 'data deficient': 6})  Accs: [0.8888888955116272, 0.8888888955116272, 0.8888888955116272] ['TaxIssue', 'MigratRef', 'ImportanceRef', 'Expert']
207 Family: 207 Samples: 24 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 23, 'data deficient': 1})  Accs: [1.0, 1.0, 1.0] ['Modified']
216 Family: 216 Samples: 30 Acc: 0.6666666865348816 Count: defaultdict(<class 'int'>, {'least concern': 23, 'data deficient': 2, 'endangered': 1, 'near threatened': 2, 'critically endangered': 1, 'vulnerable': 1})  Accs: [0.6666666865348816, 0.6666666865348816, 0.6666666865348816] ['Modified']
218 No columns of interest []
233 Family: 233 Samples: 19 Acc: 0.6666666666666666 Count: defaultdict(<class 'int'>, {'least concern': 11, 'data deficient': 3, 'not applicable': 5})  Accs: [0.5, 0.75, 0.75] ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'ImportanceRef', 'PD50', 'Modified']
236 Family: 236 Samples: 26 Acc: 0.8333333134651184 Count: defaultdict(<class 'int'>, {'data deficient': 2, 'least concern': 21, 'not applicable': 2, 'vulnerable': 1})  Accs: [0.8333333134651184, 0.8333333134651184, 0.8333333134651184] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'Modified']
243 Family: 243 Samples: 24 Acc: 0.800000011920929 Count: defaultdict(<class 'int'>, {'least concern': 23, 'not applicable': 1})  Accs: [0.800000011920929, 0.800000011920929, 0.800000011920929] ['CommonLength', 'CommonLengthRef', 'ImportanceRef']
246 Family: 246 Samples: 17 Acc: 0.5 Count: defaultdict(<class 'int'>, {'not applicable': 3, 'least concern': 4, 'data deficient': 10})  Accs: [0.5, 0.5, 0.5] ['DepthRangeShallow', 'DepthRangeDeep', 'Vulnerability', 'Length', 'MaxLengthRef']
258 Less than ten samples: 1 ['SpecCode', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComDeep', 'Length', 'LengthFemale', 'MaxLengthRef', 'ImportanceRef', 'AquariumRef', 'Modified', 'Expert']
264 Less than ten samples: 3 ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComDeep', 'DepthComRef', 'Length', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'ImportanceRef', 'AquariumRef', 'DangerousRef', 'Modified', 'Expert']
266 Less than ten samples: 2 ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', 'DepthComRef', 'Length', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'ImportanceRef', 'Expert']
273 Family: 273 Samples: 16 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 15, 'data deficient': 1})  Accs: [1.0, 1.0, 1.0] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'CommonLength', 'CommonLengthRef', 'ImportanceRef', 'MHooksLines', 'Modified', 'Expert']
277 Family: 277 Samples: 19 Acc: 0.75 Count: defaultdict(<class 'int'>, {'least concern': 16, 'near threatened': 1, 'data deficient': 2})  Accs: [0.75, 0.75, 0.75] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef']
289 Less than ten samples: 0 ['TaxIssue', 'MigratRef', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', 'DepthComRef', 'LongevityWild', 'LongevityWildRef', 'Length', 'LengthFemale', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'Weight', 'MaxWeightRef', 'ImportanceRef', 'AquacultureRef', 'AquariumRef', 'GameRef', 'DangerousRef', 'Modified', 'Expert']
291 Less than ten samples: 3 ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', 'DepthComRef', 'AquariumRef', 'Modified', 'Expert']
293 No columns of interest []
303 No columns of interest []
304 Family: 304 Samples: 47 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 45, 'data deficient': 2})  Accs: [1.0, 1.0, 1.0] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'Modified', 'Expert']
313 Family: 313 Samples: 21 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 16, 'not applicable': 3, 'data deficient': 2})  Accs: [1.0, 1.0, 1.0] ['DepthRangeRef', 'MaxLengthRef']
314 Less than ten samples: 0 ['MigratRef', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', 'DepthComRef', 'LongevityWild', 'LongevityWildRef', 'LongevityCaptive', 'LongevityCapRef', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'Weight', 'MaxWeightRef', 'ImportanceRef', 'AquacultureRef', 'BaitRef', 'AquariumRef', 'GameRef', 'DangerousRef', 'Expert']
323 Less than ten samples: 6 ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', 'DepthComRef', 'LongevityWild', 'LongevityWildRef', 'CommonLength', 'CommonLengthRef', 'Weight', 'MaxWeightRef', 'ImportanceRef', 'AquariumRef', 'GameRef', 'DangerousRef', 'Expert']
324 No columns of interest []
326 No columns of interest []
327 Family: 327 Samples: 13 Acc: 1.0 Count: defaultdict(<class 'int'>, {'data deficient': 4, 'least concern': 8, 'near threatened': 1})  Accs: [1.0, 1.0, 1.0] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'Weight', 'MaxWeightRef', 'ImportanceRef', 'AquariumRef', 'DangerousRef', 'Modified', 'Expert']
328 Family: 328 Samples: 41 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 39, 'data deficient': 1, 'endangered': 1})  Accs: [1.0, 1.0, 1.0] ['SpecCode']
330 Less than ten samples: 0 ['TaxIssue', 'MigratRef', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', 'DepthComRef', 'LongevityWild', 'LongevityWildRef', 'CommonLength', 'CommonLengthRef', 'Weight', 'MaxWeightRef', 'ImportanceRef', 'AquacultureRef', 'GameRef', 'Modified', 'Expert']
331 Less than ten samples: 0 ['TaxIssue', 'MigratRef', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComDeep', 'DepthComRef', 'LongevityWild', 'LongevityWildRef', 'Length', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'Weight', 'MaxWeightRef', 'ImportanceRef', 'GameRef', 'Modified', 'Expert']
332 Family: 332 Samples: 14 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 13, 'vulnerable': 1})  Accs: [1.0, 1.0, 1.0] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', 'DepthComRef', 'CommonLength', 'CommonLengthRef', 'ImportanceRef', 'PD50', 'Modified', 'Expert']
343 Family: 343 Samples: 19 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 18, 'vulnerable': 1})  Accs: [1.0, 1.0, 1.0] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', 'DepthComRef', 'Length', 'MaxLengthRef', 'AquariumRef', 'Modified', 'Expert']
349 Family: 349 Samples: 30 Acc: 0.5 Count: defaultdict(<class 'int'>, {'least concern': 23, 'data deficient': 6, 'vulnerable': 1})  Accs: [0.5, 0.5, 0.5] ['Expert']
350 Less than ten samples: 9 ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'ImportanceRef', 'AquariumRef', 'Modified', 'Expert']
352 Family: 352 Samples: 24 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 22, 'endangered': 1, 'data deficient': 1})  Accs: [1.0, 1.0, 1.0] ['Length', 'MaxLengthRef', 'Modified']
359 Family: 359 Samples: 18 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 17, 'data deficient': 1})  Accs: [1.0, 1.0, 1.0] ['MigratRef', 'DepthRangeShallow', 'DepthRangeDeep', 'Length', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'ImportanceRef', 'AquacultureRef', 'PD50', 'Entered', 'Modified', 'Expert']
360 Family: 360 Samples: 21 Acc: 0.800000011920929 Count: defaultdict(<class 'int'>, {'least concern': 19, 'not applicable': 1, 'data deficient': 1})  Accs: [0.800000011920929, 0.800000011920929, 0.800000011920929] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'Expert']
362 Less than ten samples: 0 ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', 'DepthComRef', 'LongevityWild', 'LongevityWildRef', 'Length', 'LengthFemale', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'Weight', 'MaxWeightRef', 'ImportanceRef', 'AquariumRef', 'GameRef', 'Modified', 'Expert']
364 Less than ten samples: 4 ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'Weight', 'MaxWeightRef', 'ImportanceRef', 'AquariumRef', 'DangerousRef', 'Modified', 'Expert']
366 Family: 366 Samples: 21 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 18, 'data deficient': 3})  Accs: [1.0, 1.0, 1.0] ['SpecCode', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'Modified']
368 Family: 368 Samples: 24 Acc: 0.800000011920929 Count: defaultdict(<class 'int'>, {'least concern': 19, 'not applicable': 3, 'data deficient': 2})  Accs: [0.800000011920929, 0.800000011920929, 0.800000011920929] ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'Modified']
380 Family: 380 Samples: 27 Acc: 0.8333333134651184 Count: defaultdict(<class 'int'>, {'least concern': 22, 'data deficient': 3, 'vulnerable': 2})  Accs: [0.8333333134651184, 0.8333333134651184, 0.8333333134651184] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'Modified']
390 Less than ten samples: 5 ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', 'DepthComRef', 'Modified', 'Expert']
391 Family: 391 Samples: 12 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 12})  Accs: [1.0, 1.0, 1.0] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'Modified']
392 Less than ten samples: 0 ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComDeep', 'DepthComRef', 'Length', 'LengthFemale', 'MaxLengthRef', 'ImportanceRef', 'AquariumRef', 'Modified', 'Expert']
404 Less than ten samples: 5 ['MigratRef', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'ImportanceRef', 'Modified', 'Expert']
405 Less than ten samples: 0 ['TaxIssue', 'MigratRef', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', 'DepthComRef', 'LongevityWild', 'LongevityWildRef', 'Length', 'LengthFemale', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'ImportanceRef', 'AquariumRef', 'PD50', 'Modified', 'Expert']
412 Family: 412 Samples: 11 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 11})  Accs: [1.0, 1.0, 1.0] ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', 'DepthComRef', 'Length', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'ImportanceRef', 'MLiftnets', 'AquariumRef', 'DangerousRef', 'Modified', 'Expert']
413 Family: 413 Samples: 15 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 14, 'data deficient': 1})  Accs: [1.0, 1.0, 1.0] ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'CommonLength', 'CommonLengthRef', 'DangerousRef', 'Entered', 'Modified', 'Expert']
415 Family: 415 Samples: 20 Acc: 0.75 Count: defaultdict(<class 'int'>, {'least concern': 16, 'data deficient': 4})  Accs: [0.75, 0.75, 0.75] ['ImportanceRef']
416 Less than ten samples: 2 ['MigratRef', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', 'DepthComRef', 'LongevityWild', 'LongevityWildRef', 'CommonLength', 'CommonLengthRef', 'Weight', 'MaxWeightRef', 'GameRef', 'DangerousRef', 'Expert']
434 Less than ten samples: 5 ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'Modified', 'Expert']
435 Family: 435 Samples: 25 Acc: 0.9333333373069763 Count: defaultdict(<class 'int'>, {'vulnerable': 1, 'least concern': 22, 'data deficient': 2})  Accs: [0.800000011920929, 1.0, 1.0] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'Entered', 'Modified']
439 Family: 439 Samples: 27 Acc: 0.8333333134651184 Count: defaultdict(<class 'int'>, {'least concern': 25, 'data deficient': 2})  Accs: [0.8333333134651184, 0.8333333134651184, 0.8333333134651184] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'ImportanceRef', 'Modified', 'Expert']
440 Family: 440 Samples: 10 Acc: 0.8333333333333334 Count: defaultdict(<class 'int'>, {'data deficient': 1, 'least concern': 6, 'near threatened': 3})  Accs: [1.0, 0.5, 1.0] ['MigratRef', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'LongevityWild', 'LongevityWildRef', 'Length', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'Weight', 'MaxWeightRef', 'ImportanceRef', 'Modified', 'Expert']
441 Family: 441 Samples: 26 Acc: 0.6111111144224802 Count: defaultdict(<class 'int'>, {'least concern': 18, 'data deficient': 7, 'not applicable': 1})  Accs: [0.3333333432674408, 0.6666666865348816, 0.8333333134651184] ['SpecCode', 'SpeciesRefNo', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'ImportanceRef', 'Modified', 'Expert']
442 Less than ten samples: 4 ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'DepthRangeComShallow', 'DepthRangeComDeep', 'DepthComRef', 'Length', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'ImportanceRef', 'Modified', 'Expert']
445 Family: 445 Samples: 25 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 18, 'data deficient': 2, 'near threatened': 2, 'vulnerable': 2, 'not applicable': 1})  Accs: [1.0, 1.0, 1.0] ['MTraps']
448 Less than ten samples: 2 ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'ImportanceRef', 'AquariumRef', 'DangerousRef', 'Modified', 'Expert']
449 Family: 449 Samples: 21 Acc: 0.800000011920929 Count: defaultdict(<class 'int'>, {'data deficient': 3, 'least concern': 16, 'not applicable': 2})  Accs: [0.800000011920929, 0.800000011920929, 0.800000011920929] ['Length', 'MSeines']
454 Family: 454 Samples: 10 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 8, 'data deficient': 2})  Accs: [1.0, 1.0, 1.0] ['GenCode', 'TaxIssue', 'MigratRef', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'CommonLength', 'CommonLengthRef', 'ImportanceRef', 'MTraps', 'BaitRef', 'Expert']
459 No columns of interest []
460 No columns of interest []
468 No columns of interest []
470 Family: 470 Samples: 20 Acc: 0.75 Count: defaultdict(<class 'int'>, {'least concern': 19, 'not applicable': 1})  Accs: [0.75, 0.75, 0.75] ['DepthRangeDeep', 'DepthRangeRef']
472 Less than ten samples: 0 ['SpecCode', 'GenCode', 'TaxIssue', 'Fresh', 'Brack', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'LengthFemale', 'MaxLengthRef', 'ImportanceRef', 'MSeines', 'MTraps', 'MSpears', 'Entered', 'Modified', 'Expert']
475 Family: 475 Samples: 15 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 14, 'not applicable': 1})  Accs: [1.0, 1.0, 1.0] ['TaxIssue', 'CommonLength', 'CommonLengthRef', 'ImportanceRef', 'Expert']
482 Family: 482 Samples: 16 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 16})  Accs: [1.0, 1.0, 1.0] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'AquariumRef', 'Modified', 'Expert']
483 Family: 483 Samples: 12 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 12})  Accs: [1.0, 1.0, 1.0] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'Modified', 'Expert']
488 Less than ten samples: 9 ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'LengthFemale', 'MaxLengthRef', 'ImportanceRef', 'MTraps', 'MTrawls', 'MHooksLines', 'GameFish', 'ElectroRef', 'Modified', 'Expert']
500 Family: 500 Samples: 18 Acc: 0.6666666666666666 Count: defaultdict(<class 'int'>, {'least concern': 13, 'near threatened': 1, 'vulnerable': 3, 'data deficient': 1})  Accs: [0.75, 0.75, 0.5] ['DepthRangeDeep', 'DepthRangeRef', 'Modified']
503 Family: 503 Samples: 18 Acc: 0.75 Count: defaultdict(<class 'int'>, {'vulnerable': 7, 'least concern': 9, 'near threatened': 1, 'data deficient': 1})  Accs: [1.0, 0.75, 0.5] ['GenCode', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Vulnerability', 'Length', 'MGillnets', 'MOther', 'Modified']
507 No columns of interest []
514 Family: 514 Samples: 48 Acc: 0.9333333174387614 Count: defaultdict(<class 'int'>, {'least concern': 42, 'data deficient': 5, 'vulnerable': 1})  Accs: [0.8999999761581421, 0.8999999761581421, 1.0] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef']
516 Family: 516 Samples: 22 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 20, 'data deficient': 2})  Accs: [1.0, 1.0, 1.0] ['MaxLengthRef']
517 Less than ten samples: 6 ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'CommonLength', 'CommonLengthRef', 'ImportanceRef', 'MGillnets', 'Entered', 'Modified', 'Expert']
560 Family: 560 Samples: 26 Acc: 0.8333333134651184 Count: defaultdict(<class 'int'>, {'least concern': 23, 'not applicable': 2, 'data deficient': 1})  Accs: [0.8333333134651184, 0.8333333134651184, 0.8333333134651184] ['DepthRangeShallow', 'Length', 'MaxLengthRef']
578 Family: 578 Samples: 23 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 20, 'data deficient': 3})  Accs: [1.0, 1.0, 1.0] ['MSpears']
581 No columns of interest []
589 Family: 589 Samples: 13 Acc: 0.5555555721124014 Count: defaultdict(<class 'int'>, {'least concern': 5, 'data deficient': 8})  Accs: [0.3333333432674408, 0.6666666865348816, 0.6666666865348816] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'Modified']
615 No columns of interest []
620 Family: 620 Samples: 16 Acc: 0.75 Count: defaultdict(<class 'int'>, {'least concern': 10, 'data deficient': 6})  Accs: [0.75, 0.75, 0.75] ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'ImportanceRef']
623 Family: 623 Samples: 33 Acc: 0.7142857313156128 Count: defaultdict(<class 'int'>, {'least concern': 27, 'data deficient': 2, 'endangered': 2, 'near threatened': 1, 'vulnerable': 1})  Accs: [0.7142857313156128, 0.7142857313156128, 0.7142857313156128] ['Length', 'MaxLengthRef', 'Entered', 'Expert']
638 Less than ten samples: 3 ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'LengthFemale', 'MaxLengthRef', 'ImportanceRef', 'ElectroRef', 'Modified', 'Expert']
658 Family: 658 Samples: 20 Acc: 0.5 Count: defaultdict(<class 'int'>, {'least concern': 6, 'endangered': 1, 'near threatened': 3, 'data deficient': 6, 'vulnerable': 4})  Accs: [0.5, 0.5, 0.5] ['DepthRangeShallow', 'DepthRangeRef', 'Vulnerability', 'MGillnets', 'MTrawls', 'MHooksLines', 'GameFish']
685 Family: 685 Samples: 14 Acc: 0.6666666865348816 Count: defaultdict(<class 'int'>, {'critically endangered': 1, 'least concern': 12, 'near threatened': 1})  Accs: [0.6666666865348816, 0.6666666865348816, 0.6666666865348816] ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'ElectroRef', 'Modified']
703 Family: 703 Samples: 15 Acc: 0.8888888955116272 Count: defaultdict(<class 'int'>, {'least concern': 15})  Accs: [1.0, 0.6666666865348816, 1.0] ['TaxIssue', 'DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'LengthFemale', 'MaxLengthRef', 'ImportanceRef', 'MOther', 'ElectroRef', 'Modified', 'Expert']
723 Less than ten samples: 5 ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'LengthFemale', 'ImportanceRef', 'Entered', 'Modified']
728 Family: 728 Samples: 15 Acc: 1.0 Count: defaultdict(<class 'int'>, {'least concern': 15})  Accs: [1.0, 1.0, 1.0] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Length', 'MaxLengthRef', 'CommonLength', 'CommonLengthRef', 'ImportanceRef', 'Modified', 'Expert']
772 Family: 772 Samples: 20 Acc: 0.5 Count: defaultdict(<class 'int'>, {'data deficient': 7, 'least concern': 11, 'endangered': 2})  Accs: [0.5, 0.5, 0.5] ['SpeciesRefNo']
773 Family: 773 Samples: 20 Acc: 0.75 Count: defaultdict(<class 'int'>, {'least concern': 16, 'data deficient': 2, 'not applicable': 2})  Accs: [0.75, 0.75, 0.75] ['DepthRangeShallow', 'DepthRangeDeep', 'DepthRangeRef', 'Modified']
789 Family: 789 Samples: 33 Acc: 0.7142857313156128 Count: defaultdict(<class 'int'>, {'least concern': 27, 'data deficient': 6})  Accs: [0.7142857313156128, 0.7142857313156128, 0.7142857313156128] ['MCastnets']
"""

In [None]:
# Turn the captured log text in `out` into three parallel lists, with one
# entry for every log line that contains "Samples:". Lines without that
# marker (e.g. "Less than ten samples: ..." or "No columns of interest")
# are ignored.
out_lines = out.split('\n')

accs = []    # value printed after "Acc: "
props = []   # largest per-category count divided by the sample count
sampss = []  # value printed after "Samples: "
for record in (text for text in out_lines if "Samples:" in text):
  # Line layout: "... Samples: <n> Acc: <a> Count: defaultdict(..., {...}) ..."
  accuracy = float(record.split("Acc: ")[1].split(" Count:")[0])
  n_samples = int(record.split("Samples: ")[1].split(" Acc:")[0])

  # Everything between the first "{" and the next "}" is the comma-separated
  # "'category': count" listing; keep only the biggest count.
  count_entries = record.split("{")[1].split("}")[0].split(",")
  largest_count = max(int(entry.split(":")[1]) for entry in count_entries)

  accs.append(accuracy)
  props.append(largest_count / n_samples)
  sampss.append(n_samples)


In [None]:
import plotly.express as px
# One point per parsed log line: x is the accuracy, y is the share of that
# family's samples that belong to its most common category.
fig = px.scatter(
    x=accs,
    y=props,
    title="Accuracy vs. Disproportion of Categorical Samples per Family",
    labels=dict(
        x="Accuracy",
        y="Proportion of Category with Most Samples",
    ),
    width=800,
    height=600,
)
fig.show()



In [None]:
import plotly.express as px
# One point per parsed log line: x is the accuracy, y is the number of
# samples reported for that family.
fig = px.scatter(
    x=accs,
    y=sampss,
    title="Accuracy vs. Sample Size per Family",
    labels=dict(
        x="Accuracy",
        y="Number of Samples",
    ),
    width=800,
    height=600,
)
fig.show()

