# Fishing Board Game Card Data Analysis

In [None]:
import math
import pandas as pd
import yaml

## Fish Cards

In [None]:
# Load the card and fish definitions from data.yaml.
#
# A malformed file is allowed to raise here. Printing the YAML error and
# carrying on would leave `data` undefined (or stale from an earlier run of
# this cell), so the lines below would either fail with an unrelated
# NameError or analyse the wrong data.
# The encoding is explicit so the result does not depend on the platform's
# default locale encoding.
with open("data.yaml", encoding="utf-8") as stream:
    data = yaml.safe_load(stream)

card_df = pd.DataFrame(data["cardData"])

# Scale `odds` by 6 ** (4 - numDice), then floor-divide by 6.
# NOTE(review): presumably this puts cards with different dice counts on a
# common "out of 216" (three-dice) scale — confirm against the game rules.
card_df["adjustedOdds"] = card_df["odds"] * 6 ** (4 - card_df["numDice"]) // 6
# Divide by 2.16 and round to two decimals.
# NOTE(review): looks like a conversion from "out of 216" to a percentage —
# confirm.
card_df["adjustedOdds"] = card_df["adjustedOdds"].apply(lambda x: round(x / 2.16, 2))
# money = floor(log2(360 / adjustedOdds)) + numDice.
# NOTE(review): an adjustedOdds of 0 would raise ZeroDivisionError here;
# the 360 constant is not explained anywhere in this notebook.
card_df["money"] = card_df["adjustedOdds"].apply(lambda x: math.floor(math.log2(360 / x))) + card_df["numDice"]

fish_df = pd.DataFrame(data["fishData"])

In [None]:
# Display the card table, including the derived adjustedOdds and money columns.
card_df

In [None]:
# Display the fish table built from the "fishData" section of data.yaml.
fish_df

In [None]:
# Number of cards for each numDice value, listed in ascending numDice order.
(
    card_df["numDice"]
    .value_counts()
    .sort_index()
    .reset_index()
)

In [None]:
# Card counts by reputation (rows) and money (columns), with "All" totals.
pd.crosstab(
    index=card_df["reputation"],
    columns=card_df["money"],
    margins=True,
)

In [None]:
# Count the cards sharing each (numDice, reputation, money, adjustedOdds)
# combination, then list them by ascending numDice and, within the same dice
# count, by descending adjustedOdds.
(
    card_df
    .groupby(["numDice", "reputation", "money", "adjustedOdds"])
    .size()
    .reset_index(name="count")
    .sort_values(by=["numDice", "adjustedOdds"], ascending=[True, False])
    .reset_index(drop=True)
)

In [None]:
# Fish counts by family (rows) and habitat (columns), with "All" totals.
pd.crosstab(
    index=fish_df["family"],
    columns=fish_df["habitat"],
    margins=True,
)

In [None]:
# Fish counts by rarity (rows) and habitat (columns), with "All" totals.
pd.crosstab(
    index=fish_df["rarity"],
    columns=fish_df["habitat"],
    margins=True,
)

In [None]:
# Export the fish list to fish_table.md as a Markdown table, ordered by
# habitat, then family, rarity and name.
sorted_df = fish_df.sort_values(by=["habitat", "family", "rarity", "name"])

# Columns written to the table, in the same order as the header row below.
table_columns = ["name", "scientificName", "habitat", "family", "rarity"]

markdown_lines = [
    "| name | scientificName | habitat | family | rarity |",
    "|------|----------------|---------|--------|--------|",
]

# One table row per fish.
for _, row in sorted_df.iterrows():
    cells = " | ".join(f"{row[column]}" for column in table_columns)
    markdown_lines.append(f"| {cells} |")

# Write as UTF-8 explicitly: with the platform default encoding, names
# containing non-ASCII characters can fail to encode or be written in a
# legacy code page on some systems.
with open("fish_table.md", "w", encoding="utf-8") as file:
    file.write("\n".join(markdown_lines))

### Scientific Name Analysis

In [None]:
# Load the taxonomy data from fish_tax_data.yaml.
#
# A malformed file is allowed to raise here. If the YAML error were only
# printed, `data` would still hold the contents of data.yaml loaded earlier
# in the notebook, and tax_df would silently be built from the wrong file.
# The encoding is explicit so the result does not depend on the platform's
# default locale encoding.
with open("fish_tax_data.yaml", encoding="utf-8") as stream:
    data = yaml.safe_load(stream)

tax_df = pd.DataFrame(data["fishData"])

In [None]:
# Display the taxonomy table built from the "fishData" section of fish_tax_data.yaml.
tax_df

In [None]:
# Number of fish in each scientific order (value_counts lists the most
# frequent order first).
(
    tax_df["scientificOrder"]
    .value_counts()
    .reset_index()
)

In [None]:
# Count the fish for every (scientificOrder, scientificFamily, family)
# combination, sorted by those three keys.
# The count column is named explicitly: value_counts() only labels its result
# "count" from pandas 2.0 onwards, so relying on the implicit name makes the
# lookup below fail with a KeyError on older versions.
tax_count_df = (
    tax_df[["scientificOrder", "scientificFamily", "family"]]
    .value_counts()
    .sort_index()
    .reset_index(name="count")
)
# Show the combinations that occur for exactly one fish.
tax_count_df[tax_count_df["count"] == 1]

In [None]:
# Number of fish for every (scientificOrder, family) pair, sorted by those keys.
(
    tax_df[["scientificOrder", "family"]]
    .value_counts()
    .sort_index()
    .reset_index()
)

In [None]:
# Scientific families whose fish are assigned to more than one `family` value.
# First keep every row whose scientificFamily occurs on more than one row of
# tax_count_df ...
repeated_sci_family = tax_count_df.duplicated(subset="scientificFamily", keep=False)
duplicate_sci_fam_df = tax_count_df[repeated_sci_family]
# ... then keep only the scientific families with several distinct `family`
# values among those rows.
duplicate_sci_fam_df.groupby("scientificFamily").filter(
    lambda sci_family_rows: sci_family_rows["family"].nunique() > 1
)

In [None]:
# `family` values whose fish come from more than one scientific family.
# Count the fish for every (family, scientificFamily) pair, sorted by those keys.
fam_count_df = (
    tax_df[["family", "scientificFamily"]]
    .value_counts()
    .sort_index()
    .reset_index()
)
# Keep every row whose family occurs on more than one row ...
repeated_family = fam_count_df.duplicated(subset="family", keep=False)
duplicate_fam_df = fam_count_df[repeated_family]
# ... then keep only the families with several distinct scientific families.
duplicate_fam_df.groupby("family").filter(
    lambda family_rows: family_rows["scientificFamily"].nunique() > 1
)

In [None]:
# `family` values whose fish come from more than one scientific order.
# Count the fish for every (family, scientificOrder) pair, sorted by those keys.
order_count_df = (
    tax_df[["family", "scientificOrder"]]
    .value_counts()
    .sort_index()
    .reset_index()
)
# Keep every row whose family occurs on more than one row ...
repeated_family_order = order_count_df.duplicated(subset="family", keep=False)
duplicate_order_df = order_count_df[repeated_family_order]
# ... then keep only the families with several distinct scientific orders.
duplicate_order_df.groupby("family").filter(
    lambda family_rows: family_rows["scientificOrder"].nunique() > 1
)