# 2022 Analysis


In [53]:
import pandas as pd

# Load the cleaned 2022 results from the csv file
df = pd.read_csv(
    filepath_or_buffer="./2022_results_cleaned.csv",
    na_filter=False,  # Keep 'N/A' as a literal string instead of NaN
    # The import was failing to parse the date columns, so they are
    # listed explicitly, together with the format used on export.
    date_format="%m-%d-%Y",
    parse_dates=[f"preferred_entry_date_{option}" for option in (1, 2, 3)]
    + ["awarded_entry_date"],
)

# Preview the first rows
df.head()

Unnamed: 0,preferred_entry_date_1,preferred_zone_1,minimum_acceptable_group_size_1,preferred_entry_date_2,preferred_zone_2,minimum_acceptable_group_size_2,preferred_entry_date_3,preferred_zone_3,minimum_acceptable_group_size_3,results_status,awarded_preference,awarded_entry_date,awarded_entrance,awarded_group_size
0,2022-09-02,Core Enchantment Zone,8,2022-08-26,Colchuck Zone,8,2022-09-16,Core Enchantment Zone,8,Unsuccessful,0,1970-01-01,,0
1,2022-08-15,Colchuck Zone,2,2022-08-24,Colchuck Zone,2,2022-08-29,Colchuck Zone,2,Unsuccessful,0,1970-01-01,,0
2,2022-08-12,Snow Zone,8,2022-08-19,Snow Zone,8,2022-08-03,Snow Zone,8,Unsuccessful,0,1970-01-01,,0
3,2022-07-12,Core Enchantment Zone,2,2022-07-20,Core Enchantment Zone,2,2022-07-13,Snow Zone,2,Unsuccessful,0,1970-01-01,,0
4,2022-09-03,Stuart Zone,4,2022-08-28,Stuart Zone,4,2022-08-21,Stuart Zone,4,Unsuccessful,0,1970-01-01,,0


## Full Application Analysis


In [54]:
# The number of rows in the dataset is the total number of applications
total_applications = df.shape[0]

# Report the total
print("Total number of applications in 2022: {}".format(total_applications))

Total number of applications in 2022: 36827


In [55]:
# Count the rows whose results_status is "Awarded"
total_awarded = int(df["results_status"].eq("Awarded").sum())

# Report the total
print("Total number of awarded permits in 2022: {}".format(total_awarded))

Total number of awarded permits in 2022: 2528


In [56]:
# Share of all applications that ended up with an awarded permit
probability_awarded = total_awarded / total_applications

# Report the probability together with the underlying counts
print(
    "Probability of getting an awarded permit in 2022: "
    f"{probability_awarded:.2%} "
    f"({total_awarded}/{total_applications})"
)

Probability of getting an awarded permit in 2022: 6.86% (2528/36827)


In [57]:
# Show awarded permits by awarded status across the different zones
awarded = df.loc[df["results_status"].eq("Awarded")]

# Heading for the table displayed below
print("Awarded permits by zone:")

# Rows: awarded entrance zone; columns: results status
pd.crosstab(index=awarded["awarded_entrance"], columns=awarded["results_status"])

Awarded permits by zone:


results_status,Awarded
awarded_entrance,Unnamed: 1_level_1
Colchuck Zone,353
Core Enchantment Zone,723
Eightmile/Caroline Zone,284
Eightmile/Caroline Zone (stock),39
Snow Zone,637
Stuart Zone,472
Stuart Zone (stock),20


In [58]:
# Probability you were awarded a permit for the Core Enchantment Zone:
# Core Enchantment Zone awards divided by ALL applications.
probability_core_awarded = (
    int(awarded["awarded_entrance"].eq("Core Enchantment Zone").sum())
    / total_applications
)

# Report the probability
print(
    "Probability of getting an awarded permit for the Core Enchantment Zone in 2022: "
    f"{probability_core_awarded:.2%}"
)

Probability of getting an awarded permit for the Core Enchantment Zone in 2022: 1.96%


In [59]:
# Probability of being awarded a permit given you applied for Core Zone

# One boolean mask per preference slot
applied_core_zone_1_filter = df["preferred_zone_1"].eq("Core Enchantment Zone")
applied_core_zone_2_filter = df["preferred_zone_2"].eq("Core Enchantment Zone")
applied_core_zone_3_filter = df["preferred_zone_3"].eq("Core Enchantment Zone")

# True when at least one of the three preferences is the Core Zone
applied_atleast_one_core_zone_filter = (
    applied_core_zone_1_filter | applied_core_zone_2_filter | applied_core_zone_3_filter
)

applied_core_zone = df.loc[applied_atleast_one_core_zone_filter]

# Of those applications, keep the ones awarded a permit (in any zone)
applied_awarded_core_zone = applied_core_zone.loc[
    applied_core_zone["results_status"].eq("Awarded")
]

probability_applied_awarded_core_zone = (
    applied_awarded_core_zone.shape[0] / applied_core_zone.shape[0]
)

# Report the probability together with the underlying counts
print(
    "Probability of being awarded a permit given you applied for Core Zone in 2022: "
    f"{probability_applied_awarded_core_zone:.2%} "
    f"({len(applied_awarded_core_zone)}/{len(applied_core_zone)})"
)

Probability of being awarded a permit given you applied for Core Zone in 2022: 5.65% (1730/30644)


In [60]:
# Probability of being awarded a permit given you DID NOT apply for the Core Zone

# Complement of the "applied for the Core Zone at least once" mask
did_not_apply_core_zone_filter = ~applied_atleast_one_core_zone_filter

did_not_apply_core_zone = df.loc[did_not_apply_core_zone_filter]

# Of those applications, keep the ones awarded a permit
did_not_apply_awarded_core_zone = did_not_apply_core_zone.loc[
    did_not_apply_core_zone["results_status"].eq("Awarded")
]

probability_did_not_apply_awarded_core_zone = (
    did_not_apply_awarded_core_zone.shape[0] / did_not_apply_core_zone.shape[0]
)

# Report the probability together with the underlying counts
print(
    "Probability of being awarded a permit given you DID NOT apply for Core Zone in 2022: "
    f"{probability_did_not_apply_awarded_core_zone:.2%} "
    f"({len(did_not_apply_awarded_core_zone)}/{len(did_not_apply_core_zone)})"
)

Probability of being awarded a permit given you DID NOT apply for Core Zone in 2022: 12.91% (798/6183)


In [61]:
# Probability of being awarded a permit given you applied for only the Core Zone

# Preference slots that were left empty are stored as the literal string "N/A"
division_2_na_filter = df["preferred_zone_2"].eq("N/A")
division_3_na_filter = df["preferred_zone_3"].eq("N/A")

# Three ways to have applied for nothing but the Core Zone:
#   all three options are Core
only_applied_core_filter = (
    applied_core_zone_1_filter & applied_core_zone_2_filter & applied_core_zone_3_filter
)
#   option 1 is Core, options 2 and 3 are N/A
only_applied_core_2_filter = (
    applied_core_zone_1_filter & division_2_na_filter & division_3_na_filter
)
#   options 1 and 2 are Core, option 3 is N/A
only_applied_core_3_filter = (
    applied_core_zone_1_filter & applied_core_zone_2_filter & division_3_na_filter
)

only_applied_core = df.loc[
    only_applied_core_filter | only_applied_core_2_filter | only_applied_core_3_filter
]

# Of those applications, keep the ones awarded a permit
only_applied_awarded_core = only_applied_core.loc[
    only_applied_core["results_status"].eq("Awarded")
]

probability_only_applied_awarded_core = (
    only_applied_awarded_core.shape[0] / only_applied_core.shape[0]
)

# Report the probability together with the underlying counts
print(
    "Probability of being awarded a Core Zone permit given you applied for only the Core Zone in 2022: "
    f"{probability_only_applied_awarded_core:.2%} "
    f"({len(only_applied_awarded_core)}/{len(only_applied_core)})"
)

Probability of being awarded a Core Zone permit given you applied for only the Core Zone in 2022: 2.90% (518/17892)


In [62]:
def sort_zone_probabilities(x):
    """Sort zone rows in place by probability, highest first.

    Args:
        x: List of rows; the element at index 1 of each row is the
            probability used as the sort key.

    Returns:
        The same list object, now sorted. (``list.sort`` itself returns
        ``None``, so the sorted list is returned explicitly.)
    """
    x.sort(key=lambda row: row[1], reverse=True)
    return x


def create_zone_probability_dataframe(x, columns):
    """Build a DataFrame from a list of zone rows.

    Args:
        x: List of rows, one per zone.
        columns: Column labels, in the same order as the row elements.

    Returns:
        A new ``pd.DataFrame`` holding the rows under the given columns.
    """
    return pd.DataFrame(
        x,
        columns=columns,
    )


def add_probability_percent_column(df):
    """Add a "Probability (%)" column with "Probability" formatted as a percent.

    Args:
        df: DataFrame with a numeric "Probability" column.

    Returns:
        The same DataFrame object; the new column is added in place and
        holds strings such as "12.34%".
    """
    df["Probability (%)"] = df["Probability"].map("{:.2%}".format)
    return df


def zone_probabilities_to_crosstab(x, columns):
    """Turn a list of zone rows into a display table sorted by probability.

    Args:
        x: List of rows whose element at index 1 is the probability.
            The list is sorted in place (highest probability first).
        columns: Column labels for the rows; must include "Probability".

    Returns:
        A DataFrame of the sorted rows with an extra "Probability (%)" column.
    """
    sort_zone_probabilities(x)
    return add_probability_percent_column(create_zone_probability_dataframe(x, columns))

In [63]:
# Get the probability of being awarded a permit for a zone given you applied for that zone

# The zones are taken from the values that appear as a first preference
zones_values = df["preferred_zone_1"].unique()

# One row per zone: [zone, probability, total awarded, total applied]
prob_awarded_zone_applied_for = []

for zone in zones_values:
    # Applications that list this zone in any of the three preferences
    applied_1 = df["preferred_zone_1"].eq(zone)
    applied_2 = df["preferred_zone_2"].eq(zone)
    applied_3 = df["preferred_zone_3"].eq(zone)
    zone_filter = applied_1 | applied_2 | applied_3

    applied_zone = df.loc[zone_filter]
    total_zone = applied_zone.shape[0]

    # Applications that listed the zone AND were awarded that same zone
    awarded_zone_filter = df["awarded_entrance"].eq(zone)
    total_zone_awarded = int((zone_filter & awarded_zone_filter).sum())

    prob_zone_awarded = total_zone_awarded / total_zone

    prob_awarded_zone_applied_for.append(
        [zone, prob_zone_awarded, total_zone_awarded, total_zone]
    )

    print(
        f"Probability of being awarded a permit for {zone}, "
        f"given applied 1+ option in zone: {prob_zone_awarded:.2%} "
        f"({total_zone_awarded}/{total_zone})"
    )


# Sort the rows by probability and convert them into a dataframe
df_prob_awarded_zone_applied_for = zone_probabilities_to_crosstab(
    prob_awarded_zone_applied_for,
    ["Zone", "Probability", "Total Awarded", "Total Applied"],
)

# Show the new dataframe
df_prob_awarded_zone_applied_for

Probability of being awarded a permit for Core Enchantment Zone, given applied 1+ option in zone: 2.36% (723/30644)
Probability of being awarded a permit for Colchuck Zone, given applied 1+ option in zone: 2.91% (353/12137)
Probability of being awarded a permit for Snow Zone, given applied 1+ option in zone: 7.77% (637/8198)
Probability of being awarded a permit for Stuart  Zone, given applied 1+ option in zone: 10.32% (472/4575)
Probability of being awarded a permit for Eightmile/Caroline Zone, given applied 1+ option in zone: 13.49% (284/2106)
Probability of being awarded a permit for Eightmile/Caroline Zone (stock), given applied 1+ option in zone: 15.23% (39/256)
Probability of being awarded a permit for Stuart Zone (stock), given applied 1+ option in zone: 12.20% (20/164)


Unnamed: 0,Zone,Probability,Total Awarded,Total Applied,Probability (%)
0,Eightmile/Caroline Zone (stock),0.152344,39,256,15.23%
1,Eightmile/Caroline Zone,0.134853,284,2106,13.49%
2,Stuart Zone (stock),0.121951,20,164,12.20%
3,Stuart Zone,0.103169,472,4575,10.32%
4,Snow Zone,0.077702,637,8198,7.77%
5,Colchuck Zone,0.029085,353,12137,2.91%
6,Core Enchantment Zone,0.023594,723,30644,2.36%


## Split


In [64]:
# Load the split results from the csv file
df_split = pd.read_csv(
    filepath_or_buffer="./2022_results_split.csv",
    na_filter=False,  # Keep 'N/A' as a literal string instead of NaN
    # The import was failing to parse the date columns, so they are
    # listed explicitly, together with the format used on export.
    date_format="%m-%d-%Y",
    parse_dates=["preferred_entry_date", "awarded_entry_date"],
)

# Preview the first rows
df_split.head()

Unnamed: 0,preferred_zone,preferred_entry_date,minimum_acceptable_group_size,results_status,awarded_preference,awarded_entry_date,awarded_entrance,awarded_group_size,awarded,preferred_option,preferred_entry_date_month,preferred_entry_date_day
0,Core Enchantment Zone,2022-09-02,8,Unsuccessful,0,1970-01-01,,0,False,1,September,Friday
1,Colchuck Zone,2022-08-15,2,Unsuccessful,0,1970-01-01,,0,False,1,August,Monday
2,Snow Zone,2022-08-12,8,Unsuccessful,0,1970-01-01,,0,False,1,August,Friday
3,Core Enchantment Zone,2022-07-12,2,Unsuccessful,0,1970-01-01,,0,False,1,July,Tuesday
4,Stuart Zone,2022-09-03,4,Unsuccessful,0,1970-01-01,,0,False,1,September,Saturday


In [76]:
# Load the skipped split results from the csv file
df_split_skipped = pd.read_csv(
    filepath_or_buffer="./2022_results_split_skipped.csv",
    na_filter=False,  # Keep 'N/A' as a literal string instead of NaN
    # The import was failing to parse the date columns, so they are
    # listed explicitly, together with the format used on export.
    date_format="%m-%d-%Y",
    parse_dates=["preferred_entry_date", "awarded_entry_date"],
)

# Preview the first rows
df_split_skipped.head()

Unnamed: 0,preferred_zone,preferred_entry_date,minimum_acceptable_group_size,results_status,awarded_preference,awarded_entry_date,awarded_entrance,awarded_group_size,awarded,preferred_option,preferred_entry_date_month,preferred_entry_date_day
0,Core Enchantment Zone,2022-06-24,4,Awarded,2,2022-07-01,Snow Zone,4,False,1,June,Friday
1,Core Enchantment Zone,2022-09-26,6,Awarded,3,2022-10-04,Stuart Zone,6,False,1,September,Monday
2,Core Enchantment Zone,2022-06-24,2,Awarded,3,2022-10-21,Colchuck Zone,2,False,1,June,Friday
3,Core Enchantment Zone,2022-08-30,3,Awarded,2,2022-08-30,Snow Zone,3,False,1,August,Tuesday
4,Core Enchantment Zone,2022-09-24,4,Awarded,3,2022-09-25,Core Enchantment Zone,4,False,1,September,Saturday


## Zone


In [92]:
# Heading for the per-zone counts displayed below
print("Total split entries by zone:")

# Number of split entries for each preferred zone
df_split.loc[:, "preferred_zone"].value_counts()

Total split entries by zone:


preferred_zone
Core Enchantment Zone              70487
Colchuck Zone                      17156
Snow Zone                          11695
Stuart  Zone                        6493
Eightmile/Caroline Zone             2511
Eightmile/Caroline Zone (stock)      287
Stuart Zone (stock)                  181
Name: count, dtype: int64

In [93]:
# Keep only the split entries whose "awarded" flag is True
awarded_split_filter = df_split["awarded"].eq(True)

df_split_awarded = df_split.loc[awarded_split_filter]

In [96]:
# Count the awarded entries per preferred zone and show them as a dataframe
df_awarded_zone = (
    df_split_awarded["preferred_zone"]
    .value_counts()
    .rename("Total Awarded")
    .to_frame()
)

df_awarded_zone

Unnamed: 0_level_0,Total Awarded
preferred_zone,Unnamed: 1_level_1
Core Enchantment Zone,723
Snow Zone,637
Stuart Zone,472
Colchuck Zone,353
Eightmile/Caroline Zone,284
Eightmile/Caroline Zone (stock),39
Stuart Zone (stock),20


In [100]:
# Count the skipped entries per preferred zone and show them as a dataframe
df_skipped_by_zone = (
    df_split_skipped["preferred_zone"]
    .value_counts()
    .rename("Total Skipped")
    .to_frame()
)

df_skipped_by_zone

Unnamed: 0_level_0,Total Skipped
preferred_zone,Unnamed: 1_level_1
Core Enchantment Zone,1105
Colchuck Zone,396
Snow Zone,165
Stuart Zone,128
Eightmile/Caroline Zone,37
Eightmile/Caroline Zone (stock),8
Stuart Zone (stock),7


In [101]:
# Show AtS (awarded-to-skipped ratio) for each zone.
# pd.merge defaults to an inner join, so only zones present in both
# tables are kept.
df_awarded_skipped_by_zone = pd.merge(
    df_awarded_zone,
    df_skipped_by_zone,
    on="preferred_zone",
)

# Calculate the AtS for each zone (kept numeric for now so it sorts correctly)
df_awarded_skipped_by_zone["ratio"] = (
    df_awarded_skipped_by_zone["Total Awarded"]
    / df_awarded_skipped_by_zone["Total Skipped"]
)

# Sort on the numeric ratio first, THEN format it to two decimals.
# Sorting the formatted strings would compare them lexicographically
# (e.g. "10.00" would sort below "9.00").
df_awarded_skipped_by_zone = df_awarded_skipped_by_zone.sort_values(
    by="ratio", ascending=False
).assign(ratio=lambda frame: frame["ratio"].map("{:.2f}".format))

df_awarded_skipped_by_zone

Unnamed: 0_level_0,Total Awarded,Total Skipped,ratio
preferred_zone,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Eightmile/Caroline Zone,284,37,7.68
Eightmile/Caroline Zone (stock),39,8,4.88
Snow Zone,637,165,3.86
Stuart Zone,472,128,3.69
Stuart Zone (stock),20,7,2.86
Colchuck Zone,353,396,0.89
Core Enchantment Zone,723,1105,0.65


## Month


In [65]:
# Number of split entries for each preferred entry month
df_split.loc[:, "preferred_entry_date_month"].value_counts()

preferred_entry_date_month
August       38716
July         28707
September    25399
June          9819
October       4335
May           1834
Name: count, dtype: int64

In [97]:
# Count the awarded entries for each preferred entry month
df_awarded_by_month = (
    df_split_awarded.groupby("preferred_entry_date_month")["awarded"]
    .count()
    .rename("Total Awarded")
    .to_frame()
)

df_awarded_by_month

Unnamed: 0_level_0,Total Awarded
preferred_entry_date_month,Unnamed: 1_level_1
August,491
July,490
June,469
May,249
October,357
September,472


In [98]:
# Count the skipped entries for each preferred entry month
df_skipped_by_month = (
    df_split_skipped.groupby("preferred_entry_date_month")
    .size()
    .rename("Total Skipped")
    .to_frame()
)

df_skipped_by_month

Unnamed: 0_level_0,Total Skipped
preferred_entry_date_month,Unnamed: 1_level_1
August,456
July,420
June,260
May,71
October,180
September,459


In [99]:
# Merge awarded and skipped by month.
# pd.merge defaults to an inner join, so only months present in both
# tables are kept.
df_awarded_skipped_by_month = pd.merge(
    df_awarded_by_month,
    df_skipped_by_month,
    on="preferred_entry_date_month",
)

# Calculate ratio of awarded to skipped (kept numeric for now so it sorts correctly)
df_awarded_skipped_by_month["ratio"] = (
    df_awarded_skipped_by_month["Total Awarded"]
    / df_awarded_skipped_by_month["Total Skipped"]
)

# Sort on the numeric ratio first, THEN format it to two decimals.
# Sorting the formatted strings would compare them lexicographically
# (e.g. "10.00" would sort below "9.00").
df_awarded_skipped_by_month = df_awarded_skipped_by_month.sort_values(
    by="ratio", ascending=False
).assign(ratio=lambda frame: frame["ratio"].map("{:.2f}".format))

# Put the columns in display order
df_awarded_skipped_by_month = df_awarded_skipped_by_month[
    ["Total Awarded", "Total Skipped", "ratio"]
]

# Show dataframe
df_awarded_skipped_by_month

Unnamed: 0_level_0,Total Awarded,Total Skipped,ratio
preferred_entry_date_month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
May,249,71,3.51
October,357,180,1.98
June,469,260,1.8
July,490,420,1.17
August,491,456,1.08
September,472,459,1.03


## Day of the week


In [102]:
# Number of split entries for each preferred entry day of the week
df_split.loc[:, "preferred_entry_date_day"].value_counts()

preferred_entry_date_day
Friday       24370
Thursday     19072
Saturday     15758
Monday       14582
Wednesday    13606
Tuesday      12566
Sunday        8856
Name: count, dtype: int64

In [103]:
# Count the awarded entries for each day of the week
df_awarded_by_day = (
    df_split_awarded["preferred_entry_date_day"]
    .value_counts()
    .rename("Total Awarded")
    .to_frame()
)

df_awarded_by_day

Unnamed: 0_level_0,Total Awarded
preferred_entry_date_day,Unnamed: 1_level_1
Sunday,462
Monday,353
Friday,349
Saturday,349
Thursday,343
Tuesday,341
Wednesday,331


In [104]:
# Count the skipped entries for each day of the week
df_skipped_by_day = (
    df_split_skipped["preferred_entry_date_day"]
    .value_counts()
    .rename("Total Skipped")
    .to_frame()
)

df_skipped_by_day

Unnamed: 0_level_0,Total Skipped
preferred_entry_date_day,Unnamed: 1_level_1
Friday,360
Monday,296
Thursday,282
Saturday,266
Wednesday,223
Tuesday,213
Sunday,206


In [105]:
# Merge awarded and skipped by day of the week.
# pd.merge defaults to an inner join, so only days present in both
# tables are kept.
df_awarded_skipped_by_day = pd.merge(
    df_awarded_by_day,
    df_skipped_by_day,
    on="preferred_entry_date_day",
)

# Calculate ratio of awarded to skipped (kept numeric for now so it sorts correctly)
df_awarded_skipped_by_day["ratio"] = (
    df_awarded_skipped_by_day["Total Awarded"]
    / df_awarded_skipped_by_day["Total Skipped"]
)

# Sort on the numeric ratio first, THEN format it to two decimals.
# Sorting the formatted strings would compare them lexicographically
# (e.g. "10.00" would sort below "9.00").
df_awarded_skipped_by_day = df_awarded_skipped_by_day.sort_values(
    by="ratio", ascending=False
).assign(ratio=lambda frame: frame["ratio"].map("{:.2f}".format))

# Put the columns in display order
df_awarded_skipped_by_day = df_awarded_skipped_by_day[
    ["Total Awarded", "Total Skipped", "ratio"]
]

# Show dataframe
df_awarded_skipped_by_day

Unnamed: 0_level_0,Total Awarded,Total Skipped,ratio
preferred_entry_date_day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sunday,462,206,2.24
Tuesday,341,213,1.6
Wednesday,331,223,1.48
Saturday,349,266,1.31
Thursday,343,282,1.22
Monday,353,296,1.19
Friday,349,360,0.97


## Core Enchantment Zone


In [110]:
# Keep only the split entries whose preferred zone is the Core Enchantment Zone
core_filter = df_split["preferred_zone"].eq("Core Enchantment Zone")

df_split_core = df_split.loc[core_filter]

# Of the Core Enchantment Zone entries, keep the ones flagged as awarded
awarded_filter = df_split_core["awarded"].eq(True)

df_split_core_awarded = df_split_core.loc[awarded_filter]

# Same zone restriction applied to the skipped entries
df_split_skipped_core = df_split_skipped.loc[
    df_split_skipped["preferred_zone"].eq("Core Enchantment Zone")
]

### Month


In [108]:
# Number of Core Enchantment Zone entries for each preferred entry month
df_split_core.loc[:, "preferred_entry_date_month"].value_counts()

preferred_entry_date_month
August       25358
July         18103
September    17023
June          5806
October       3132
May           1065
Name: count, dtype: int64

In [107]:
# Count the awarded Core Enchantment Zone entries for each month
df_core_awarded_by_month = (
    df_split_core_awarded.groupby("preferred_entry_date_month")["awarded"]
    .count()
    .rename("Total Awarded")
    .to_frame()
)

df_core_awarded_by_month

Unnamed: 0_level_0,Total Awarded
preferred_entry_date_month,Unnamed: 1_level_1
August,134
July,129
June,124
May,77
October,134
September,125


In [111]:
# Count the skipped Core Enchantment Zone entries for each month
df_core_skipped_by_month = (
    df_split_skipped_core.groupby("preferred_entry_date_month")
    .size()
    .rename("Total Skipped")
    .to_frame()
)

df_core_skipped_by_month

Unnamed: 0_level_0,Total Skipped
preferred_entry_date_month,Unnamed: 1_level_1
August,277
July,254
June,143
May,39
October,113
September,279


In [112]:
# Merge awarded and skipped by month for Core Enchantment Zone.
# pd.merge defaults to an inner join, so only months present in both
# tables are kept.
df_core_awarded_skipped_by_month = pd.merge(
    df_core_awarded_by_month,
    df_core_skipped_by_month,
    on="preferred_entry_date_month",
)

# Calculate ratio of awarded to skipped for Core Enchantment Zone
# (kept numeric for now so it sorts correctly)
df_core_awarded_skipped_by_month["ratio"] = (
    df_core_awarded_skipped_by_month["Total Awarded"]
    / df_core_awarded_skipped_by_month["Total Skipped"]
)

# Sort on the numeric ratio first, THEN format it to two decimals.
# Sorting the formatted strings would compare them lexicographically
# (e.g. "10.00" would sort below "9.00").
df_core_awarded_skipped_by_month = df_core_awarded_skipped_by_month.sort_values(
    by="ratio", ascending=False
).assign(ratio=lambda frame: frame["ratio"].map("{:.2f}".format))

# Put the columns in display order
df_core_awarded_skipped_by_month = df_core_awarded_skipped_by_month[
    ["Total Awarded", "Total Skipped", "ratio"]
]

# Show dataframe
df_core_awarded_skipped_by_month

Unnamed: 0_level_0,Total Awarded,Total Skipped,ratio
preferred_entry_date_month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
May,77,39,1.97
October,134,113,1.19
June,124,143,0.87
July,129,254,0.51
August,134,277,0.48
September,125,279,0.45


### Day of the Week


In [113]:
# Count the awarded Core Zone entries for each day of the week
df_core_awarded_by_day = (
    df_split_core_awarded["preferred_entry_date_day"]
    .value_counts()
    .rename("Total Awarded")
    .to_frame()
)

df_core_awarded_by_day

Unnamed: 0_level_0,Total Awarded
preferred_entry_date_day,Unnamed: 1_level_1
Sunday,132
Tuesday,108
Monday,107
Saturday,97
Wednesday,96
Thursday,92
Friday,91


In [114]:
# Count the skipped Core Zone entries for each day of the week
df_core_skipped_by_day = (
    df_split_skipped_core["preferred_entry_date_day"]
    .value_counts()
    .rename("Total Skipped")
    .to_frame()
)

df_core_skipped_by_day

Unnamed: 0_level_0,Total Skipped
preferred_entry_date_day,Unnamed: 1_level_1
Friday,195
Thursday,175
Monday,173
Saturday,155
Wednesday,149
Tuesday,132
Sunday,126


In [115]:
# Merge awarded and skipped by day of the week for Core Enchantment Zone.
# pd.merge defaults to an inner join, so only days present in both
# tables are kept.
df_core_awarded_skipped_by_day = pd.merge(
    df_core_awarded_by_day,
    df_core_skipped_by_day,
    on="preferred_entry_date_day",
)

# Calculate ratio of awarded to skipped for Core Enchantment Zone
# (kept numeric for now so it sorts correctly)
df_core_awarded_skipped_by_day["ratio"] = (
    df_core_awarded_skipped_by_day["Total Awarded"]
    / df_core_awarded_skipped_by_day["Total Skipped"]
)

# Sort on the numeric ratio first, THEN format it to two decimals.
# Sorting the formatted strings would compare them lexicographically
# (e.g. "10.00" would sort below "9.00").
df_core_awarded_skipped_by_day = df_core_awarded_skipped_by_day.sort_values(
    by="ratio", ascending=False
).assign(ratio=lambda frame: frame["ratio"].map("{:.2f}".format))

# Put the columns in display order
df_core_awarded_skipped_by_day = df_core_awarded_skipped_by_day[
    ["Total Awarded", "Total Skipped", "ratio"]
]

# Show dataframe
df_core_awarded_skipped_by_day

Unnamed: 0_level_0,Total Awarded,Total Skipped,ratio
preferred_entry_date_day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sunday,132,126,1.05
Tuesday,108,132,0.82
Wednesday,96,149,0.64
Saturday,97,155,0.63
Monday,107,173,0.62
Thursday,92,175,0.53
Friday,91,195,0.47


### Group Size


In [124]:
# Number of Core Enchantment Zone entries for each minimum acceptable group size
df_split_core.loc[:, "minimum_acceptable_group_size"].value_counts()

minimum_acceptable_group_size
4    20734
8    14391
2    12463
6    12198
5     4721
3     4332
7      835
1      813
Name: count, dtype: int64

In [121]:
# Count the awarded Core Enchantment Zone entries for each group size
df_core_awarded_by_group_size = (
    df_split_core_awarded["minimum_acceptable_group_size"]
    .value_counts()
    .rename("Total Awarded")
    .to_frame()
)

df_core_awarded_by_group_size

Unnamed: 0_level_0,Total Awarded
minimum_acceptable_group_size,Unnamed: 1_level_1
4,198
2,196
8,92
6,86
1,62
3,44
5,34
7,11


In [122]:
# Count the skipped Core Enchantment Zone entries for each group size
df_core_skipped_by_group_size = (
    df_split_skipped_core["minimum_acceptable_group_size"]
    .value_counts()
    .rename("Total Skipped")
    .to_frame()
)

df_core_skipped_by_group_size

Unnamed: 0_level_0,Total Skipped
minimum_acceptable_group_size,Unnamed: 1_level_1
4,304
2,231
6,187
8,164
5,95
3,76
1,27
7,21


In [123]:
# Merge awarded and skipped by group size for Core Enchantment Zone.
# pd.merge defaults to an inner join, so only group sizes present in
# both tables are kept.
df_core_awarded_skipped_by_group_size = pd.merge(
    df_core_awarded_by_group_size,
    df_core_skipped_by_group_size,
    on="minimum_acceptable_group_size",
)

# Calculate ratio of awarded to skipped for Core Enchantment Zone
# (kept numeric for now so it sorts correctly)
df_core_awarded_skipped_by_group_size["ratio"] = (
    df_core_awarded_skipped_by_group_size["Total Awarded"]
    / df_core_awarded_skipped_by_group_size["Total Skipped"]
)

# Sort on the numeric ratio first, THEN format it to two decimals.
# Sorting the formatted strings would compare them lexicographically
# (e.g. "10.00" would sort below "9.00").
df_core_awarded_skipped_by_group_size = (
    df_core_awarded_skipped_by_group_size.sort_values(by="ratio", ascending=False)
    .assign(ratio=lambda frame: frame["ratio"].map("{:.2f}".format))
)

# Put the columns in display order
df_core_awarded_skipped_by_group_size = df_core_awarded_skipped_by_group_size[
    ["Total Awarded", "Total Skipped", "ratio"]
]

# Show dataframe
df_core_awarded_skipped_by_group_size

Unnamed: 0_level_0,Total Awarded,Total Skipped,ratio
minimum_acceptable_group_size,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,62,27,2.3
2,196,231,0.85
4,198,304,0.65
3,44,76,0.58
8,92,164,0.56
7,11,21,0.52
6,86,187,0.46
5,34,95,0.36


### Entrance awarded by day of the week in August for Core Enchantment Zone


In [135]:
# Keep only the Core Enchantment Zone entries with an August entry date
august_filter = df_split_core["preferred_entry_date_month"].eq("August")

df_split_core_august = df_split_core.loc[august_filter]

# Of the August entries, keep the ones flagged as awarded
awarded_august_filter = df_split_core_august["awarded"].eq(True)

df_split_core_august_awarded = df_split_core_august.loc[awarded_august_filter]

# Per day of the week:
#   - the sum of awarded_group_size
#   - "Total Days": the number of distinct entry dates among the awarded
#     entries, i.e. how many times that weekday occurs (August had 4 Sundays)
# NOTE(review): the first column label says "Sunday" although it holds the
# total for every day of the week - consider renaming it.
df_core_august_awarded_by_day = df_split_core_august_awarded.groupby(
    "preferred_entry_date_day"
).agg(
    **{
        "Awarded Sunday Entrance in August Core Zone": ("awarded_group_size", "sum"),
        "Total Days": ("preferred_entry_date", "nunique"),
    }
)

# Average awarded group-size total per calendar day, for each day of the week
df_core_august_awarded_by_day["Average Awarded/Day"] = (
    df_core_august_awarded_by_day["Awarded Sunday Entrance in August Core Zone"]
    / df_core_august_awarded_by_day["Total Days"]
)

# Largest awarded total first
df_core_august_awarded_by_day = df_core_august_awarded_by_day.sort_values(
    by="Awarded Sunday Entrance in August Core Zone", ascending=False
)

df_core_august_awarded_by_day

Unnamed: 0_level_0,Awarded Sunday Entrance in August Core Zone,Total Days,Average Awarded/Day
preferred_entry_date_day,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Sunday,96,4,24.0
Monday,80,5,16.0
Tuesday,80,5,16.0
Wednesday,80,5,16.0
Friday,64,4,16.0
Saturday,64,4,16.0
Thursday,64,4,16.0


### Awarded group size on Sunday in August for Core Enchantment Zone


In [134]:
# Keep only the awarded August Core Zone entries that fall on a Sunday
sunday_filter = df_split_core_august_awarded["preferred_entry_date_day"].eq("Sunday")

df_split_core_august_awarded_sunday = df_split_core_august_awarded.loc[sunday_filter]

# Count those awarded August-Sunday entries for each awarded group size
df_core_august_sunday_awarded_by_group_size = (
    df_split_core_august_awarded_sunday["awarded_group_size"]
    .value_counts()
    .rename("Total Awarded")
    .to_frame()
)

df_core_august_sunday_awarded_by_group_size

Unnamed: 0_level_0,Total Awarded
awarded_group_size,Unnamed: 1_level_1
2,5
6,4
4,4
8,3
1,2
5,2
7,1
3,1
