# Clean


In [2]:
import os

CSV_FILE_NAME = "2020_results.csv"

# Keep everything before .csv
STRIPPED_CSV_FILE_NAME = CSV_FILE_NAME.split(".")[0]

FILE_PATH = os.path.join(os.getcwd(), CSV_FILE_NAME)

## Import


In [3]:
import pandas as pd

# Import 2021 Enchantments Lottery Data
raw_df = pd.read_csv(
    FILE_PATH,
    header=0,
    parse_dates=[
        "Preferred Entry Date 1",
        "Preferred Entry Date 2",
        "Preferred Entry Date 3",
        "Awarded Entry Date",
    ],
    date_format="%m-%d-%Y",
    low_memory=False,
)

# Take a quick look at the data
raw_df.head()

Unnamed: 0,Permit Type,Preferred Entry Date 1,Preferred Zone 1,Preferred Entry Date 2,Preferred Zone 2,Preferred Entry Date 3,Preferred Zone 3,Results Status,Awarded Preference,Awarded Entry Date,Awarded Entrance Code/Name,Awarded Group Size
0,Overnight Permit,6/26/2020,Colchuck Zone,6/26/2020,Core Enchantment Zone,7/10/2020,Core Enchantment Zone,Unsuccessful,,,,
1,Overnight Permit,8/1/2020,Core Enchantment Zone,9/12/2020,Core Enchantment Zone,6/21/2020,Core Enchantment Zone,Unsuccessful,,,,
2,Overnight Permit,9/19/2020,Core Enchantment Zone,9/12/2020,Core Enchantment Zone,9/26/2020,Core Enchantment Zone,Unsuccessful,,,,
3,Overnight Permit,8/22/2020,Core Enchantment Zone,9/17/2020,Core Enchantment Zone,9/18/2020,Core Enchantment Zone,Unsuccessful,,,,
4,Overnight Permit,7/17/2020,Snow Zone,7/24/2020,Colchuck Zone,7/17/2020,Core Enchantment Zone,Awarded,1.0,7/17/2020,Snow Zone,6.0


## Create Cleaned Full Application Dataframe


In [4]:
raw_df.dtypes

Permit Type                    object
Preferred Entry Date 1         object
Preferred Zone 1               object
Preferred Entry Date 2         object
Preferred Zone 2               object
Preferred Entry Date 3         object
Preferred Zone 3               object
Results Status                 object
Awarded Preference            float64
Awarded Entry Date             object
Awarded Entrance Code/Name     object
Awarded Group Size            float64
dtype: object

In [5]:
# Identify columns with date data
date_columns = [
    "Preferred Entry Date 1",
    "Preferred Entry Date 2",
    "Preferred Entry Date 3",
    "Awarded Entry Date",
]

# Convert date columns to datetime
for col in date_columns:
    raw_df[col] = pd.to_datetime(raw_df[col])

# Check column data types
raw_df.dtypes

Permit Type                           object
Preferred Entry Date 1        datetime64[ns]
Preferred Zone 1                      object
Preferred Entry Date 2        datetime64[ns]
Preferred Zone 2                      object
Preferred Entry Date 3        datetime64[ns]
Preferred Zone 3                      object
Results Status                        object
Awarded Preference                   float64
Awarded Entry Date            datetime64[ns]
Awarded Entrance Code/Name            object
Awarded Group Size                   float64
dtype: object

In [6]:
# Number columns to convert NaN values to 0
number_columns = [
    "Awarded Preference",
    "Awarded Group Size",
]

# Convert NaN values to 0
for col in number_columns:
    raw_df[col] = raw_df[col].fillna(0)

# Convert float to int
for col in raw_df.columns:
    if raw_df[col].dtype == "float64":
        raw_df[col] = raw_df[col].astype(int)

# Check column data types
raw_df.dtypes

Permit Type                           object
Preferred Entry Date 1        datetime64[ns]
Preferred Zone 1                      object
Preferred Entry Date 2        datetime64[ns]
Preferred Zone 2                      object
Preferred Entry Date 3        datetime64[ns]
Preferred Zone 3                      object
Results Status                        object
Awarded Preference                     int64
Awarded Entry Date            datetime64[ns]
Awarded Entrance Code/Name            object
Awarded Group Size                     int64
dtype: object

In [7]:
# Fill NaN values in string columns and convert to string
columns_to_convert = [
    "Preferred Zone 1",
    "Preferred Zone 2",
    "Preferred Zone 3",
    "Results Status",
    "Awarded Entrance Code/Name",
]
for col in columns_to_convert:
    # Converting to string may be unneccessary here
    raw_df[col] = raw_df[col].fillna("N/A").astype(str)

# Check column data types
raw_df.dtypes

Permit Type                           object
Preferred Entry Date 1        datetime64[ns]
Preferred Zone 1                      object
Preferred Entry Date 2        datetime64[ns]
Preferred Zone 2                      object
Preferred Entry Date 3        datetime64[ns]
Preferred Zone 3                      object
Results Status                        object
Awarded Preference                     int64
Awarded Entry Date            datetime64[ns]
Awarded Entrance Code/Name            object
Awarded Group Size                     int64
dtype: object

In [8]:
# Check for NaN values
raw_df.isna().sum()

Permit Type                       0
Preferred Entry Date 1            0
Preferred Zone 1                  0
Preferred Entry Date 2          368
Preferred Zone 2                  0
Preferred Entry Date 3          934
Preferred Zone 3                  0
Results Status                    0
Awarded Preference                0
Awarded Entry Date            24036
Awarded Entrance Code/Name        0
Awarded Group Size                0
dtype: int64

In [9]:
# Convert NaN values in date columns to 0
# This feels like an odd approach, but I want to maintain the date data type.
# The analyst will need to understand that zero epoch dates are actually NaN values.
for col in date_columns:  # Date columns defined in previous cell
    raw_df[col] = raw_df[col].fillna(pd.Timestamp(0))

In [10]:
# Check for NaN values
raw_df.isna().sum()

Permit Type                   0
Preferred Entry Date 1        0
Preferred Zone 1              0
Preferred Entry Date 2        0
Preferred Zone 2              0
Preferred Entry Date 3        0
Preferred Zone 3              0
Results Status                0
Awarded Preference            0
Awarded Entry Date            0
Awarded Entrance Code/Name    0
Awarded Group Size            0
dtype: int64

In [11]:
# Check data types
raw_df.dtypes

Permit Type                           object
Preferred Entry Date 1        datetime64[ns]
Preferred Zone 1                      object
Preferred Entry Date 2        datetime64[ns]
Preferred Zone 2                      object
Preferred Entry Date 3        datetime64[ns]
Preferred Zone 3                      object
Results Status                        object
Awarded Preference                     int64
Awarded Entry Date            datetime64[ns]
Awarded Entrance Code/Name            object
Awarded Group Size                     int64
dtype: object

In [12]:
# Check values for each column
for col in raw_df.columns:
    print(f"{col}: {raw_df[col].unique()}\n\n\n")

Permit Type: ['Overnight Permit']



Preferred Entry Date 1: <DatetimeArray>
['2020-06-26 00:00:00', '2020-08-01 00:00:00', '2020-09-19 00:00:00',
 '2020-08-22 00:00:00', '2020-07-17 00:00:00', '2020-07-30 00:00:00',
 '2020-10-14 00:00:00', '2020-09-05 00:00:00', '2020-08-12 00:00:00',
 '2020-09-10 00:00:00',
 ...
 '2020-05-28 00:00:00', '2020-10-21 00:00:00', '2020-05-17 00:00:00',
 '2020-10-12 00:00:00', '2020-10-22 00:00:00', '2020-10-30 00:00:00',
 '2020-10-28 00:00:00', '2020-10-25 00:00:00', '2020-10-13 00:00:00',
 '2020-10-26 00:00:00']
Length: 167, dtype: datetime64[ns]



Preferred Zone 1: ['Colchuck Zone' 'Core Enchantment Zone' 'Snow Zone'
 'Eightmile/Caroline Zone (stock)' 'Stuart  Zone'
 'Eightmile/Caroline Zone' 'Stuart Zone (stock)']



Preferred Entry Date 2: <DatetimeArray>
['2020-06-26 00:00:00', '2020-09-12 00:00:00', '2020-09-17 00:00:00',
 '2020-07-24 00:00:00', '2020-08-04 00:00:00', '1970-01-01 00:00:00',
 '2020-08-27 00:00:00', '2020-08-05 00:00:00', '2020-07-14

In [13]:
# Examine the first 20 rows
raw_df.head(20)

Unnamed: 0,Permit Type,Preferred Entry Date 1,Preferred Zone 1,Preferred Entry Date 2,Preferred Zone 2,Preferred Entry Date 3,Preferred Zone 3,Results Status,Awarded Preference,Awarded Entry Date,Awarded Entrance Code/Name,Awarded Group Size
0,Overnight Permit,2020-06-26,Colchuck Zone,2020-06-26,Core Enchantment Zone,2020-07-10,Core Enchantment Zone,Unsuccessful,0,1970-01-01,,0
1,Overnight Permit,2020-08-01,Core Enchantment Zone,2020-09-12,Core Enchantment Zone,2020-06-21,Core Enchantment Zone,Unsuccessful,0,1970-01-01,,0
2,Overnight Permit,2020-09-19,Core Enchantment Zone,2020-09-12,Core Enchantment Zone,2020-09-26,Core Enchantment Zone,Unsuccessful,0,1970-01-01,,0
3,Overnight Permit,2020-08-22,Core Enchantment Zone,2020-09-17,Core Enchantment Zone,2020-09-18,Core Enchantment Zone,Unsuccessful,0,1970-01-01,,0
4,Overnight Permit,2020-07-17,Snow Zone,2020-07-24,Colchuck Zone,2020-07-17,Core Enchantment Zone,Awarded,1,2020-07-17,Snow Zone,6
5,Overnight Permit,2020-07-30,Core Enchantment Zone,2020-08-04,Colchuck Zone,2020-09-28,Colchuck Zone,Unsuccessful,0,1970-01-01,,0
6,Overnight Permit,2020-10-14,Colchuck Zone,1970-01-01,,1970-01-01,,Awarded,1,2020-10-14,Colchuck Zone,2
7,Overnight Permit,2020-09-05,Eightmile/Caroline Zone (stock),2020-08-27,Core Enchantment Zone,2020-07-31,Stuart Zone,Awarded,1,2020-09-05,Eightmile/Caroline Zone (stock),6
8,Overnight Permit,2020-08-01,Core Enchantment Zone,2020-08-05,Core Enchantment Zone,2020-08-03,Stuart Zone,Unsuccessful,0,1970-01-01,,0
9,Overnight Permit,2020-07-17,Colchuck Zone,2020-07-14,Core Enchantment Zone,2020-07-24,Stuart Zone,Unsuccessful,0,1970-01-01,,0


In [14]:
# Drop the permit type column
raw_df = raw_df.drop(
    columns=[
        "Permit Type",
    ]
)

# Check the data
raw_df.head(20)

Unnamed: 0,Preferred Entry Date 1,Preferred Zone 1,Preferred Entry Date 2,Preferred Zone 2,Preferred Entry Date 3,Preferred Zone 3,Results Status,Awarded Preference,Awarded Entry Date,Awarded Entrance Code/Name,Awarded Group Size
0,2020-06-26,Colchuck Zone,2020-06-26,Core Enchantment Zone,2020-07-10,Core Enchantment Zone,Unsuccessful,0,1970-01-01,,0
1,2020-08-01,Core Enchantment Zone,2020-09-12,Core Enchantment Zone,2020-06-21,Core Enchantment Zone,Unsuccessful,0,1970-01-01,,0
2,2020-09-19,Core Enchantment Zone,2020-09-12,Core Enchantment Zone,2020-09-26,Core Enchantment Zone,Unsuccessful,0,1970-01-01,,0
3,2020-08-22,Core Enchantment Zone,2020-09-17,Core Enchantment Zone,2020-09-18,Core Enchantment Zone,Unsuccessful,0,1970-01-01,,0
4,2020-07-17,Snow Zone,2020-07-24,Colchuck Zone,2020-07-17,Core Enchantment Zone,Awarded,1,2020-07-17,Snow Zone,6
5,2020-07-30,Core Enchantment Zone,2020-08-04,Colchuck Zone,2020-09-28,Colchuck Zone,Unsuccessful,0,1970-01-01,,0
6,2020-10-14,Colchuck Zone,1970-01-01,,1970-01-01,,Awarded,1,2020-10-14,Colchuck Zone,2
7,2020-09-05,Eightmile/Caroline Zone (stock),2020-08-27,Core Enchantment Zone,2020-07-31,Stuart Zone,Awarded,1,2020-09-05,Eightmile/Caroline Zone (stock),6
8,2020-08-01,Core Enchantment Zone,2020-08-05,Core Enchantment Zone,2020-08-03,Stuart Zone,Unsuccessful,0,1970-01-01,,0
9,2020-07-17,Colchuck Zone,2020-07-14,Core Enchantment Zone,2020-07-24,Stuart Zone,Unsuccessful,0,1970-01-01,,0


In [15]:
# Change columns names to lower case with underscores for spaces
raw_df.columns = [
    col.lower().replace(" ", "_").replace("/", "_") for col in raw_df.columns
]

# Check the names
raw_df.columns

Index(['preferred_entry_date_1', 'preferred_zone_1', 'preferred_entry_date_2',
       'preferred_zone_2', 'preferred_entry_date_3', 'preferred_zone_3',
       'results_status', 'awarded_preference', 'awarded_entry_date',
       'awarded_entrance_code_name', 'awarded_group_size'],
      dtype='object')

In [16]:
# Check the data
raw_df.head(20)

Unnamed: 0,preferred_entry_date_1,preferred_zone_1,preferred_entry_date_2,preferred_zone_2,preferred_entry_date_3,preferred_zone_3,results_status,awarded_preference,awarded_entry_date,awarded_entrance_code_name,awarded_group_size
0,2020-06-26,Colchuck Zone,2020-06-26,Core Enchantment Zone,2020-07-10,Core Enchantment Zone,Unsuccessful,0,1970-01-01,,0
1,2020-08-01,Core Enchantment Zone,2020-09-12,Core Enchantment Zone,2020-06-21,Core Enchantment Zone,Unsuccessful,0,1970-01-01,,0
2,2020-09-19,Core Enchantment Zone,2020-09-12,Core Enchantment Zone,2020-09-26,Core Enchantment Zone,Unsuccessful,0,1970-01-01,,0
3,2020-08-22,Core Enchantment Zone,2020-09-17,Core Enchantment Zone,2020-09-18,Core Enchantment Zone,Unsuccessful,0,1970-01-01,,0
4,2020-07-17,Snow Zone,2020-07-24,Colchuck Zone,2020-07-17,Core Enchantment Zone,Awarded,1,2020-07-17,Snow Zone,6
5,2020-07-30,Core Enchantment Zone,2020-08-04,Colchuck Zone,2020-09-28,Colchuck Zone,Unsuccessful,0,1970-01-01,,0
6,2020-10-14,Colchuck Zone,1970-01-01,,1970-01-01,,Awarded,1,2020-10-14,Colchuck Zone,2
7,2020-09-05,Eightmile/Caroline Zone (stock),2020-08-27,Core Enchantment Zone,2020-07-31,Stuart Zone,Awarded,1,2020-09-05,Eightmile/Caroline Zone (stock),6
8,2020-08-01,Core Enchantment Zone,2020-08-05,Core Enchantment Zone,2020-08-03,Stuart Zone,Unsuccessful,0,1970-01-01,,0
9,2020-07-17,Colchuck Zone,2020-07-14,Core Enchantment Zone,2020-07-24,Stuart Zone,Unsuccessful,0,1970-01-01,,0


In [17]:
# There is one row where the status is 'Applied' and the processing sequence is 0. It seems like some sort of mistake
# And I won't be able to work easily with it. I will drop this row.
# Delete row where status is Applied
raw_df = raw_df[raw_df["results_status"] != "Applied"]

# Check the data
raw_df.head(20)

Unnamed: 0,preferred_entry_date_1,preferred_zone_1,preferred_entry_date_2,preferred_zone_2,preferred_entry_date_3,preferred_zone_3,results_status,awarded_preference,awarded_entry_date,awarded_entrance_code_name,awarded_group_size
0,2020-06-26,Colchuck Zone,2020-06-26,Core Enchantment Zone,2020-07-10,Core Enchantment Zone,Unsuccessful,0,1970-01-01,,0
1,2020-08-01,Core Enchantment Zone,2020-09-12,Core Enchantment Zone,2020-06-21,Core Enchantment Zone,Unsuccessful,0,1970-01-01,,0
2,2020-09-19,Core Enchantment Zone,2020-09-12,Core Enchantment Zone,2020-09-26,Core Enchantment Zone,Unsuccessful,0,1970-01-01,,0
3,2020-08-22,Core Enchantment Zone,2020-09-17,Core Enchantment Zone,2020-09-18,Core Enchantment Zone,Unsuccessful,0,1970-01-01,,0
4,2020-07-17,Snow Zone,2020-07-24,Colchuck Zone,2020-07-17,Core Enchantment Zone,Awarded,1,2020-07-17,Snow Zone,6
5,2020-07-30,Core Enchantment Zone,2020-08-04,Colchuck Zone,2020-09-28,Colchuck Zone,Unsuccessful,0,1970-01-01,,0
6,2020-10-14,Colchuck Zone,1970-01-01,,1970-01-01,,Awarded,1,2020-10-14,Colchuck Zone,2
7,2020-09-05,Eightmile/Caroline Zone (stock),2020-08-27,Core Enchantment Zone,2020-07-31,Stuart Zone,Awarded,1,2020-09-05,Eightmile/Caroline Zone (stock),6
8,2020-08-01,Core Enchantment Zone,2020-08-05,Core Enchantment Zone,2020-08-03,Stuart Zone,Unsuccessful,0,1970-01-01,,0
9,2020-07-17,Colchuck Zone,2020-07-14,Core Enchantment Zone,2020-07-24,Stuart Zone,Unsuccessful,0,1970-01-01,,0


In [18]:
CLEANED_FILE_PATH = os.path.join(os.getcwd(), f"{STRIPPED_CSV_FILE_NAME}_cleaned.csv")

# Export cleaned data to csv
raw_df.to_csv(os.path.join(CLEANED_FILE_PATH), index=False, date_format="%m-%d-%Y")

In [19]:
# Check import of cleaned data
cleaned_raw_df = pd.read_csv(
    CLEANED_FILE_PATH,
    # Import was failing to parse date columns, so I
    # had to pass in the column names
    parse_dates=[
        "preferred_entry_date_1",
        "preferred_entry_date_2",
        "preferred_entry_date_3",
        "awarded_entry_date",
    ],
    date_format="%m-%d-%Y",  # Align format with export format
    na_filter=False,  # Do not convert 'N/A' to NaN
)

# Check the datatypes
cleaned_raw_df.dtypes

preferred_entry_date_1        datetime64[ns]
preferred_zone_1                      object
preferred_entry_date_2        datetime64[ns]
preferred_zone_2                      object
preferred_entry_date_3        datetime64[ns]
preferred_zone_3                      object
results_status                        object
awarded_preference                     int64
awarded_entry_date            datetime64[ns]
awarded_entrance_code_name            object
awarded_group_size                     int64
dtype: object

## Create Cleaned Split Dataframe


In [21]:
# It may be better to break up each individual entry into its own row, so that the data can be analyzed more easily.
preferred_options = [1, 2, 3]

# Columns that every dataframe will have
shared_columns = [
    "results_status",
    "awarded_preference",
    "awarded_entry_date",
    "awarded_entrance_code_name",
    "awarded_group_size",
]
new_dataframes = []

# Iterate over each option number creating a new dataframe for each
for option in preferred_options:
    # Get the columns for the current option
    columns = [
        f"preferred_zone_{option}",
        f"preferred_entry_date_{option}",
    ]
    # Create a new dataframe for the current option
    df_option = cleaned_raw_df[columns + shared_columns].copy()
    # Rename the columns to remove the option number
    df_option.columns = [
        "preferred_zone",
        "preferred_entry_date",
    ] + shared_columns
    # Add a column to indicate if the permit was awarded for the current option
    df_option["awarded"] = df_option["awarded_preference"] == option
    df_option["preferred_option"] = option

    # Append the new dataframe to the list of dataframes
    new_dataframes.append(df_option)

# Concatenate the list of dataframes into a single dataframe
df_split = pd.concat(new_dataframes)

# Drop rows where the preferred division is N/A
df_split = df_split[df_split["preferred_zone"] != "N/A"]

# Check the new dataframe
df_split.head()

Unnamed: 0,preferred_zone,preferred_entry_date,results_status,awarded_preference,awarded_entry_date,awarded_entrance_code_name,awarded_group_size,awarded,preferred_option
0,Colchuck Zone,2020-06-26,Unsuccessful,0,1970-01-01,,0,False,1
1,Core Enchantment Zone,2020-08-01,Unsuccessful,0,1970-01-01,,0,False,1
2,Core Enchantment Zone,2020-09-19,Unsuccessful,0,1970-01-01,,0,False,1
3,Core Enchantment Zone,2020-08-22,Unsuccessful,0,1970-01-01,,0,False,1
4,Snow Zone,2020-07-17,Awarded,1,2020-07-17,Snow Zone,6,True,1


In [22]:
# Add the month of the preferred entry date to the dataframe
import calendar

# Get the month as an integer
df_split["preferred_entry_date" + "_month"] = df_split["preferred_entry_date"].dt.month
# Get the month as a string
df_split["preferred_entry_date" + "_month"] = df_split[
    "preferred_entry_date" + "_month"
].apply(lambda x: calendar.month_name[x])

# Check the data
df_split.head()

Unnamed: 0,preferred_zone,preferred_entry_date,results_status,awarded_preference,awarded_entry_date,awarded_entrance_code_name,awarded_group_size,awarded,preferred_option,preferred_entry_date_month
0,Colchuck Zone,2020-06-26,Unsuccessful,0,1970-01-01,,0,False,1,June
1,Core Enchantment Zone,2020-08-01,Unsuccessful,0,1970-01-01,,0,False,1,August
2,Core Enchantment Zone,2020-09-19,Unsuccessful,0,1970-01-01,,0,False,1,September
3,Core Enchantment Zone,2020-08-22,Unsuccessful,0,1970-01-01,,0,False,1,August
4,Snow Zone,2020-07-17,Awarded,1,2020-07-17,Snow Zone,6,True,1,July


In [23]:
# Add the day of the week columns based on preferred entry date
df_split["preferred_entry_date" + "_day"] = df_split[
    "preferred_entry_date"
].dt.day_name()

# Check the data
df_split.head()

Unnamed: 0,preferred_zone,preferred_entry_date,results_status,awarded_preference,awarded_entry_date,awarded_entrance_code_name,awarded_group_size,awarded,preferred_option,preferred_entry_date_month,preferred_entry_date_day
0,Colchuck Zone,2020-06-26,Unsuccessful,0,1970-01-01,,0,False,1,June,Friday
1,Core Enchantment Zone,2020-08-01,Unsuccessful,0,1970-01-01,,0,False,1,August,Saturday
2,Core Enchantment Zone,2020-09-19,Unsuccessful,0,1970-01-01,,0,False,1,September,Saturday
3,Core Enchantment Zone,2020-08-22,Unsuccessful,0,1970-01-01,,0,False,1,August,Saturday
4,Snow Zone,2020-07-17,Awarded,1,2020-07-17,Snow Zone,6,True,1,July,Friday


In [24]:
SPLIT_FILE_PATH = os.path.join(os.getcwd(), f"{STRIPPED_CSV_FILE_NAME}_split.csv")

# Export the split data to a csv file
df_split.to_csv(SPLIT_FILE_PATH, index=False, date_format="%m-%d-%Y")

In [25]:
# Create a datatframe of skipped entries
# Find where the preferred option and the awarded preference are 0
awarded_preference_greater_than_zero = df_split["awarded_preference"] > 0

# Find where the preferred option was equal to the awarded preference
preferred_option_equals_awarded_preference = (
    df_split["preferred_option"] == df_split["awarded_preference"]
)

# Awarded entries filter
awarded_entries_filter = (
    awarded_preference_greater_than_zero & preferred_option_equals_awarded_preference
)

df_split_skipped = df_split[~awarded_entries_filter].copy()

# Print the number of awarded entries and the number of failed entries
print(
    f"Number of awarded entries: {len(df_split[awarded_entries_filter])}\nNumber of skipped entries: {len(df_split_skipped)}"
)

Number of awarded entries: 2401
Number of skipped entries: 75608


In [26]:
SPLIT_SKIPPED_FILE_PATH = os.path.join(
    os.getcwd(), f"{STRIPPED_CSV_FILE_NAME}_split_skipped.csv"
)

# Export the split skipped data to a csv file
df_split_skipped.to_csv(SPLIT_SKIPPED_FILE_PATH, index=False, date_format="%m-%d-%Y")