# Purpose
* Create the base table tracking visits to National Parks.
* Data pulled on February 3, 2025.

# Setup

## Packages

In [1]:
###############################################################################
# GENERAL #
import pathlib as pl
import zipfile


###############################################################################
# ANALYSIS #
import geopandas as gpd
import numpy as np
import pandas as pd

###############################################################################
# LOCAL #

# Project root directory; the raw and processed data directories used below
# are built from it. NOTE(review): this is a machine-specific absolute path
# and has to be adjusted when the notebook is run elsewhere.
ROOT_DIR = "/Users/evancanfield/Documents/Projects/national_park_passport_stamps/"


## Data

### Google Map Export

#### Unzip KMZ File

In [2]:
def kmz_to_kml(kmz_file, output_dir):
    """Extract the main KML document from a KMZ archive.

    A KMZ file is a zip archive. The first ``.kml`` entry found in the
    archive is written to ``output_dir`` under the KMZ file's stem with a
    ``.kml`` suffix. Any further ``.kml`` entries are ignored so they cannot
    overwrite the extracted document.

    Parameters
    ----------
    kmz_file : str or pathlib.Path
        Path to the ``.kmz`` archive.
    output_dir : str or pathlib.Path
        Existing directory the extracted file is written to.

    Returns
    -------
    pathlib.Path
        Path of the extracted ``.kml`` file.

    Raises
    ------
    FileNotFoundError
        If the archive contains no ``.kml`` entry.
    """
    kmz_file = pl.Path(kmz_file)
    kml_path = pl.Path(output_dir, f'{kmz_file.stem}.kml')

    with zipfile.ZipFile(kmz_file, 'r') as kmz:
        # Take only the first match: writing every .kml entry to the same
        # output path would leave whichever entry happened to come last.
        main_kml = next(
            (name for name in kmz.namelist() if name.lower().endswith('.kml')),
            None,
        )
        if main_kml is None:
            raise FileNotFoundError(f"No .kml entry found in: {kmz_file}")
        kml_content = kmz.read(main_kml)

    kml_path.write_bytes(kml_content)
    print(f"KML file extracted to: {kml_path}")
    return kml_path

# Locations of the Google Map export (KMZ) and its extracted KML counterpart
date_of_conversion = "2025-02-03"
map_stem = f"national_park_visited_records_{date_of_conversion}"
map_dir = pl.Path(ROOT_DIR) / "data" / "raw"
map_kmz_file = map_dir / f"{map_stem}.kmz"
map_kml_file = map_dir / f"{map_stem}.kml"

# Only unpack the archive when the KML has not been extracted yet
if map_kml_file.exists():
    print('KML File Aready Exists.')
else:
    kmz_to_kml(map_kmz_file, map_kmz_file.parent)

KML File Aready Exists.


#### Read KML File

In [3]:
# List the layers available in the KML file
layers = gpd.list_layers(map_kml_file)

# Read each distinct layer into its own GeoDataFrame, keyed by layer name
dct_map = {
    layer_name: gpd.read_file(map_kml_file, layer=layer_name)
    for layer_name in layers['name'].unique()
}

# Processing

## Clean Up Google Map Data

In [4]:
# Initialize variables
frames = []
drop_cols = ['description', 'geometry']


for split, gdf in dct_map.items():
    print(80 * '-')
    print(split)
    print()

    # Set columns to lower case so the 'name' / 'description' columns used
    # below resolve regardless of the casing in the source layer
    gdf.columns = [col.lower() for col in gdf.columns]

    # Latitude / longitude straight from the point geometries. The vectorised
    # accessors replace a per-row lambda and yield NaN for a missing geometry
    # instead of raising AttributeError on None.
    gdf['latitude'] = gdf['geometry'].y
    gdf['longitude'] = gdf['geometry'].x

    # Convert to a plain dataframe and drop the columns no longer needed
    df = pd.DataFrame(gdf).drop(columns=drop_cols)

    # Record which layer (split) each row came from
    df['split'] = split

    frames.append(df)

# Every layer carries its own 0..n index; rebuild it so that row labels in
# the combined table are unique
df_visit = pd.concat(frames, ignore_index=True)

# Sort by split and name
df_visit = df_visit.sort_values(
    ['split', 'name'],
    ascending=[True, True],
    ignore_index=True,
)

--------------------------------------------------------------------------------
Not Visited

--------------------------------------------------------------------------------
Visited - Evan

--------------------------------------------------------------------------------
Visited - Kelsey

--------------------------------------------------------------------------------
Visited - Evan and Kelsey



## Generate Map Label

In [5]:
# Apply function to create a new column
# One boolean column per person: True when the person's name appears in the
# row's split label
for person in ('Kelsey', 'Evan'):
    df_visit[person] = [person in label for label in df_visit['split']]

# Export

In [6]:
# Destination of the processed table; the file name carries the export date
processed_data_dir = pl.Path(ROOT_DIR, "data", "processed")
visited_table_name = (
    processed_data_dir
    / f"national_park_visited_records_{date_of_conversion}.csv"
)

# Create the output directory first: to_csv fails when it does not exist
processed_data_dir.mkdir(parents=True, exist_ok=True)

df_visit.to_csv(visited_table_name, index=False)