## Import Modules

In [None]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import csv 
import time
import urllib.request # The urllib.request module defines functions and classes which help in opening URLs (mostly HTTP)
import re
from urllib.request import urlopen
import numpy as np

## Part 1: Obtain a list of dataset pages and query the discovery metadata

We use PASDA's search feature to return a page (https://www.pasda.psu.edu/uci/SearchResults.aspx?Keyword=+) that lists all of the activate dataset landing pages with some metadata. Then, we use the Beautiful Soup module to query this page and harvest the following values:

- Title
- Date Issued
- Publisher
- Description
- Metadata file link
- Download link

### Obtain the file using ONE of the following options

In [89]:
# Option 1: Open "https://www.pasda.psu.edu/uci/SearchResults.aspx?Keyword=+" in a browser and download the page. 
# Save the file as "pasda-search.html" in the same directory as this notebook
# This is recommended if you are testing or running more than once
        
file_path = 'pasda-search.html'  # Modify this to the correct path to your downloaded HTML file
with open(file_path, 'r', encoding='utf-8') as file:
    html_content = file.read()
    
# Option 2: Just read the file directly online. This option is fine if the script is working.
    
# url = 'https://www.pasda.psu.edu/uci/SearchResults.aspx?Keyword=+'
# response = requests.get(url)
# html_content = response.content

In [90]:
soup = BeautifulSoup(html_content, 'html.parser')

# Assuming every dataset is contained in its own <tr> tag
datasets = soup.select('tr[align="left"]')

data = []

dataset_entries = soup.select('td > h3 > a[href^="DataSummary.aspx?dataset="]')

for entry in dataset_entries:
    publisher = entry.find_previous("td").text.strip()
    date = entry.find_previous("td").find_previous("td").text.strip()
    title = entry.text.strip()
    description = entry.find_next("span", id=lambda x: x and x.startswith('DataGrid1_Label3_')).text.strip()
#     metadataLink = entry.find_next_sibling("span").find('a', href=True, string='Metadata')['href']
    metadataFile = entry.parent.parent.find('a', href=True, string='Metadata')['href']
    metadataLink = "https://www.pasda.psu.edu/uci/" + metadataFile 
    try:
        download = entry.parent.parent.find('a', href=True, string='Download')['href']
    except:
        download = ''
        
    # obtain full landing page and create ID
    landing_page = "https://www.pasda.psu.edu/uci/" + entry['href']  # Landing page URL
    iden = 'pasda-' + landing_page.rsplit("=",1)[1]

    data.append([publisher, date, title, description, metadataFile, metadataLink, download, landing_page, iden])
    

# Convert to pandas dataframe
import pandas as pd
df = pd.DataFrame(data, columns=['Publisher', 'Date', 'Title', 'Description', 'Metadata File', 'HTML', 'Download', 'Information', 'ID'])
    

In [91]:
# optional: check to see that the metadata is parsing into the dataframe correctly
actionDate = time.strftime('%Y%m%d')
df.to_csv(f'output_{actionDate}.csv', index=False)

# Part 2: Query the supplemental metadata

Context: Most of the records have supplemental metadata in ISO 19139 or FGDC format. The link to this document is found in the 'HTML" column. Although these files are created as XMLs, the link is a rendered HTML.

There is additional information in these files that we want to scrape, including bounding boxes and geometry type.

We will start by downloading the metadata files - this will save time and reduce the load on PASDA's servers because this part of the recipe may need to be run multiple times after troubleshooting.

### Download the metadata files to a folder called "metadata_files"

In [81]:
import os

# Create a directory named 'metadata_files' to store the downloaded files
download_folder = 'metadata_files'
if not os.path.exists(download_folder):
    os.makedirs(download_folder)

In [83]:
import requests

def download_file(url, folder):
    """
    Download a file given its URL and store it in the specified folder.
    """
    # Get the filename from the URL
    filename = url.split("/")[-1]
    response = requests.get(url, stream=True)
    
    # Handle the response's content in chunks (useful for large files)
    with open(os.path.join(folder, filename), 'wb') as file:
        for chunk in response.iter_content(chunk_size=8192):
            file.write(chunk)

In [84]:
for url in df['HTML']:
    try:
        download_file(url, download_folder)
        print(f"Downloaded {url}")
    except Exception as e:
        print(f"Error downloading {url}. Reason: {e}")

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AdamsCounty_AgSecurityAreas202306.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AdamsCounty_LandConservancyEasements202306.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AdamsCounty_MtJoyTwpPreservedFarms202306.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AdamsCounty_MunicipalBoundary202306.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AdamsCounty_Parcels202306.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AdamsCounty_PreservedFarms202306.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AdamsCounty_Roads202306.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NWPAGlacialDeposits2018.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AlleghenyCollege_GlacialDeposits1979.xml
Downloaded https://www.pasd

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=batBigBrown.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=batEasternPipistrelle.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=batEasternRed.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=batEasternSmallfootedMyotis.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=batEveningBat.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=batHoary.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=batIndianaMyotis.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=batLittleBrownMyotis.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=batNorthernMyotis.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=batRafinesquBigeared.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=bat

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=ChesterCounty_Bridgepoints201306.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=ChesterCounty_BuildingFootprints2015.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=ChesterCounty_Cemeteries201306.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=ChesterCounty_CountyBoundary201107.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=ChesterCounty_ElectionPollingPlaces202006.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=ChesterCounty_Highereducationsfacilities201306.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=ChesterCounty_MuniBoundaries202006.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=ChesterCountyParcels202006.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=ChesterCounty_Railroads201306.xml
Download

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyAirport_Amenities.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyAirport_Art.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyAirport_Parking_Entry_Exit.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyAirport_Parking_Lots.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyAirport_Rental_Car_Locations.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyAirport_Rental_Car_Lots.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyAirport_Roadway_Zones.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyAirport_Runways.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyAirport_Terminal_Buildings.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=Phil

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyPlanning_Political_Wards.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyPlanning_Schools.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyPlanning_Zoning_BaseDistricts.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyPlanning_Zoning_RCO.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=philadelphiaplanninganalysissections2005.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyPolice_Boundaries_District.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyPolice_Boundaries_Division.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyPolice_Boundaries_PSA.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyPolice_CCD_BOUNDARY.xml
Downloaded https://www.pasda.psu.edu/uci/FullMet

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhiladelphiaTransparcels201201.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyWater_DAMS.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyWater_GSI_SMP_TYPES.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyWater_INLETS.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyWater_OUTFALLS.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyWater_PWD_PARCELS2022.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhillyWater_RAINGAUGES.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhiladelphiaWatercoursesDesignatedProtection_Line2016.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PhiladelphiaWatercoursesDesignatedProtection_Poly2016.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadat

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=pamap_lidar_LAS.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PAMAP_MuniBoundaries2007.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PAMAP_Rail2007.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PAMAP_Roads2007.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=pamaptileindexnorth2006.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=pamaptileindexsouth2006.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PAMAP_Contours.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=cd108_polygon.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=countyseat.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=drb_bnd_arc.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=drb_bnd

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DFIRMDepthGrid_McKean.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DFIRMDepthGrid_Mercer.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DFIRMDepthGrid2020.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DFIRMDepthGrid2020.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DFIRMDepthGrid_Potter.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DFIRMDepthGrid2020.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DFIRMDepthGrid2020.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DFIRMDepthGrid2020.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DFIRMDepthGrid_Tioga.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DFIRMDepthGrid_Venango.xml
Downloaded https://www.pasda.psu.edu/uci/Full

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NFHL_42_20220209.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NFHL_Pennsylvania2015.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NFHL_Pennsylvania2015.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NFHL_Pennsylvania2015.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NFHL_42_20220209.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NFHL_42_20220209.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NFHL_42_20220209.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NFHL_Pennsylvania2015.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NFHL_Pennsylvania.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=FEMA_NFHL_42_2020.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDispl

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=Lycoming100yrall200811.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=Lycoming500yr200811.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=LycomingFloodplain200811.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=LycomingFloodway200811.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=LycomingCountyPrivateRoads202001.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=LycomingCountyRoadCenterlines202001.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=LycomingCountySiteStructureAddressPoints202001.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=MercerCounty_Centerlines202304.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=MercerCounty_TaxParcels202304.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisp

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DCNR_Bedrock_Geology_of_Pennsylvania_Geologic_Units202305.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DCNR_Coal_Fields_in_Pennsylvania_Anthracite202305.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DCNR_Coal_Fields_in_Pennsylvania_High_Volatile_Bituminous202211.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DCNR_Coal_Fields_in_Pennsylvania_Low_Volatile_Bituminous202304.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DCNR_Coal_Fields_in_Pennsylvania_Medium_Volatile_Bituminous202305.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DCNR_Coal_Fields_in_Pennsylvania_Semi_Anthracite202207.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DCNR_Earthquakes_in_Pennsylvania_1724_2003_202304.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?fil

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=eja_v.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=EnvironmentalJusticeAreas2010.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=EnvironmentalJusticeAreas2015.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=ErosionandSedimentControlFacilities2023_07.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PADEP_HistoricOilGasWells.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=HistoricStreams2004.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=ICEIS_SamplingStations2023_07.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=IndustrialMineralMiningOperations2023_07.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=Industrial_Mineral_Surface_Mine_Permits202308.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadata

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PAAirportLinearPart77_201302.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PaCongressional2023_08.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PaCounty2023_08.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PaEngrDistrict2023_08.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PAInterstateHighwayExits2023_08.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PaInterstateMileMarkers2023_08.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PaLocalRoads2023_07.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PaMunicipalities2023_08.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PaSchoolDistricts2023_08.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PaSenatorial2023_08.xml
Download

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PGC_CWDManagementUnits2020.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PGC_ControlledGooseAreas2021.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PGC_DuckZones2021.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PGC_ElkManagementAreas2020.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PGC_ElkManagementZones2020.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PGC_EmergencyResponcePoints2021.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PGC_BNDRegions2021.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PGC_GameWardenDistircts2021.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PGC_GamelandRoads2020.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PGC_GamelandTrails2021.xm

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=wshed24k_SRBC.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=sub_west_SRBC.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PA_PLI_cntyageasements.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PA_PLI_Federal.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PA_PLI_NPF_Private.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AerialPhotoColor_cached.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=agriculturalsecurityareas.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PGCBearHarvestCounty_03_17.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AbandonedCropLandSince1969_ChesBay.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=ChesBayBiofuelFeedstocks_HUC6.xml
Downloaded https://www.

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PSU_OPP_Erie_Sidewalks2016.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PSU_Campuses1985_1986.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PSU_OPP_UniversityParkBuildings202106.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PSU_OPP_UniversityParkBoundary202106.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PSU_OPP_UniversityParkCrosswalks_ln202106.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PSU_OPP_UniversityParkCrosswalks_poly202106.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PSU_OPP_UniversityParkImprovementOtherThanBuildings202005.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PSU_OPP_UniversityParkRoads_major202106.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PSU_OPP_UniversityPar

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=nlcd2001.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=pamap2005.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=panlcd_2011.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=lc_and1_1992.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=lc_and1_2001.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=lc_and1_2005.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=lc_and1_2011.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=habitat1992.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=habitat2001.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=habitat2005.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=habitat2011.xml
Downloaded https://www.pasda.psu.edu/uci/Ful

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PA_Soils2022.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AlleghenyNationalForest_Compartment2021.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AlleghenyNationalForest_EcologicalLandtype.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AlleghenyNationalForest_ELT_Landform.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AlleghenyNationalForest_LandTypeAssociation_ANF.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AlleghenyNationalForest_MajorLocalTownPt2021.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AlleghenyNationalForest_ManagementArea2021.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AlleghenyNationalForest_TravelRoute202105.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AlleghenyNationalF

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NAIP_NY_CountyMosaics2009.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NAIP_VA_CountyMosaics2009.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NAIP_WV_CountyMosaics2009.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=nj02nass.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=ny02nass.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=USDA_usda_fs_d_NodeProbPruned.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=USDA_usda_fs_d_nodeprobabilities.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PA_Soils2022.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=PA_Soils2022.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=pa_crops2006.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadat

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=drg24k_DC.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=drg24k_WV.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=drg250k_DE.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=drg250k_MD.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=drg250k_VA.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=drg250k_WV.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NAOGP_BereaSandstoneAssessmentUnit.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NAOGP_BereaSandstoneQuarterMileCells.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NAOGP_CatskillSandstonesSiltstonesAssessmentUnit.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NAOGP_CatskillSandstonesSiltstonesAssessmentUnitQuarterMileCells.xml


Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NLCD_2017_Tree_Canopy_Cover_PA_v2021-4.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NLCD_2018_Tree_Canopy_Cover_PA_v2021-4.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NLCD_2019_Tree_Canopy_Cover_PA_v2021-4.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NLCD_2020_Tree_Canopy_Cover_PA_v2021-4.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NLCD_2021_Tree_Canopy_Cover_PA_v2021-4.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=impervious14_southeast3k_091406.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=canopy13_pa_northeast_3k_022007.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=canopy14_southeast_3k_022007.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=impervious13_pa_northeast_010407.xml
Down

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=Western_PA_QL1North_ClassifiedPointCloud2020.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=Western_PA_QL1North_Contour2020.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=Western_PA_QL1North_DEM2020.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=Western_PA_QL1North_Intensity2020.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=Western_PA_QL2North_Breakline2020.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=Western_PA_QL2South_Breakline2020.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=Western_PA_QL2North_ClassifiedPointCloud2020.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=Western_PA_QL2South_ClassifiedPointCloud2020.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=Western_PA_QL2North_Contour2020.

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AllentownPA_Orthos2016.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AllentownPA_Orthos2016.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=USGS_LiDAR2017.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=USGS_LiDAR2017.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=USGS_LiDAR2017.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=USGS_LiDAR2017.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=USGSLiDAR_DauphinCoPA_2016.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=USGSLiDAR_DauphinCoPA_2016.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=USGSLiDAR_DauphinCoPA_2016.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=USGSLiDAR_DauphinCoPA_2016.xml
Downloaded https://www.pasda.psu.edu

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DRB_ProtectedLands202307.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DRB_ProtectedLands202307.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DRB_ProtectedLands202307.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=DRB_ProtectedLands202307.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=NHD_HR_Stream_Order_PA.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=AgriculturalSecurityAreas202011.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=ConservationEasements202307.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=FarmEasements202306.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=Federal202306.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=Local202306.xml
Downloaded https://ww

Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=YorkCounty_PUB_FACIL_Schools202209.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=YorkCounty_DIST_Senate202209.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=Pennsylvania_soil_metadata_201409.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=YorkCounty_HYDRO_Streams202209.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=YorkCounty_Transit_Routes202209.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=YorkCounty_TRANSP_Transit_Stops202209.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=YorkCounty_ENVIR_Unique_Features202209.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=YorkCounty_DIST_Voting202209.xml
Downloaded https://www.pasda.psu.edu/uci/FullMetadataDisplay.aspx?file=YorkCounty_Zipcodes202209.xml
Downloaded https://www.pasda.psu

### Query the downloaded files

In [103]:
bounding_boxes = {}
spatial_reference_methods = {}
theme_keywords_list = []
place_keywords_list = []

metadata_folder = "metadata_files"

for metadata_file in df['Metadata File']:
    file_path = os.path.join(metadata_folder, metadata_file)
    
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            file_content = file.read()
            soup = BeautifulSoup(file_content, "html.parser")

    # for metadata_url in df['HTML']:
    #     response = requests.get(metadata_url)
    #     soup = BeautifulSoup(response.content, "html.parser")
            try: 
                try:
                    west = soup.find('i', string='West_Bounding_Coordinate:').next_sibling.strip()   
                except AttributeError:
#                     print(f"'West_Bounding_Coordinate:' not found in {metadata_file}")
                    west = '-80.52'

                try:
                    south = soup.find('i', string='South_Bounding_Coordinate:').next_sibling.strip()   
                except AttributeError:
#                     print(f"'South_Bounding_Coordinate:' not found in {metadata_file}")
                    south = '39.72'

                try:
                    east = soup.find('i', string='East_Bounding_Coordinate:').next_sibling.strip()   
                except AttributeError:
#                     print(f"'East_Bounding_Coordinate:' not found in {metadata_file}")
                    east = '-74.69'

                try:
                    north = soup.find('i', string='North_Bounding_Coordinate:').next_sibling.strip()   
                except AttributeError:
#                     print(f"'North_Bounding_Coordinate:' not found in {metadata_file}")
                    north = '42.51'

                bbox = west + ',' + south + ',' +east + ',' + north
        
#                 try:
#                     direct_spatial_ref = soup.find('i', string='Direct_Spatial_Reference_Method:').next_sibling.strip()
#                 except AttributeError:
#                     print(f"'Direct_Spatial_Reference_Method:' not found in {metadata_file}")
#                     direct_spatial_ref = 'unspecified'
            except:
                bbox = "-80.52,39.72,-74.69,42.51"

            
#             keyword_values = ""
    
#             try:
#             # Extract all instances of Theme_Keyword
#                 theme_keywords = soup.findAll('i', string='Theme_Keyword:')
        
#             # Get the values following each Theme_Keyword and concatenate them with "|"
#                 keyword_values = "|".join([kw.next_sibling.strip() for kw in theme_keywords])
#             except Exception as e:
#                 print(f"No theme keywords for {metadata_file}: {e}")
    
#                 theme_keywords_list.append(keyword_values if keyword_values else " ")


#             place_values = ""
    
#             try:
#             # Extract all instances of Place_Keyword
#                 place_keywords = soup.findAll('i', string='Plcae_Keyword:')
        
#             # Get the values following each Theme_Keyword and concatenate them with "|"
#                 place_values = "|".join([kw.next_sibling.strip() for kw in place_keywords])
#             except Exception as e:
#                 print(f"No place keywords for {metadata_file}: {e}")
    
#                 place_keywords_list.append(place_values if place_values else " ")
                
                
                

            bounding_boxes[metadata_file] = bbox
            spatial_reference_methods[metadata_file] = direct_spatial_ref

    except:
        pass

df['Bounding Box'] = df['Metadata File'].map(bounding_boxes)
# df['Resource Type'] = direct_spatial_ref + ' data'
# df['Resource Class'] = np.where(df['Resource Type'] == 'Raster data', 'Imagery', 'Datasets')
# df['Keyword'] = theme_keywords_list
# df['Spatial Coverage'] = place_keywords_list


actionDate = time.strftime('%Y%m%d')
df.to_csv(f'output_{actionDate}.csv', index=False)
print('#### Job done ####')

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=AdamsCounty_AgSecurityAreas202306.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=AdamsCounty_LandConservancyEasements202306.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=AdamsCounty_MtJoyTwpPreservedFarms202306.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=AdamsCounty_MunicipalBoundary202306.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=AdamsCounty_Parcels202306.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=AdamsCounty_PreservedFarms202306.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=AdamsCounty_Roads202306.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=NWPAGlacialDeposits2018.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Alleg

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=batRafinesquBigeared.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=batSeminole.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=batSilverhaired.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=batSoutheasternMyotis.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=batTownsendBigeared.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=BedfordCountyParcels202307.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=BlairCounty_AddressPoints202210.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=BlairCounty_StreetCenterlines202210.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=BlairCounty_Parcels202210.xml
'Direct_Spatial_Reference_Method:' not found in Fu

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=ChesterCounty_Railroads201306.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=ChesterCounty_RoadCenterlines202006.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=ChesterCounty_Schooldistrictboundaries201306.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=ChesterCounty_Schools201306.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=ChesterCounty_ZipCodes2017.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=LivableLandscapesBasemapFeatures201306.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=LivableLandscapesNaturalResourcesOverlay201306.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=ChoptankRiverHeritage_Access201101.xml
'Direct_Spatial_Reference_Method:' not found in FullMetad

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Philadelphia_Building_3DModels.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Philadelphia_Building_3DModels.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Philadelphia_Building_3DModels.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PhiladelphiaBuildings2017.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PhiladelphiaCensusBlockGroups201201.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=philadelphiacensustracts2000.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Philadelphia_Census_Tracts_2010_201302.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PhillyCommerce_Business_Services_Market_Areas.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?fi

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PhillyPolice_INCIDENTS_2006.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PhillyPolice_INCIDENTS_2007.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PhillyPolice_INCIDENTS_2008.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PhillyPolice_INCIDENTS_2009.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PhillyPolice_INCIDENTS_2010.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PhillyPolice_INCIDENTS_2011.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PhillyPolice_INCIDENTS_2012.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PhillyPolice_INCIDENTS_2013.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PhillyPolice_INCIDENTS_2014.xml
'Direct_Sp

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PhillyWatersheds_Combined_Sewer_Service_Area.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PhillyWatersheds_Green_StWater_Infrastructure.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PhillyWatersheds_GSI_Planning_Districts.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PhillyWatersheds_GSI_Private_Projects_Regs.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PhillyWatersheds_HistoricStreams_Arc.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PhillyWatersheds_HistoricStreams_Poly.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PhillyWatersheds_Hydrographic_Features_Arc.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PhillyWatersheds_Hydrographic_Features_Poly.xml
'Direct_

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=drb_huc8_polygon.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=drb_co_clip.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=drb_co_polygon.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=drb_riv_arc.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=drb_citytown_point.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=drb_res_polygon.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=drb_rds_arc.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=drb_sb_polygon.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=drb_huc11_polygon.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=delawareriver.xml
'Direct_Spatial_Reference_Meth

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=FEMA_NFHL_36_NY_20170614.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=FEMA_NFHL_39_OH_20170802.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=FEMA_NFHL_42_2020.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=FEMA_dfirm_AllPA.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=FEMA_NFHL_51_VA_20170805.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=FEMA_NFHL_54_WV_20161030.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=FEMA_PA_Historic_Flood_Maps.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=HAZUS_Hpr_Baltimore_City_MD.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=HAZUS_Hpr_Baltimore_Co_MD.xml
'Direct_Spatial_Reference_Method:' not found 

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=IndianaCountyMunicipalBoundaries201912.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=IndianaCountyParcels201912.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=IndianaCountyRailroads201912.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=juniatamunicipal200801.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=JuniataCountyParcels200801.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=KeepPennsylvaniaBeautifulIllegalDumpSurveys.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=KeepPennsylvaniaBeautifulIllegalDumpSurveys201008.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=KeepPennsylvaniaBeautifulIllegalDumpSurveys201107.xml
'Direct_Spatial_Reference_Method:' not found in FullMet

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=MontgomeryCounty_StreetCenterline202306.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=MontgomeryCounty_Trails202207.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=MontgomeryCounty_WaterServiceAreas202207.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=mrrysvll-parcels.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=mrrysvll-zoning.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=mrrysvll_roads.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=pa_srtm_2010.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PA_Monumentation.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=AT_Centerline.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadat

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=DCNR_TenMW_Access_and_Need.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=DCNR_TenMW_County_Stats.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=DCNR_TenMW_Municipal_Stats.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=DCNR_ROS_PA.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=DCNR_LocalPark202306.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=DCNR_LocalParkAccess201511.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=DCNR_BOF_WildandNaturalAreas201703.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=dcnr_pawildsrec.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=pags_physprov1k.xml
'Direct_Spatial_Reference_Method:' not found in FullMetad

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=WPAMineMaps.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=WPAMineMaps_TileIndex.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=DOH_Ambulatory_Surgery_Centers202302.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=DOH_BirthCenters202308.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=DOH_CommunityMentalHealthCenters202208.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=DOH_DrugAlcoholTreatmentFacilities202212.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=DOH_EMSRegionalCouncils202205.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=DOH_HomeHealthAgencies202208.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=DOH_Hospices202308.xml
'Direct_Sp

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Boating_SpecReg_Lakes202108.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Boating_SpecReg_Streams201601.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Boating_SpecReg_Pt202108.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=ClassA_Streams202305.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=ClassA_Streams202305_pt.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=CooperativeNurseries202002.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Erie_Tributaries202108.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=ErieTribs_OpenFishing202303_pt.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=ErieTribs_OpenFishing_Poly202303.xml
'Direct_Spatial_Refe

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=SPC_Landcover.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=SPC_Landcover.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=SPC_Landcover.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=SPC_Landcover.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=SPC_Landcover.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=SPC_Landcover.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=SPC_Past_Landslides.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=SPC_Sidewalks202002.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=SPC_Landcover.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=SPC_Landcover.xml
'Direct_Spatial_Reference_Me

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Lake_Erie_Watershed_Imagery2015.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Lake_Erie_Watershed_Imagery2015.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Lake_Erie_Watershed_Imagery2015.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Lake_Erie_Watershed_Imagery2015.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Lake_Erie_Watershed_Imagery2015.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Lake_Erie_Watershed_2012_TileIndex.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Riparia_COUNTY_lc_change.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Riparia_MajorSheds_LC_Change.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Riparia_PhysioPro

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=gapherps30.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=gapherps90.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=gapmammals30.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=gapmammals90.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=gapstewardship1999.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PA_GeomorphonLandformMaps2021.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=painactiverailroads.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Riparia_HUC10_2011.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Riparia_HUC12_2011.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Riparia_HUC8_LC_Change.xml
'D

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=tl_PennsylvaniaStateEquivalentStatebased2000.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=tl_PennsylvaniaStateLegislativeDistrictLowerChamber2000.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=tl_PennsylvaniaStateLegislativeDistrictUpperChamber2000.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=tl_PennsylvaniaSuperPublicUseMicrodataArea2000.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=tl_PennsylvaniaUnifiedSchoolDistricts2000.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=tl_PennsylvaniaEconomicCensusCounty2009.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=tl_PennsylvaniaEconomicCensusPlace2009.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=tl_Pennsylvania3Digit20

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PA_Soils2022.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PA_Soils2022.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PA_Soils2022.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PA_Soils2022.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PA_Soils2022.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PA_Soils2022.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PA_Soils2022.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PA_Soils2022.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PA_Soils2022.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PA_Soils2022.xml
'Direct_Spatial_Reference_Method:' not found in Fu

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PA_Soils2022.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PA_Soils2022.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=IncomeEmployment1979_1981.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=IncomeEmployment1982.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=IncomeEmployment1988.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=IncomeEmployment1989_1990.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=IncomeEmployment1991_1992.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=IncomeEmployment1993_2001.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Crime_PA2002.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=NAOGP_GreenbrierLimestoneAssessmentUnitQuarterMileCells.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=maptechhistoric2005.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=maptech_v2.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=NAOGP_MarcellusShaleAssessmentUnit.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=NAOGP_MarcellusShaleAssessmentUnitQuarterMileCells.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=MD_PA_SandyLiDAR2015.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=MD_PA_SandyLiDAR2015.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=MD_PA_SandyLiDAR2015.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=NAOGP_MississippianSandstonesA

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=PA_SandyLiDAR2015.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=nlcd_2011_landcover_2011_edition_2014_10_10.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=nlcd_2011_impervious_2011_edition_2014_10_10.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=NLCD_2016_Land_Cover_L48.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=NLCD_Land_Cover_Change_Index_L48.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=nlcd_2011_USFS_tree_canopy_2011_edition_2014_03_31.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=NALCMS_2020_PA_Land_Cover_30m.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=NAOGP_NorthwesternOhioShaleAssessmentUnit.xml
'Direct_Spatial_Reference_Method:' not found in FullMeta

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=USGS_GEOpdf.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=USGS_GEOpdf_Historical.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=USGS_GEOpdf.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=USGS_GEOpdf2014.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=USGS_GEOpdf2020.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=USGS_GEOpdf_Historical.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=USGS_GEOpdf30_60min.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=USGS_GEOpdf.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=USGS_GEOpdf_Historical.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=USGS_GEOpdf_Historical.xml


'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=ForestType_2011_DRBC.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Baywide_LandCover.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Baywide_LandCover.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Baywide_LandCover.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Baywide_LandCover.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Baywide_LandCover.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=Baywide_LandCover.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=landcover_2013_delawareriverbasin.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=LandCover_MappingArea_2010_DRBC.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay

'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=WPC_huc12calclu.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=WPC_nonprotectedsites.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=WPC_rf3shedlu.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=WyomingCounty_Addresses202109.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=WyomingCounty_Parcels202109.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=WyomingCounty_Streets202109.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=YorkCounty_Address_Pts202209.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=YorkCounty_Ag_Security202209.xml
'Direct_Spatial_Reference_Method:' not found in FullMetadataDisplay.aspx?file=YorkCounty_TRANSP_Airports202209.xml
'Direct_Spatial_Reference_Method:

## Part 3: add default and calculated values

In [None]:
# Create Date Range field
df['Date Range'] = df.apply(lambda row: f"{row['Date Issued']}-{row['Date Issued']}" if pd.notna(row['Date Issued']) else '', axis=1)

In [None]:
# Create an identifier
iden = 'pasda-' + landingPage.rsplit("=",1)[1]

In [None]:
# Append default values

df['Code'] = '08a-01'
df['Access Rights'] = 'Public'
df['Accrual Method'] = 'HTML'
df['Date Accessioned'] = time.strftime('%Y-%m-%d')
df['Language'] = 'eng'
df['Is Part Of'] = '08a-01'
df['Member Of'] = 'ba5cc745-21c5-4ae9-954b-72dd8db6815a'
df['Provider'] = 'Pennsylvania Spatial Data Access (PASDA)'

In [None]:
df

In [None]:
# Define the desired order of columns
desired_order = [
'Title',
'Alternative Title',
'Description',
'Language',
'Creator',
'Publisher',
'Provider',
'Resource Class',
'Resource Type',
'Theme',
'Keyword',
'Date Issued',
'Date Range',
'Spatial Coverage',
'Bounding Box',
'Member Of',
'Download',
'HTML',
'Information',
'ID',
'Identifier',
'Access Rights',

# Add more columns as needed in the desired order
]

# Reindex the DataFrame based on the desired order of columns
df = df.reindex(columns=desired_order)