# Download and prepare weather data

## 1. Match weather stations & target airports, download data

In [1]:
import os
import requests
# Local root for the raw yearly files; created up front so later writes cannot
# fail on a missing directory.
output_dir = "downloaded_csv_files"
os.makedirs(output_dir, exist_ok=True)

# Remote naming pieces: the per-year folder URL and the prefix every file name starts with.
base_url_template = "https://www.ncei.noaa.gov/oa/local-climatological-data/v2/access/{year}/"
file_prefix = "LCD_"

# target years: 2018 through 2024 inclusive (range() excludes its end value)
first_year, last_year = 2018, 2024
years = range(first_year, last_year + 1)

# target airports
# Each entry is (airport code, latitude, longitude). The code doubles as the
# per-airport folder name, and the coordinates are compared against the
# latitude/longitude columns of the NOAA station list to pick a weather station.
target_coords = [
    ("ATL", 33.640411, -84.419853),
    ("DFW", 32.89748, -97.040443),
    ("DEN", 39.849312, -104.673828),
    ("LAX", 33.942791, -118.410042),
    ("ORD", 41.978611, -87.904724),
    ("JFK", 40.641766, -73.780968),
    ("MCO", 28.424618, -81.310753),
    ("LAS", 36.083134, -115.148315),
    ("CLT", 35.21389, -80.943054),
    ("MIA", 25.79516, -80.279594),
    ("SEA", 47.443546, -122.301659),
    ("EWR", 40.689491, -74.174538),
    ("SFO", 37.615223, -122.389977),
    ("PHX", 33.435249, -112.010216),
    ("IAH", 29.993067, -95.341812),
    ("BOS", 42.365589, -71.010025),
    ("FLL", 26.074215, -80.150726),
    ("MSP", 44.88197, -93.22178),
    ("LGA", 40.776863, -73.874069),
    ("DTW", 42.213249, -83.352859),
    ("PHL", 39.87294, -75.243988),
    ("SLC", 40.78633019, -111.9733294),
    ("BWI", 39.17754, -76.668526),
    ("DCA", 38.8514403, -77.0377214),
    ("SAN", 32.73177, -117.197624),
    ("IAD", 38.9474564, -77.4599286),
    ("TPA", 27.979168, -82.539337),
    ("BNA", 36.131687, -86.668823),
    ("AUS", 30.1945272, -97.6698761),
    ("MDW", 41.7859722, -87.7524167),
    ("HNL", 21.3178247, -157.9202503),
    ("DAL", 32.848152, -96.851349),
    ("PDX", 45.5887089, -122.5968694),
    ("STL", 38.7486982, -90.3700257),
    ("RDU", 35.8776389, -78.7874722),
    ("HOU", 29.6457998, -95.2772316),
    ("SMF", 38.6954444, -121.5907778),
    ("MSY", 29.9932722, -90.2590275),
    ("SJC", 37.363949, -121.92894),
    ("SJU", 18.4393992, -66.0021333),
    ("SNA", 33.678925, -117.862869),
    ("MCI", 39.2976111, -94.7138889),
    ("OAK", 37.7212614, -122.2211506),
    ("SAT", 29.5339583, -98.4690569),
    ("RSW", 26.5361639, -81.755155),
    ("CLE", 41.4094069, -81.8546911),
    ("IND", 39.7173056, -86.2946389),
    ("PIT", 40.4914167, -80.2326944),
    ("CVG", 39.053276, -84.663017),
    ("CMH", 39.9969467, -82.8921592),
    ("PBI", 26.6831617, -80.0955919),
    ("OGG", 20.8986486, -156.4304586),
    ("JAX", 30.4940456, -81.6878467),
    ("ONT", 34.0560142, -117.6011875),
    ("BUR", 34.2006944, -118.3586667),
    ("BDL", 41.9390322, -72.6843158),
    ("CHS", 32.8986389, -80.0405278),
    ("MKE", 42.94989, -87.900414),
    ("ANC", 61.1740847, -149.9981375),
    ("ABQ", 35.0389316, -106.6082622),
    ("OMA", 41.3031667, -95.8940556),
    ("MEM", 35.040031, -89.981873),
    ("RIC", 37.5051811, -77.3197386),
    ("BOI", 43.5643611, -116.2228611),
    ("ORF", 36.8946042, -76.2012292),
    ("BUF", 42.9404272, -78.7305697),
    ("SDF", 38.174085, -85.7364936),
    ("RNO", 39.4991111, -119.7681111),
    ("SRQ", 27.3954444, -82.5543889),
    ("OKC", 35.393074, -97.6007617),
    ("KOA", 19.738765, -156.0456311),
    ("ELP", 31.8073333, -106.3763611),
    ("GEG", 47.6190278, -117.5352222),
    ("TUS", 32.116112, -110.941109),
    ("SAV", 32.1275833, -81.2021389),
    ("GRR", 42.8808333, -85.5228056),
    ("LGB", 33.8179297, -118.1518906),
    ("LIH", 21.9759833, -159.3389578),
    ("PVD", 41.7223333, -71.4277222),
    ("MYR", 33.6797411, -78.9283214),
    ("PSP", 33.8296697, -116.5066942),
    ("TUL", 36.1983933, -95.8881053),
    ("DSM", 41.5339728, -93.6630722),
    ("BHM", 33.5638889, -86.7523056),
    ("SFB", 28.7771781, -81.2349136),
    ("SYR", 43.1111811, -76.1063203),
    ("TYS", 35.805813, -83.989815),
    ("ALB", 42.7491161, -73.80198),
    ("PNS", 30.4734167, -87.1866111),
    ("ROC", 43.128002, -77.665474),
    ("GSP", 34.8956711, -82.2188594),
    ("PIE", 27.9086336, -82.68651),
    ("BZN", 45.7772358, -111.1502603),
    ("FAT", 36.7765556, -119.7188333),
    ("COS", 38.8058167, -104.7007764),
    ("HPN", 41.0669531, -73.7075661),
    ("AVL", 35.436077, -82.541298),
    ("VPS", 30.4832194, -86.5260443),
    ("PWM", 43.6456435, -70.3086164),
    ("LIT", 34.7294414, -92.2247772),
    ("MSN", 43.1398791, -89.3375045)
]

# station list URL
station_list_url = "https://www.ncei.noaa.gov/oa/local-climatological-data/v2/doc/lcdv2-station-list.txt"

# Seconds to wait on the server before giving up. requests has no default
# timeout, so without this a stalled connection would hang the cell forever.
request_timeout = 60

# Matching passes, tried in order: (max |lat/lon difference|, restrict to "US*" codes).
# Tighter thresholds run first so an airport gets the closest-fitting station
# tier available; only the widest pass also accepts non-"US" station codes.
match_passes = [(0.01, True), (0.02, True), (0.04, True), (0.1, False)]

response = requests.get(station_list_url, timeout=request_timeout)

if response.status_code == 200:
    # Parse the station list once: the first three whitespace-separated fields
    # are read as station code, latitude, longitude. Lines that are too short
    # or whose coordinates are not numeric are skipped.
    stations = []
    for line in response.text.splitlines():
        parts = line.split()
        if len(parts) < 3:
            continue
        try:
            stations.append((parts[0], float(parts[1]), float(parts[2])))
        except ValueError:
            continue

    selected_file_codes = []    # (station_code, folder_name) pairs, in match order
    found_folder_names = set()  # Track folder names already added

    for threshold, us_only in match_passes:
        for station_code, lat, lon in stations:
            if us_only and not station_code.startswith("US"):
                continue
            for folder_name, target_lat, target_lon in target_coords:
                if (folder_name not in found_folder_names
                        and abs(lat - target_lat) <= threshold
                        and abs(lon - target_lon) <= threshold):
                    selected_file_codes.append((station_code, folder_name))
                    found_folder_names.add(folder_name)
                    print(f"Added {station_code} with {folder_name} using {threshold} threshold")
                    break  # A station is paired with at most one airport per pass

    # download the files: one CSV per (year, matched station), stored under the airport's folder
    for year in years:
        base_url = base_url_template.format(year=year)
        file_suffix = f"_{year}.csv"

        for station_code, folder_name in selected_file_codes:
            sub_dir = os.path.join(output_dir, folder_name)
            os.makedirs(sub_dir, exist_ok=True)
            file_name = f"{file_prefix}{station_code}{file_suffix}"
            file_url = f"{base_url}{file_name}"

            try:
                file_response = requests.get(file_url, timeout=request_timeout)
            except requests.RequestException as exc:
                # One failed transfer should not abort the remaining downloads.
                print(f"{file_name} error: ({exc})")
                continue

            if file_response.status_code == 200:
                file_path = os.path.join(sub_dir, file_name)
                with open(file_path, 'wb') as f:
                    f.write(file_response.content)
                print(f"{file_name} saved to {folder_name} for year {year}")
            else:
                print(f"{file_name} error: (：{file_response.status_code})")
else:
    print(f"error: {response.status_code}")


Added USA00722083 with CHS using 0.01 threshold
Added USI0000PHIK with HNL using 0.01 threshold
Added USW00003812 with AVL using 0.01 threshold
Added USW00003822 with SAV using 0.01 threshold
Added USW00012844 with PBI using 0.01 threshold
Added USW00012854 with SFB using 0.01 threshold
Added USW00012871 with SRQ using 0.01 threshold
Added USW00012873 with PIE using 0.01 threshold
Added USW00012894 with RSW using 0.01 threshold
Added USW00012918 with HOU using 0.01 threshold
Added USW00013717 with MYR using 0.01 threshold
Added USW00013737 with ORF using 0.01 threshold
Added USW00013740 with RIC using 0.01 threshold
Added USW00013743 with DCA using 0.01 threshold
Added USW00013858 with VPS using 0.01 threshold
Added USW00013876 with BHM using 0.01 threshold
Added USW00013899 with PNS using 0.01 threshold
Added USW00013967 with OKC using 0.01 threshold
Added USW00013968 with TUL using 0.01 threshold
Added USW00013994 with STL using 0.01 threshold
Added USW00014721 with BDL using 0.01 th

LCD_USW00013722_2018.csv saved to RDU for year 2018
LCD_USW00013739_2018.csv saved to PHL for year 2018
LCD_USW00013881_2018.csv saved to CLT for year 2018
LCD_USW00013889_2018.csv saved to JAX for year 2018
LCD_USW00013891_2018.csv saved to TYS for year 2018
LCD_USW00013893_2018.csv saved to MEM for year 2018
LCD_USW00013904_2018.csv saved to AUS for year 2018
LCD_USW00013960_2018.csv saved to DAL for year 2018
LCD_USW00013963_2018.csv saved to LIT for year 2018
LCD_USW00014768_2018.csv saved to ROC for year 2018
LCD_USW00014821_2018.csv saved to CMH for year 2018
LCD_USW00023160_2018.csv saved to TUS for year 2018
LCD_USW00023169_2018.csv saved to LAS for year 2018
LCD_USW00023188_2018.csv saved to SAN for year 2018
LCD_USW00023230_2018.csv saved to OAK for year 2018
LCD_USW00024127_2018.csv saved to SLC for year 2018
LCD_USW00024131_2018.csv saved to BOI for year 2018
LCD_USW00024132_2018.csv saved to BZN for year 2018
LCD_USW00024233_2018.csv saved to SEA for year 2018
LCD_USW00093

LCD_USW00014735_2020.csv saved to ALB for year 2020
LCD_USW00014739_2020.csv saved to BOS for year 2020
LCD_USW00014764_2020.csv saved to PWM for year 2020
LCD_USW00014765_2020.csv saved to PVD for year 2020
LCD_USW00014771_2020.csv saved to SYR for year 2020
LCD_USW00014810_2020.csv error: (：404)
LCD_USW00014819_2020.csv saved to MDW for year 2020
LCD_USW00014820_2020.csv saved to CLE for year 2020
LCD_USW00014837_2020.csv saved to MSN for year 2020
LCD_USW00014839_2020.csv saved to MKE for year 2020
LCD_USW00014922_2020.csv saved to MSP for year 2020
LCD_USW00014933_2020.csv saved to DSM for year 2020
LCD_USW00014942_2020.csv saved to OMA for year 2020
LCD_USW00021510_2020.csv saved to KOA for year 2020
LCD_USW00022536_2020.csv saved to LIH for year 2020
LCD_USW00023044_2020.csv saved to ELP for year 2020
LCD_USW00023050_2020.csv saved to ABQ for year 2020
LCD_USW00023129_2020.csv saved to LGB for year 2020
LCD_USW00023152_2020.csv saved to BUR for year 2020
LCD_USW00023183_2020.csv 

LCD_USW00024132_2021.csv saved to BZN for year 2021
LCD_USW00024233_2021.csv saved to SEA for year 2021
LCD_USW00093037_2021.csv saved to COS for year 2021
LCD_USW00093721_2021.csv saved to BWI for year 2021
LCD_USW00093738_2021.csv saved to IAD for year 2021
LCD_USW00093819_2021.csv saved to IND for year 2021
LCD_USW00094789_2021.csv saved to JFK for year 2021
LCD_USW00094823_2021.csv saved to PIT for year 2021
LCD_USW00003102_2021.csv saved to ONT for year 2021
LCD_USW00012839_2021.csv saved to MIA for year 2021
LCD_USW00013874_2021.csv saved to ATL for year 2021
LCD_USW00013897_2021.csv saved to BNA for year 2021
LCD_USW00023174_2021.csv saved to LAX for year 2021
LCD_USW00023234_2021.csv saved to SFO for year 2021
LCD_USW00026451_2021.csv saved to ANC for year 2021
LCD_USW00094847_2021.csv saved to DTW for year 2021
LCD_USW00094860_2021.csv saved to GRR for year 2021
LCD_RQC00668814_2021.csv saved to SJU for year 2021
LCD_USA00722083_2022.csv error: (：404)
LCD_USI0000PHIK_2022.csv 

LCD_USW00023152_2023.csv saved to BUR for year 2023
LCD_USW00023183_2023.csv saved to PHX for year 2023
LCD_USW00023185_2023.csv saved to RNO for year 2023
LCD_USW00023293_2023.csv saved to SJC for year 2023
LCD_USW00024157_2023.csv saved to GEG for year 2023
LCD_USW00093138_2023.csv saved to PSP for year 2023
LCD_USW00093184_2023.csv saved to SNA for year 2023
LCD_USW00093193_2023.csv saved to FAT for year 2023
LCD_USW00093225_2023.csv saved to SMF for year 2023
LCD_USW00093814_2023.csv saved to CVG for year 2023
LCD_USW00093821_2023.csv saved to SDF for year 2023
LCD_USW00094745_2023.csv saved to HPN for year 2023
LCD_USA00911903_2023.csv error: (：404)
LCD_USW00003017_2023.csv saved to DEN for year 2023
LCD_USW00003870_2023.csv saved to GSP for year 2023
LCD_USW00003927_2023.csv saved to DFW for year 2023
LCD_USW00003947_2023.csv saved to MCI for year 2023
LCD_USW00004202_2023.csv error: (：404)
LCD_USW00012815_2023.csv saved to MCO for year 2023
LCD_USW00012842_2023.csv saved to TPA 

### 2.  Data cleaning and processing for each station

In [2]:
# Data cleaning and processing for each station
import pandas as pd
import os
import requests
# Raw downloads live under output_dir; cleaned per-airport files go in a sub-folder of it.
output_dir = "downloaded_csv_files"
cleaned_output_dir = os.path.join(output_dir, 'cleaned_output')
os.makedirs(cleaned_output_dir, exist_ok=True)

# Selected columns: a yearly file is only used if it contains every one of these.
columns_to_keep = [
    'STATION',
    'DATE',
    'LATITUDE',
    'LONGITUDE',
    'NAME',
    'HourlyDewPointTemperature',
    'HourlyDryBulbTemperature',
    'HourlyPrecipitation',
    'HourlySeaLevelPressure',
    'HourlyStationPressure',
    'HourlyVisibility',
    'HourlySkyConditions',
    'HourlyWindDirection',
    'HourlyWindSpeed',
    'DailySnowDepth',
    'DailySnowfall',
    'REPORT_TYPE',
]

# Bounds of the DATE window that is kept ...
start_date, end_date = '2018-11-01', '2024-01-31'
# ... and of the span inside it that is removed again.
exclude_start, exclude_end = '2020-11-01', '2021-01-31'

# target years
years = range(2018, 2025)
## Rebuild the station -> airport mapping here so the raw CSVs downloaded
## earlier can be located. `target_coords` must already be defined in the session.
# station list URL
station_list_url = "https://www.ncei.noaa.gov/oa/local-climatological-data/v2/doc/lcdv2-station-list.txt"
file_prefix = "LCD_"

# Seconds to wait on the server; requests would otherwise wait indefinitely.
request_timeout = 60

# Matching passes, tried in order: (max |lat/lon difference|, restrict to "US*" codes).
# Only the last, widest pass also accepts station codes without the "US" prefix.
match_passes = [(0.01, True), (0.02, True), (0.04, True), (0.1, False)]

# Defined before the request so the cleaning loop below always has a mapping to
# iterate over (an empty one if the station list could not be fetched).
selected_file_codes = []    # (station_code, folder_name) pairs, in match order
found_folder_names = set()  # Track folder names already added

response = requests.get(station_list_url, timeout=request_timeout)
if response.status_code == 200:
    # Parse once: the first three whitespace-separated fields are read as
    # station code, latitude, longitude; malformed lines are skipped.
    stations = []
    for line in response.text.splitlines():
        parts = line.split()
        if len(parts) < 3:
            continue
        try:
            stations.append((parts[0], float(parts[1]), float(parts[2])))
        except ValueError:
            continue

    for threshold, us_only in match_passes:
        for station_code, lat, lon in stations:
            if us_only and not station_code.startswith("US"):
                continue
            for folder_name, target_lat, target_lon in target_coords:
                if (folder_name not in found_folder_names
                        and abs(lat - target_lat) <= threshold
                        and abs(lon - target_lon) <= threshold):
                    selected_file_codes.append((station_code, folder_name))
                    found_folder_names.add(folder_name)
                    print(f"Added {station_code} with {folder_name} using {threshold} threshold")
                    break  # A station is paired with at most one airport per pass
else:
    print(f"error: {response.status_code}")

# Clean and process data for each station
# 'ISO-8859-1' and 'latin1' are two names for the same codec, so a single
# fallback after utf-8 is sufficient.
encodings_to_try = ['utf-8', 'ISO-8859-1']

# Columns filled with 0 when missing, and columns filled by time interpolation.
columns_to_zero_fill = ['HourlyPrecipitation', 'DailySnowfall', 'DailySnowDepth', 'HourlyWindSpeed']
columns_to_interpolate = ['HourlyDewPointTemperature', 'HourlyDryBulbTemperature',
                          'HourlySeaLevelPressure', 'HourlyStationPressure', 'HourlyVisibility']


def _read_station_year(file_path):
    """Read one yearly CSV and return it reduced to ``columns_to_keep``.

    Each encoding in ``encodings_to_try`` is attempted until the file parses.
    Returns ``None`` when the file cannot be parsed with any encoding or when
    it lacks one of the required columns.
    """
    for encoding in encodings_to_try:
        try:
            df = pd.read_csv(file_path, engine='python', on_bad_lines='skip', encoding=encoding)
        except Exception as e:  # best effort: report and try the next encoding
            print(f"Failed to read {file_path} with encoding {encoding}: {e}")
            continue

        if set(columns_to_keep).issubset(df.columns):
            print(f"Successfully read {file_path} with encoding {encoding}")
            return df[columns_to_keep]

        # The file parsed, so another encoding cannot add the missing columns;
        # stop here instead of re-reading it and reporting a bogus read failure.
        print(f"Skipping {file_path} due to missing columns with encoding {encoding}")
        return None

    print(f"Failed to read {file_path} with any encoding")
    return None


def _clean_station_frame(combined_df):
    """Filter one station's combined records and fill their gaps.

    Keeps 'FM-15' rows from November, December and January that fall between
    ``start_date`` and ``end_date`` but outside ``exclude_start``..``exclude_end``
    (all four bounds inclusive of the whole calendar day), then fills missing
    values. Returns a new DataFrame with 'DATE' as its first column.
    """
    combined_df['DATE'] = pd.to_datetime(combined_df['DATE'])
    dates = combined_df['DATE']

    # A bare date string compares as midnight, so "<= end" would cut off every
    # record after 00:00 on the final day. Compare against the next day instead.
    one_day = pd.Timedelta(days=1)
    in_window = (dates >= pd.Timestamp(start_date)) & (dates < pd.Timestamp(end_date) + one_day)
    in_excluded = (dates >= pd.Timestamp(exclude_start)) & (dates < pd.Timestamp(exclude_end) + one_day)
    in_season = (dates.dt.month >= 11) | (dates.dt.month == 1)
    is_fm15 = combined_df['REPORT_TYPE'] == 'FM-15'

    # .copy() so the column assignments below act on an independent frame
    # rather than on a slice of combined_df.
    filtered_df = combined_df[in_window & in_season & ~in_excluded & is_fm15].copy()

    # Spread each day's snow values across that day's rows, then treat anything
    # still missing as zero.
    # NOTE(review): rows of other report types are already gone at this point,
    # so daily values recorded only on such rows are never propagated — confirm
    # this is intended.
    day = filtered_df['DATE'].dt.date
    for column in ('DailySnowfall', 'DailySnowDepth'):
        filtered_df[column] = filtered_df.groupby(day)[column].transform(lambda x: x.ffill().bfill())
    filtered_df[columns_to_zero_fill] = filtered_df[columns_to_zero_fill].fillna(0)

    # imputation: method='time' requires a datetime index
    filtered_df = filtered_df.set_index('DATE')
    filtered_df[columns_to_interpolate] = filtered_df[columns_to_interpolate].interpolate(
        method='time', limit_direction='both')
    return filtered_df.reset_index()


num = 0  # number of stations cleaned and written so far
for station_code, folder_name in selected_file_codes:
    # Combine all years' data for each station
    dfs = []
    for year in years:
        file_path = os.path.join(output_dir, folder_name, f"{file_prefix}{station_code}_{year}.csv")
        print(f"Attempting to read {file_path}")

        if not os.path.exists(file_path):
            print(f"File not found: {file_path}")
            continue

        df = _read_station_year(file_path)
        if df is not None:
            dfs.append(df)

    if not dfs:
        continue  # nothing usable was downloaded for this station

    combined_df = pd.concat(dfs, axis=0, ignore_index=True)
    filtered_df = _clean_station_frame(combined_df)

    # Save cleaned data to a single CSV file per station in the cleaned_output directory
    os.makedirs(cleaned_output_dir, exist_ok=True)
    output_file = os.path.join(cleaned_output_dir, f"{folder_name}.csv")
    filtered_df.to_csv(output_file, index=False)
    print(f"Processed and saved cleaned data for {station_code} to {output_file}")
    num += 1

Added USA00722083 with CHS using 0.01 threshold
Added USI0000PHIK with HNL using 0.01 threshold
Added USW00003812 with AVL using 0.01 threshold
Added USW00003822 with SAV using 0.01 threshold
Added USW00012844 with PBI using 0.01 threshold
Added USW00012854 with SFB using 0.01 threshold
Added USW00012871 with SRQ using 0.01 threshold
Added USW00012873 with PIE using 0.01 threshold
Added USW00012894 with RSW using 0.01 threshold
Added USW00012918 with HOU using 0.01 threshold
Added USW00013717 with MYR using 0.01 threshold
Added USW00013737 with ORF using 0.01 threshold
Added USW00013740 with RIC using 0.01 threshold
Added USW00013743 with DCA using 0.01 threshold
Added USW00013858 with VPS using 0.01 threshold
Added USW00013876 with BHM using 0.01 threshold
Added USW00013899 with PNS using 0.01 threshold
Added USW00013967 with OKC using 0.01 threshold
Added USW00013968 with TUL using 0.01 threshold
Added USW00013994 with STL using 0.01 threshold
Added USW00014721 with BDL using 0.01 th

Successfully read downloaded_csv_files\SAV\LCD_USW00003822_2020.csv with encoding utf-8
Attempting to read downloaded_csv_files\SAV\LCD_USW00003822_2021.csv
Successfully read downloaded_csv_files\SAV\LCD_USW00003822_2021.csv with encoding utf-8
Attempting to read downloaded_csv_files\SAV\LCD_USW00003822_2022.csv
Successfully read downloaded_csv_files\SAV\LCD_USW00003822_2022.csv with encoding utf-8
Attempting to read downloaded_csv_files\SAV\LCD_USW00003822_2023.csv
Successfully read downloaded_csv_files\SAV\LCD_USW00003822_2023.csv with encoding utf-8
Attempting to read downloaded_csv_files\SAV\LCD_USW00003822_2024.csv
Successfully read downloaded_csv_files\SAV\LCD_USW00003822_2024.csv with encoding utf-8
Processed and saved cleaned data for USW00003822 to downloaded_csv_files\cleaned_output\SAV.csv
Attempting to read downloaded_csv_files\PBI\LCD_USW00012844_2018.csv
Successfully read downloaded_csv_files\PBI\LCD_USW00012844_2018.csv with encoding utf-8
Attempting to read downloaded_c

Successfully read downloaded_csv_files\MYR\LCD_USW00013717_2019.csv with encoding utf-8
Attempting to read downloaded_csv_files\MYR\LCD_USW00013717_2020.csv
Successfully read downloaded_csv_files\MYR\LCD_USW00013717_2020.csv with encoding utf-8
Attempting to read downloaded_csv_files\MYR\LCD_USW00013717_2021.csv
Successfully read downloaded_csv_files\MYR\LCD_USW00013717_2021.csv with encoding utf-8
Attempting to read downloaded_csv_files\MYR\LCD_USW00013717_2022.csv
Successfully read downloaded_csv_files\MYR\LCD_USW00013717_2022.csv with encoding utf-8
Attempting to read downloaded_csv_files\MYR\LCD_USW00013717_2023.csv
Successfully read downloaded_csv_files\MYR\LCD_USW00013717_2023.csv with encoding utf-8
Attempting to read downloaded_csv_files\MYR\LCD_USW00013717_2024.csv
Successfully read downloaded_csv_files\MYR\LCD_USW00013717_2024.csv with encoding utf-8
Processed and saved cleaned data for USW00013717 to downloaded_csv_files\cleaned_output\MYR.csv
Attempting to read downloaded_c

Successfully read downloaded_csv_files\OKC\LCD_USW00013967_2018.csv with encoding utf-8
Attempting to read downloaded_csv_files\OKC\LCD_USW00013967_2019.csv
Successfully read downloaded_csv_files\OKC\LCD_USW00013967_2019.csv with encoding utf-8
Attempting to read downloaded_csv_files\OKC\LCD_USW00013967_2020.csv
Successfully read downloaded_csv_files\OKC\LCD_USW00013967_2020.csv with encoding utf-8
Attempting to read downloaded_csv_files\OKC\LCD_USW00013967_2021.csv
Successfully read downloaded_csv_files\OKC\LCD_USW00013967_2021.csv with encoding utf-8
Attempting to read downloaded_csv_files\OKC\LCD_USW00013967_2022.csv
Successfully read downloaded_csv_files\OKC\LCD_USW00013967_2022.csv with encoding utf-8
Attempting to read downloaded_csv_files\OKC\LCD_USW00013967_2023.csv
Successfully read downloaded_csv_files\OKC\LCD_USW00013967_2023.csv with encoding utf-8
Attempting to read downloaded_csv_files\OKC\LCD_USW00013967_2024.csv
Successfully read downloaded_csv_files\OKC\LCD_USW00013967

Successfully read downloaded_csv_files\ALB\LCD_USW00014735_2019.csv with encoding utf-8
Attempting to read downloaded_csv_files\ALB\LCD_USW00014735_2020.csv
Successfully read downloaded_csv_files\ALB\LCD_USW00014735_2020.csv with encoding utf-8
Attempting to read downloaded_csv_files\ALB\LCD_USW00014735_2021.csv
Successfully read downloaded_csv_files\ALB\LCD_USW00014735_2021.csv with encoding utf-8
Attempting to read downloaded_csv_files\ALB\LCD_USW00014735_2022.csv
Successfully read downloaded_csv_files\ALB\LCD_USW00014735_2022.csv with encoding utf-8
Attempting to read downloaded_csv_files\ALB\LCD_USW00014735_2023.csv
Successfully read downloaded_csv_files\ALB\LCD_USW00014735_2023.csv with encoding utf-8
Attempting to read downloaded_csv_files\ALB\LCD_USW00014735_2024.csv
Successfully read downloaded_csv_files\ALB\LCD_USW00014735_2024.csv with encoding utf-8
Processed and saved cleaned data for USW00014735 to downloaded_csv_files\cleaned_output\ALB.csv
Attempting to read downloaded_c

Successfully read downloaded_csv_files\CLE\LCD_USW00014820_2020.csv with encoding utf-8
Attempting to read downloaded_csv_files\CLE\LCD_USW00014820_2021.csv
Successfully read downloaded_csv_files\CLE\LCD_USW00014820_2021.csv with encoding utf-8
Attempting to read downloaded_csv_files\CLE\LCD_USW00014820_2022.csv
Successfully read downloaded_csv_files\CLE\LCD_USW00014820_2022.csv with encoding utf-8
Attempting to read downloaded_csv_files\CLE\LCD_USW00014820_2023.csv
Successfully read downloaded_csv_files\CLE\LCD_USW00014820_2023.csv with encoding utf-8
Attempting to read downloaded_csv_files\CLE\LCD_USW00014820_2024.csv
Successfully read downloaded_csv_files\CLE\LCD_USW00014820_2024.csv with encoding utf-8
Processed and saved cleaned data for USW00014820 to downloaded_csv_files\cleaned_output\CLE.csv
Attempting to read downloaded_csv_files\MSN\LCD_USW00014837_2018.csv
Successfully read downloaded_csv_files\MSN\LCD_USW00014837_2018.csv with encoding utf-8
Attempting to read downloaded_c

Successfully read downloaded_csv_files\LIH\LCD_USW00022536_2019.csv with encoding utf-8
Attempting to read downloaded_csv_files\LIH\LCD_USW00022536_2020.csv
Successfully read downloaded_csv_files\LIH\LCD_USW00022536_2020.csv with encoding utf-8
Attempting to read downloaded_csv_files\LIH\LCD_USW00022536_2021.csv
Successfully read downloaded_csv_files\LIH\LCD_USW00022536_2021.csv with encoding utf-8
Attempting to read downloaded_csv_files\LIH\LCD_USW00022536_2022.csv
Successfully read downloaded_csv_files\LIH\LCD_USW00022536_2022.csv with encoding utf-8
Attempting to read downloaded_csv_files\LIH\LCD_USW00022536_2023.csv
Successfully read downloaded_csv_files\LIH\LCD_USW00022536_2023.csv with encoding utf-8
Attempting to read downloaded_csv_files\LIH\LCD_USW00022536_2024.csv
Successfully read downloaded_csv_files\LIH\LCD_USW00022536_2024.csv with encoding utf-8
Processed and saved cleaned data for USW00022536 to downloaded_csv_files\cleaned_output\LIH.csv
Attempting to read downloaded_c

Successfully read downloaded_csv_files\SJC\LCD_USW00023293_2018.csv with encoding utf-8
Attempting to read downloaded_csv_files\SJC\LCD_USW00023293_2019.csv
Successfully read downloaded_csv_files\SJC\LCD_USW00023293_2019.csv with encoding utf-8
Attempting to read downloaded_csv_files\SJC\LCD_USW00023293_2020.csv
Successfully read downloaded_csv_files\SJC\LCD_USW00023293_2020.csv with encoding utf-8
Attempting to read downloaded_csv_files\SJC\LCD_USW00023293_2021.csv
Successfully read downloaded_csv_files\SJC\LCD_USW00023293_2021.csv with encoding utf-8
Attempting to read downloaded_csv_files\SJC\LCD_USW00023293_2022.csv
Successfully read downloaded_csv_files\SJC\LCD_USW00023293_2022.csv with encoding utf-8
Attempting to read downloaded_csv_files\SJC\LCD_USW00023293_2023.csv
Successfully read downloaded_csv_files\SJC\LCD_USW00023293_2023.csv with encoding utf-8
Attempting to read downloaded_csv_files\SJC\LCD_USW00023293_2024.csv
Successfully read downloaded_csv_files\SJC\LCD_USW00023293

Processed and saved cleaned data for USW00093814 to downloaded_csv_files\cleaned_output\CVG.csv
Attempting to read downloaded_csv_files\SDF\LCD_USW00093821_2018.csv
Successfully read downloaded_csv_files\SDF\LCD_USW00093821_2018.csv with encoding utf-8
Attempting to read downloaded_csv_files\SDF\LCD_USW00093821_2019.csv
Successfully read downloaded_csv_files\SDF\LCD_USW00093821_2019.csv with encoding utf-8
Attempting to read downloaded_csv_files\SDF\LCD_USW00093821_2020.csv
Successfully read downloaded_csv_files\SDF\LCD_USW00093821_2020.csv with encoding utf-8
Attempting to read downloaded_csv_files\SDF\LCD_USW00093821_2021.csv
Successfully read downloaded_csv_files\SDF\LCD_USW00093821_2021.csv with encoding utf-8
Attempting to read downloaded_csv_files\SDF\LCD_USW00093821_2022.csv
Successfully read downloaded_csv_files\SDF\LCD_USW00093821_2022.csv with encoding utf-8
Attempting to read downloaded_csv_files\SDF\LCD_USW00093821_2023.csv
Successfully read downloaded_csv_files\SDF\LCD_USW

Successfully read downloaded_csv_files\MCO\LCD_USW00012815_2018.csv with encoding utf-8
Attempting to read downloaded_csv_files\MCO\LCD_USW00012815_2019.csv
Successfully read downloaded_csv_files\MCO\LCD_USW00012815_2019.csv with encoding utf-8
Attempting to read downloaded_csv_files\MCO\LCD_USW00012815_2020.csv
Successfully read downloaded_csv_files\MCO\LCD_USW00012815_2020.csv with encoding utf-8
Attempting to read downloaded_csv_files\MCO\LCD_USW00012815_2021.csv
Successfully read downloaded_csv_files\MCO\LCD_USW00012815_2021.csv with encoding utf-8
Attempting to read downloaded_csv_files\MCO\LCD_USW00012815_2022.csv
Successfully read downloaded_csv_files\MCO\LCD_USW00012815_2022.csv with encoding utf-8
Attempting to read downloaded_csv_files\MCO\LCD_USW00012815_2023.csv
Successfully read downloaded_csv_files\MCO\LCD_USW00012815_2023.csv with encoding utf-8
Attempting to read downloaded_csv_files\MCO\LCD_USW00012815_2024.csv
Successfully read downloaded_csv_files\MCO\LCD_USW00012815

Processed and saved cleaned data for USW00013722 to downloaded_csv_files\cleaned_output\RDU.csv
Attempting to read downloaded_csv_files\PHL\LCD_USW00013739_2018.csv
Successfully read downloaded_csv_files\PHL\LCD_USW00013739_2018.csv with encoding utf-8
Attempting to read downloaded_csv_files\PHL\LCD_USW00013739_2019.csv
Successfully read downloaded_csv_files\PHL\LCD_USW00013739_2019.csv with encoding utf-8
Attempting to read downloaded_csv_files\PHL\LCD_USW00013739_2020.csv
Successfully read downloaded_csv_files\PHL\LCD_USW00013739_2020.csv with encoding utf-8
Attempting to read downloaded_csv_files\PHL\LCD_USW00013739_2021.csv
Successfully read downloaded_csv_files\PHL\LCD_USW00013739_2021.csv with encoding utf-8
Attempting to read downloaded_csv_files\PHL\LCD_USW00013739_2022.csv
Successfully read downloaded_csv_files\PHL\LCD_USW00013739_2022.csv with encoding utf-8
Attempting to read downloaded_csv_files\PHL\LCD_USW00013739_2023.csv
Successfully read downloaded_csv_files\PHL\LCD_USW

Processed and saved cleaned data for USW00013960 to downloaded_csv_files\cleaned_output\DAL.csv
Attempting to read downloaded_csv_files\LIT\LCD_USW00013963_2018.csv
Successfully read downloaded_csv_files\LIT\LCD_USW00013963_2018.csv with encoding utf-8
Attempting to read downloaded_csv_files\LIT\LCD_USW00013963_2019.csv
Successfully read downloaded_csv_files\LIT\LCD_USW00013963_2019.csv with encoding utf-8
Attempting to read downloaded_csv_files\LIT\LCD_USW00013963_2020.csv
Successfully read downloaded_csv_files\LIT\LCD_USW00013963_2020.csv with encoding utf-8
Attempting to read downloaded_csv_files\LIT\LCD_USW00013963_2021.csv
Successfully read downloaded_csv_files\LIT\LCD_USW00013963_2021.csv with encoding utf-8
Attempting to read downloaded_csv_files\LIT\LCD_USW00013963_2022.csv
Successfully read downloaded_csv_files\LIT\LCD_USW00013963_2022.csv with encoding utf-8
Attempting to read downloaded_csv_files\LIT\LCD_USW00013963_2023.csv
Successfully read downloaded_csv_files\LIT\LCD_USW

Processed and saved cleaned data for USW00023230 to downloaded_csv_files\cleaned_output\OAK.csv
Attempting to read downloaded_csv_files\SLC\LCD_USW00024127_2018.csv
Successfully read downloaded_csv_files\SLC\LCD_USW00024127_2018.csv with encoding utf-8
Attempting to read downloaded_csv_files\SLC\LCD_USW00024127_2019.csv
Successfully read downloaded_csv_files\SLC\LCD_USW00024127_2019.csv with encoding utf-8
Attempting to read downloaded_csv_files\SLC\LCD_USW00024127_2020.csv
Successfully read downloaded_csv_files\SLC\LCD_USW00024127_2020.csv with encoding utf-8
Attempting to read downloaded_csv_files\SLC\LCD_USW00024127_2021.csv
Successfully read downloaded_csv_files\SLC\LCD_USW00024127_2021.csv with encoding utf-8
Attempting to read downloaded_csv_files\SLC\LCD_USW00024127_2022.csv
Successfully read downloaded_csv_files\SLC\LCD_USW00024127_2022.csv with encoding utf-8
Attempting to read downloaded_csv_files\SLC\LCD_USW00024127_2023.csv
Successfully read downloaded_csv_files\SLC\LCD_USW

Processed and saved cleaned data for USW00093738 to downloaded_csv_files\cleaned_output\IAD.csv
Attempting to read downloaded_csv_files\IND\LCD_USW00093819_2018.csv
Successfully read downloaded_csv_files\IND\LCD_USW00093819_2018.csv with encoding utf-8
Attempting to read downloaded_csv_files\IND\LCD_USW00093819_2019.csv
Successfully read downloaded_csv_files\IND\LCD_USW00093819_2019.csv with encoding utf-8
Attempting to read downloaded_csv_files\IND\LCD_USW00093819_2020.csv
Successfully read downloaded_csv_files\IND\LCD_USW00093819_2020.csv with encoding utf-8
Attempting to read downloaded_csv_files\IND\LCD_USW00093819_2021.csv
Successfully read downloaded_csv_files\IND\LCD_USW00093819_2021.csv with encoding utf-8
Attempting to read downloaded_csv_files\IND\LCD_USW00093819_2022.csv
Successfully read downloaded_csv_files\IND\LCD_USW00093819_2022.csv with encoding utf-8
Attempting to read downloaded_csv_files\IND\LCD_USW00093819_2023.csv
Successfully read downloaded_csv_files\IND\LCD_USW

Successfully read downloaded_csv_files\BNA\LCD_USW00013897_2024.csv with encoding utf-8
Processed and saved cleaned data for USW00013897 to downloaded_csv_files\cleaned_output\BNA.csv
Attempting to read downloaded_csv_files\LAX\LCD_USW00023174_2018.csv
Successfully read downloaded_csv_files\LAX\LCD_USW00023174_2018.csv with encoding utf-8
Attempting to read downloaded_csv_files\LAX\LCD_USW00023174_2019.csv
Successfully read downloaded_csv_files\LAX\LCD_USW00023174_2019.csv with encoding utf-8
Attempting to read downloaded_csv_files\LAX\LCD_USW00023174_2020.csv
Successfully read downloaded_csv_files\LAX\LCD_USW00023174_2020.csv with encoding utf-8
Attempting to read downloaded_csv_files\LAX\LCD_USW00023174_2021.csv
Successfully read downloaded_csv_files\LAX\LCD_USW00023174_2021.csv with encoding utf-8
Attempting to read downloaded_csv_files\LAX\LCD_USW00023174_2022.csv
Successfully read downloaded_csv_files\LAX\LCD_USW00023174_2022.csv with encoding utf-8
Attempting to read downloaded_c

In [None]:
# Check that every airport folder holds exactly 7 CSV files
# (one per target year, 2018-2024 inclusive).
expected_csv_count = 7

# The cleaning step writes its results to a "cleaned_output" sub-folder of
# output_dir. It is not an airport folder and holds one file per airport,
# so it must be skipped or it would always be reported as incomplete.
non_airport_folders = {"cleaned_output"}

missing_files_airports = []

# os.listdir returns entries in arbitrary order; sort for a stable report.
for airport_folder in sorted(os.listdir(output_dir)):
    if airport_folder in non_airport_folders:
        continue

    airport_path = os.path.join(output_dir, airport_folder)
    if not os.path.isdir(airport_path):
        continue

    # Count only CSV files so stray entries (OS metadata, checkpoints, ...)
    # neither hide a missing year nor trigger a false alarm.
    csv_count = sum(
        1
        for file_name in os.listdir(airport_path)
        if file_name.lower().endswith(".csv")
    )
    if csv_count != expected_csv_count:
        missing_files_airports.append(airport_folder)

if missing_files_airports:
    print("Folders file_number != 7：")
    for airport in missing_files_airports:
        print(airport)
else:
    # Make a clean result explicit instead of printing nothing.
    print(f"All airport folders contain {expected_csv_count} CSV files.")