In [10]:
# Location URLS
# Every entry is a URL-encoded "<place>, <island>, Trinidad and Tobago" string
# ("%20" encodes a space and "%2C" a comma), so the list is assembled from
# (place, island) pairs instead of repeating the common suffix.
_places = (
    ("Port%20of%20Spain", "Trinidad"),
    ("San%20Fernando", "Trinidad"),
    ("Arima", "Trinidad"),
    ("Sangre%20Grande", "Trinidad"),
    ("Tunapuna-Piarco", "Trinidad"),
    ("Crown%20Point", "Tobago"),
    ("Scarborough", "Tobago"),
    ("Rio%20Claro%20-%20Mayaro", "Trinidad"),
    ("Siparia", "Trinidad"),
    ("Chaguanas", "Trinidad"),
    ("Diego%20Martin", "Trinidad"),
    ("Princes%20Town", "Trinidad"),
    ("Couva-Tabaquite-Talparo", "Trinidad"),
    ("Point%20Fortin", "Trinidad"),
    ("San%20Juan%20-%20Laventille", "Trinidad"),
    ("Penal%20-%20Debe", "Trinidad"),
)

locations = [
    f"{place}%2C%20{island}%2C%20Trinidad%20and%20Tobago"
    for place, island in _places
]

#API Keys used to acquire data
# API keys are credentials, so they are read from the environment instead of
# being stored in the notebook. Before starting the kernel, set
# VISUAL_CROSSING_API_KEYS to a comma-separated list of keys, e.g.
#   export VISUAL_CROSSING_API_KEYS="FIRST_KEY,SECOND_KEY"
# Any key that was previously committed in this cell should be treated as
# exposed and rotated with the provider.
import os


def load_api_keys(env_var="VISUAL_CROSSING_API_KEYS"):
    """Return the API keys listed in the environment variable ``env_var``.

    The variable is expected to hold a comma-separated list. Surrounding
    whitespace is stripped and empty entries are discarded.

    Args:
        env_var: Name of the environment variable to read.

    Returns:
        A list of key strings; empty if the variable is unset or blank.
    """
    raw_value = os.environ.get(env_var, "")
    return [entry.strip() for entry in raw_value.split(",") if entry.strip()]


Keys = load_api_keys()

In [3]:
import urllib.request
import csv
import codecs

# Define base URL and your API key
import os

base_url = "https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/"
# URL-encoded location sent to the API, and the short label used in output file names
location = "Penal%20-%20Debe%2C%20Trinidad%2C%20Trinidad%20and%20Tobago"
loc = "Penal-Debe"
# The API key is a credential: it is read from the VISUAL_CROSSING_API_KEY
# environment variable rather than being hard-coded in the notebook.
key = os.environ.get("VISUAL_CROSSING_API_KEY", "")
if not key:
    print("VISUAL_CROSSING_API_KEY is not set; API requests will be sent without a key and are expected to fail.")
# Query-string options passed through to the API (unitGroup, include, contentType)
unit_group = "us"
include = "days"
content_type = "csv"

# Define the year range you want to query (both ends inclusive in the download loop)
start_year = 2000
end_year = 2025

# Function to fetch data for a specific year and save to a CSV file
def fetch_weather_data_for_year(year):
    """Download one calendar year of weather data and append it to the location's CSV.

    The request is built from the notebook-level settings ``base_url``,
    ``location``, ``key``, ``unit_group``, ``include`` and ``content_type``
    and covers ``{year}-01-01`` through ``{year}-12-31``. Rows are appended to
    ``weather_data_{loc}.csv``. The downloaded header row is skipped when the
    file already starts with the same header, so the combined file contains
    the header only once.

    Args:
        year: Calendar year to request.

    Returns:
        True if the year's rows were written, False if the download or the
        write failed (the error is printed, not raised, so a loop over many
        years keeps going).
    """
    url = f"{base_url}{location}/{year}-01-01/{year}-12-31?unitGroup={unit_group}&include={include}&key={key}&contentType={content_type}"
    out_path = f"weather_data_{loc}.csv"

    try:
        # Read the whole response before touching the output file, so that a
        # failed or interrupted download cannot append a partial year. The
        # `with` block closes the HTTP response.
        with urllib.request.urlopen(url) as response:
            rows = list(csv.reader(codecs.iterdecode(response, 'utf-8')))

        # Look at the first row already on disk (if any) to decide whether the
        # header from this download would be a duplicate.
        try:
            with open(out_path, newline='', encoding='utf-8') as existing_file:
                existing_header = next(csv.reader(existing_file), None)
        except FileNotFoundError:
            existing_header = None

        if rows and rows[0] == existing_header:
            rows = rows[1:]

        # Append mode: each call adds one more year to the same file
        with open(out_path, mode='a', newline='', encoding='utf-8') as csv_file:
            csv.writer(csv_file).writerows(rows)

        print(f"Weather data for {year} has been saved to '{out_path}'")
        return True

    except Exception as e:
        # Deliberately best-effort: report the failure and let the caller continue
        print(f"Error occurred while fetching data for {year}: {e}")
        return False

Initial call of the function to download the CSV data, iterating over the years 2000 to 2025.

In [None]:
# Loop through each year in the defined range.
# range() excludes its stop value, so end_year + 1 makes end_year inclusive.
# Each iteration issues one API request; a year that fails is only reported
# through the message printed by fetch_weather_data_for_year, so check the
# cell output for gaps.
for year in range(start_year, end_year + 1):
    fetch_weather_data_for_year(year)

The initial call was insufficient to acquire all the desired years. It would download a few and skip over certain years in between due to the daily download limit of the API key being used. Therefore, the initial function was modified to take in parameters such as a new API key, and it is called for each year in a list of the missing years so that they can be acquired.

In [None]:
import time
import requests

def fetch_weather_data(year, location, loc, base_url, key, unit_group, include, content_type,
                       max_attempts=5, retry_delay=20, timeout=60):
    """Download one calendar year of weather data into its own CSV file, with retries.

    The request covers ``{year}-01-01`` through ``{year}-12-31`` and the
    response body is written unchanged to ``weather_data_{loc}_{year}.csv``
    in the current working directory. The file is only created on an HTTP 200
    response.

    Args:
        year: Calendar year to request.
        location: URL-encoded location string placed in the request path.
        loc: Short location label used in the output file name.
        base_url: Base URL of the API, including the trailing slash.
        key: API key sent as the ``key`` query parameter.
        unit_group: Value of the ``unitGroup`` query parameter.
        include: Value of the ``include`` query parameter.
        content_type: Value of the ``contentType`` query parameter.
        max_attempts: Maximum number of requests to make for this year.
        retry_delay: Seconds to wait between attempts.
        timeout: Seconds to wait for the server before an attempt is abandoned.

    Returns:
        True if the file was written, False if every attempt failed or the
        server answered with a client error that retrying cannot fix.
    """
    url = f"{base_url}{location}/{year}-01-01/{year}-12-31?unitGroup={unit_group}&include={include}&key={key}&contentType={content_type}"
    filename = f"weather_data_{loc}_{year}.csv"

    for attempt in range(1, max_attempts + 1):
        try:
            # Without a timeout a stalled connection would block the notebook indefinitely
            response = requests.get(url, timeout=timeout)
        except requests.exceptions.RequestException as e:
            print(f"Error occurred while fetching data for {year}: {e}")
        else:
            status = response.status_code
            if status == 200:
                with open(filename, "wb") as f:
                    f.write(response.content)
                print(f"Weather data for {year} has been saved to {filename}")
                return True

            print(f"Failed for {year} with status code {status}")
            # 4xx responses other than 408 (timeout) and 429 (rate limit) mean
            # the request itself is wrong, so repeating it unchanged cannot succeed.
            if 400 <= status < 500 and status not in (408, 429):
                return False

        # Wait before the next attempt, but not after the final one
        if attempt < max_attempts:
            time.sleep(retry_delay)

    print(f"Giving up on {year} after {max_attempts} attempts")
    return False

# Retry only the years where 429 was encountered
import os

years_to_retry = [2017, 2018, 2020, 2021, 2022, 2023, 2024, 2025]

# Switch to a different API key for the retry. The key is a credential, so it
# is read from the VISUAL_CROSSING_RETRY_API_KEY environment variable instead
# of being hard-coded; if the variable is not set, the key already in use is kept.
if "VISUAL_CROSSING_RETRY_API_KEY" not in os.environ:
    print("VISUAL_CROSSING_RETRY_API_KEY is not set; retrying with the current API key.")
key = os.environ.get("VISUAL_CROSSING_RETRY_API_KEY", key)

# One request (with its own retries) per missing year; each year is saved to its own file
for year in years_to_retry:
    fetch_weather_data(year, location, loc, base_url, key, unit_group, include, content_type)

All the files stored for the specific location were then retrieved from the appropriate directory, merged in ascending order (2000 - 2025) and then written to a csv.

In [None]:
import pandas as pd
import glob
import os

# Folder where your CSVs are saved
folder_path = f"./Location/{loc}/"

# Step 1: Load all CSVs into one big DataFrame.
# The file list is sorted so the load order (and therefore which copy of a
# duplicated day is kept below) is the same on every run.
all_files = sorted(glob.glob(os.path.join(folder_path, "*.csv")))
if not all_files:
    # Fail with a clear message instead of pd.concat's "No objects to concatenate"
    raise FileNotFoundError(f"No CSV files found in '{folder_path}'")

all_dfs = []

for file in all_files:
    df = pd.read_csv(file)

    # Skip rows where 'datetime' column is literally 'datetime' (header inside data),
    # which happens when several downloads were appended to the same file
    df = df[df['datetime'] != 'datetime']

    all_dfs.append(df)

# Step 2: Combine ALL loaded data
combined_df = pd.concat(all_dfs, ignore_index=True)

# Step 3: Parse datetime properly (unparseable values become NaT)
combined_df['datetime'] = pd.to_datetime(combined_df['datetime'], errors='coerce')

# Step 4: Drop any rows where datetime couldn't be parsed
# Step 4b: Keep a single row per day; the same year can be present more than
#          once when a download was repeated
# Step 5: Sort data properly by 'datetime'
combined_df = (
    combined_df
    .dropna(subset=['datetime'])
    .drop_duplicates(subset='datetime', keep='first')
    .sort_values('datetime')
)

# Step 6: Filter to only years 2000 to 2025 (both ends inclusive)
combined_df = combined_df[combined_df['datetime'].dt.year.between(2000, 2025)]

# Step 7: Reset index
combined_df = combined_df.reset_index(drop=True)

# Step 8: Save to a new CSV
combined_df.to_csv(f"{loc}_weather_data_2000_2025.csv", index=False)

print(f"✅ All files successfully combined into '{loc}_weather_data_2000_2025.csv'.")

The data was read in and cleaned before being re-written to the same CSV file.

In [31]:
# Load the combined file produced for this location
df = pd.read_csv(f'{loc}_weather_data_2000_2025.csv')

# If a record exists without values for these three variables, then that record has no significant data
columns_to_check = ['temp', 'tempmax', 'tempmin']

# Drop rows where all of these columns have missing values
df_cleaned = df.dropna(subset=columns_to_check, how='all')

# Drop unnecessary columns. Columns with no relevance to Trinidad and Tobago (like 'snow'), columns with recurring values, and columns with excessive missing values.
# errors='ignore' keeps this cell re-runnable: the cleaned data overwrites the
# input file below, so on a second run these columns are already gone and a
# plain drop() would raise KeyError.
df_cleaned = df_cleaned.drop(
    columns=['snow', 'snowdepth', 'preciptype', 'windgust', 'severerisk'],
    errors='ignore',
)

# Save the cleaned data back to the same CSV file (the uncleaned version is overwritten)
df_cleaned.to_csv(f'{loc}_weather_data_2000_2025.csv', index=False)