In [7]:
import csv
import requests
from io import StringIO
import re
import pandas as pd

# Header-row detector: matches text that starts with "date" (any case)
# as long as the next character is not a colon.
pattern = re.compile(r"^date(?!:)", re.IGNORECASE)

# Only these columns are imported; `index` names the one used as the row index.
columns = ["date", "wdsp"]
index = "date"

# Imported DataFrames are collected here, keyed by a name derived from each URL.
dataframes = {}

# Plain-text file listing the CSV URLs, one per line.
txt_file = "./download-file-list-daily.txt"

# Load the URL list: trim surrounding whitespace and drop lines that end up empty.
with open(txt_file) as f:
    csv_urls = [entry for entry in map(str.strip, f) if entry]

# Download every CSV in `csv_urls`, locate its header row, and import the
# selected columns into `dataframes`. Problems with one URL are reported and
# skipped so the remaining URLs are still processed.

# Seconds to wait for the server before giving up on a request. Without a
# timeout, a stalled connection would block the notebook indefinitely.
REQUEST_TIMEOUT_SECONDS = 30

# Extension removed from the file name when deriving the DataFrame name.
CSV_SUFFIX = ".csv"

for url in csv_urls:
    try:
        # Send GET request to the URL. A timeout raises an exception, which is
        # reported by the handler at the bottom of the loop body.
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
        if response.status_code != 200:
            print(f"Failed to retrieve the CSV file from {url}. Status code: {response.status_code}")
            continue

        # Treat the response text as a file-like object.
        csv_file = StringIO(response.text)

        # Find the start of useful data: the first parsed row whose
        # comma-joined text matches the header pattern. `row_number` is then
        # the count of preamble rows that precede the header.
        for row_number, row in enumerate(csv.reader(csv_file)):
            if pattern.search(",".join(row)):
                print(f"Skipped {row_number} rows in {url}")
                break
        else:
            # The loop finished without a match, so there is no header to import from.
            print(f"Error finding regex pattern: \"{pattern.pattern}\" in {url}")
            continue

        # Derive the DataFrame name from the part of the URL after "webdata/".
        # The extension is removed with an explicit suffix check because
        # str.rstrip(".csv") strips any trailing '.', 'c', 's' or 'v' characters
        # and would also eat the end of names such as "stats.csv".
        df_name = url.split("webdata/")[-1]
        if df_name.endswith(CSV_SUFFIX):
            df_name = df_name[: -len(CSV_SUFFIX)]

        # Rewind so `pd.read_csv` reads from the top, then skip the preamble rows.
        # NOTE(review): blank readings (see the dly3723 output) appear to be
        # imported as strings rather than NaN - confirm whether that is intended.
        csv_file.seek(0)
        dataframes[df_name] = pd.read_csv(csv_file, skiprows=row_number, index_col=index, usecols=columns)
        print(f"Imported {url} as {df_name}")

    except Exception as e:
        # Best effort: report the failure and carry on with the next URL.
        print(f"An error occurred while processing {url}: {e}")


Skipped 24 rows in https://cli.fusio.net/cli/climate_data/webdata/dly1875.csv
Imported https://cli.fusio.net/cli/climate_data/webdata/dly1875.csv as dly1875
Skipped 24 rows in https://cli.fusio.net/cli/climate_data/webdata/dly675.csv
Imported https://cli.fusio.net/cli/climate_data/webdata/dly675.csv as dly675
Skipped 24 rows in https://cli.fusio.net/cli/climate_data/webdata/dly2375.csv
Imported https://cli.fusio.net/cli/climate_data/webdata/dly2375.csv as dly2375
Skipped 24 rows in https://cli.fusio.net/cli/climate_data/webdata/dly3723.csv
Imported https://cli.fusio.net/cli/climate_data/webdata/dly3723.csv as dly3723
Skipped 24 rows in https://cli.fusio.net/cli/climate_data/webdata/dly2175.csv
Imported https://cli.fusio.net/cli/climate_data/webdata/dly2175.csv as dly2175
Skipped 24 rows in https://cli.fusio.net/cli/climate_data/webdata/dly3904.csv
Imported https://cli.fusio.net/cli/climate_data/webdata/dly3904.csv as dly3904
Skipped 25 rows in https://cli.fusio.net/cli/climate_data/web

In [10]:
# Display the dictionary of imported DataFrames, keyed by the name derived from each CSV URL.
dataframes

{'dly1875':              wdsp
 date             
 26-feb-2010   8.3
 27-feb-2010   4.3
 28-feb-2010   4.9
 01-mar-2010   2.3
 02-mar-2010   7.1
 ...           ...
 27-dec-2024   4.2
 28-dec-2024   5.0
 29-dec-2024   9.4
 30-dec-2024  12.8
 31-dec-2024  13.0
 
 [5423 rows x 1 columns],
 'dly675':              wdsp
 date             
 08-oct-2003  11.3
 09-oct-2003  10.7
 10-oct-2003   6.2
 11-oct-2003   3.0
 12-oct-2003   6.8
 ...           ...
 27-dec-2024   3.8
 28-dec-2024   4.6
 29-dec-2024   9.0
 30-dec-2024  14.2
 31-dec-2024  12.1
 
 [7736 rows x 1 columns],
 'dly2375':              wdsp
 date             
 17-sep-1956   7.4
 18-sep-1956   8.0
 19-sep-1956  11.4
 20-sep-1956  17.5
 21-sep-1956  17.4
 ...           ...
 27-dec-2024  10.5
 28-dec-2024  10.6
 29-dec-2024  18.2
 30-dec-2024  24.6
 31-dec-2024  25.6
 
 [24943 rows x 1 columns],
 'dly3723':              wdsp
 date             
 01-jan-1964      
 02-jan-1964      
 03-jan-1964      
 04-jan-1964      
 05-jan-1964     