Import necessary libraries

In [9]:
import pandas as pd
import requests
import os
from datetime import datetime


# Downloading data (traffic and air quality)
Monthly files are downloaded automatically for every month between the following two dates (both months included):

In [10]:

# First and last month to fetch. Only the year and month of each date are used
# when the monthly file URLs are generated; both end months are included.
download_from = datetime(year=2023, month=1, day=1)    # Example: January 2023
download_until = datetime(year=2024, month=4, day=30)  # Or set a specific end date

Then run the following to actually download the files.

In [None]:

# Catalan month names keyed by month number (1-12).
# These strings are interpolated verbatim into the monthly download URLs and
# the local file names, so their spelling must not be changed.
catalan_months = dict(
    enumerate(
        [
            "Gener",
            "Febrer",
            "Marc",
            "Abril",
            "Maig",
            "Juny",
            "Juliol",
            "Agost",
            "Setembre",
            "Octubre",
            "Novembre",
            "Desembre",
        ],
        start=1,
    )
)

# Trams transit relacio (relation between ids and locations)
trams_relacio_url = (
    "https://opendata-ajuntament.barcelona.cat/data/dataset/"
    "1090983a-1c40-4609-8620-14ad49aae3ab/resource/"
    "1d6c814c-70ef-4147-aa16-a49ddb952f72/download/"
    "transit_relacio_trams.csv"
)
trams_relacio_path = "./data/transit_relacio_trams.csv"

# Air quality stations info (including lat and long)
# (The stations are unchanged since 2023 so downloading only the 2025 version is enough)
air_stations_info_url = (
    "https://opendata-ajuntament.barcelona.cat/data/dataset/"
    "4dff88b1-151b-48db-91c2-45007cd5d07a/resource/"
    "d1aa40d7-66f9-451b-85f8-955b765fdc2f/download/"
    "2025_qualitat_aire_estacions.csv"
)
air_stations_info_path = "./data/air_stations_info.csv"

def generate_urls(download_from, download_until):
    """Build the (url, local filename) pairs for every month in a date range.

    Only the ``year`` and ``month`` attributes of the two arguments are used;
    the range runs from the month of ``download_from`` to the month of
    ``download_until``, both included. An empty list is returned when the
    start month lies after the end month.

    Args:
        download_from: Date whose year/month is the first month to include.
        download_until: Date whose year/month is the last month to include.

    Returns:
        A list of ``(url, filename)`` tuples. Each month contributes two
        consecutive entries: the TRAMS (traffic) file first, then the
        QualitatAire (air quality) file.
    """
    resources_root = "https://opendata-ajuntament.barcelona.cat/resources"

    # Number the months on one linear axis (year * 12 + zero-based month) so
    # that stepping across a year boundary is plain integer arithmetic.
    first_index = download_from.year * 12 + (download_from.month - 1)
    last_index = download_until.year * 12 + (download_until.month - 1)

    pairs = []
    for month_index in range(first_index, last_index + 1):
        year, zero_based_month = divmod(month_index, 12)
        month = zero_based_month + 1
        # Shared "<year>_<MM>_<CatalanName>" stem used by URLs and file names.
        stem = f"{year}_{month:02d}_{catalan_months[month]}"

        pairs.append(
            (
                f"{resources_root}/auto/transit/{stem}_TRAMS_TRAMS.csv",
                f"data/TRAMS_{stem}.csv",
            )
        )
        pairs.append(
            (
                f"{resources_root}/bcn/QualitatAire/{stem}_qualitat_aire_BCN.csv",
                f"data/QualitatAire_{stem}.csv",
            )
        )

    return pairs

def download_file(url, filename, timeout=30):
    """Download ``url`` to ``filename`` unless the file is already present.

    The body is streamed in 8 KiB chunks into ``<filename>.part`` and only
    renamed to ``filename`` once the whole response has been written. This
    matters because an existing ``filename`` is treated as "already
    downloaded": writing straight to the final path would let an interrupted
    transfer leave a truncated file that every later run silently skips.

    Failures are reported with ``print`` and not re-raised, so a caller that
    loops over many files keeps going after a single bad download.

    Args:
        url: Address of the resource to fetch.
        filename: Destination path on disk.
        timeout: Value passed to ``requests.get(timeout=...)`` so a stalled
            server cannot block the download forever.

    Returns:
        None.
    """
    if os.path.exists(filename):
        print(f"File already exists: {filename}. Skipping download.")
        return

    partial_path = f"{filename}.part"
    try:
        # The context manager releases the connection even if writing fails.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(partial_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        # Publish the file under its final name only after a complete write.
        os.replace(partial_path, filename)
        print(f"Downloaded: {filename}")
    except Exception as e:
        # Best-effort cleanup of the incomplete file; it may not exist if the
        # request failed before anything was written.
        try:
            os.remove(partial_path)
        except OSError:
            pass
        print(f"Failed to download {filename}: {e}")


# Create the "data" folder. exist_ok=True makes this a no-op when the folder
# is already there, without a separate check-then-create step.
os.makedirs("./data", exist_ok=True)

# One (url, local filename) pair per monthly traffic / air-quality file.
urls = generate_urls(download_from, download_until)

print("Downloading files...")
for url, filename in urls:
    download_file(url, filename)


# Then download the TRAMS relacio file
# (the existence check here keeps the run quiet when the file is already present)
if not os.path.exists(trams_relacio_path):
    download_file(trams_relacio_url, trams_relacio_path)


# And the air stations info
if not os.path.exists(air_stations_info_path):
    download_file(air_stations_info_url, air_stations_info_path)

Dwonloading files...
File already exists: data/TRAMS_2023_01_Gener.csv. Skipping download.
File already exists: data/QualitatAire_2023_01_Gener.csv. Skipping download.
File already exists: data/TRAMS_2023_02_Febrer.csv. Skipping download.
File already exists: data/QualitatAire_2023_02_Febrer.csv. Skipping download.
File already exists: data/TRAMS_2023_03_Marc.csv. Skipping download.
File already exists: data/QualitatAire_2023_03_Marc.csv. Skipping download.
File already exists: data/TRAMS_2023_04_Abril.csv. Skipping download.
File already exists: data/QualitatAire_2023_04_Abril.csv. Skipping download.
File already exists: data/TRAMS_2023_05_Maig.csv. Skipping download.
File already exists: data/QualitatAire_2023_05_Maig.csv. Skipping download.
File already exists: data/TRAMS_2023_06_Juny.csv. Skipping download.
File already exists: data/QualitatAire_2023_06_Juny.csv. Skipping download.
File already exists: data/TRAMS_2023_07_Juliol.csv. Skipping download.
File already exists: data/Qual

# Data treatment and cleaning


## Creating a merged dataset

### Combining all the air quality files

In [None]:
# Keep only the local paths of the monthly air-quality files
air_quality_files = [path for _, path in urls if "QualitatAire" in path]
print(air_quality_files)

# Convert each CSV to a pandas dataframe
air_quality_dfs = list(map(pd.read_csv, air_quality_files))
# Stack the monthly dataframes into one, renumbering the rows from 0
air_quality_combined = pd.concat(air_quality_dfs, ignore_index=True)
print(air_quality_combined.head())

['data/QualitatAire_2023_01_Gener.csv', 'data/QualitatAire_2023_02_Febrer.csv', 'data/QualitatAire_2023_03_Marc.csv', 'data/QualitatAire_2023_04_Abril.csv', 'data/QualitatAire_2023_05_Maig.csv', 'data/QualitatAire_2023_06_Juny.csv', 'data/QualitatAire_2023_07_Juliol.csv', 'data/QualitatAire_2023_08_Agost.csv', 'data/QualitatAire_2023_09_Setembre.csv', 'data/QualitatAire_2023_10_Octubre.csv', 'data/QualitatAire_2023_11_Novembre.csv', 'data/QualitatAire_2023_12_Desembre.csv', 'data/QualitatAire_2024_01_Gener.csv', 'data/QualitatAire_2024_02_Febrer.csv', 'data/QualitatAire_2024_03_Marc.csv', 'data/QualitatAire_2024_04_Abril.csv']
   CODI_PROVINCIA  PROVINCIA  CODI_MUNICIPI   MUNICIPI  ESTACIO  \
0               8  Barcelona             19  Barcelona        4   
1               8  Barcelona             19  Barcelona        4   
2               8  Barcelona             19  Barcelona        4   
3               8  Barcelona             19  Barcelona        4   
4               8  Barcelona  