In [1]:
import requests
import json
import psycopg2
from psycopg2.extras import execute_values
from urllib.parse import urlencode

from datetime import datetime

In [2]:
# Connect to PostgreSQL database
def connect_to_postgres():
    conn = psycopg2.connect(
        host="localhost",       
        database="lombardia_air_quality", 
        user="airdata_user",    
        password="user"
    )
    return conn

In [3]:
# CREATE TABLE
def create_table_station():
    """
    Crea la tabella sensori_aria nel database se non esiste.
    """
    conn = connect_to_postgres()
    cur = conn.cursor()
    cur.execute("""
        DROP TABLE IF EXISTS station;
        CREATE TABLE IF NOT EXISTS station (
            idsensore TEXT PRIMARY KEY,
            nometiposensore TEXT,
            unitamisura TEXT,
            idstazione TEXT,
            nomestazione TEXT,
            quota TEXT,
            provincia TEXT,
            comune TEXT,
            storico TEXT,
            datastart TIMESTAMP,
            datastop TIMESTAMP,
            utm_nord TEXT,
            utm_est TEXT,
            lat NUMERIC,
            lng NUMERIC,
            location TEXT
        )
    """)
    conn.commit()
    cur.close()
    conn.close()


In [4]:
# INSERT DATA
def insert_data_into_table(table_name, data_list):
    """
    Inserisce i dati nella tabella specificata.

    Args:
        table_name (str): Nome della tabella.
        data_list (list of dict): Lista dei dati da inserire.
    """
    if not data_list:
        print("No data to insert")
        return

    conn = connect_to_postgres()
    cursor = conn.cursor()

    # Ricava i nomi delle colonne
    columns = list(data_list[0].keys())
    
    

    # Prepara i valori
    values = [[item.get(col) for col in columns] for item in data_list]

    # Query SQL dinamica
    insert_query = f"""
    INSERT INTO {table_name} ({', '.join([f'"{col}"' for col in columns])})
    VALUES %s
    ON CONFLICT DO NOTHING
    """

    try:
        execute_values(cursor, insert_query, values)
        conn.commit()
        print(f"Inserted {len(data_list)} records into '{table_name}'.")
    except Exception as e:
        print(f"Error inserting into '{table_name}':", e)
        conn.rollback()
    finally:
        cursor.close()
        conn.close()


In [5]:
# Fetch data from Dati Lombardia API

def fetch_data_from_api(api_url):
    """
    Fetch data from the API with specified limit and order
    
    Parameters:
    - api_url: Base URL for the API
    """
    
    # Append parameters to URL
    full_url = api_url
    print(f"Requesting data from: {full_url}")
    
    response = requests.get(full_url)
    if response.status_code == 200:
        print(response.json())
        return response.json()
    else:
        raise Exception(f"API request failed with status code {response.status_code}")

In [6]:
# FINAL FUNCTION TO LOAD DATA
def load_data_from_api(api_url, table_name):
    """
    Carica i dati da un'API e li inserisce in una tabella PostgreSQL.

    Args:
        api_url (str): URL dell'API da cui prelevare i dati.
        table_name (str): Nome della tabella di destinazione.
        limit (int): Numero massimo di record da recuperare.
        order_by (str): Campo per ordinare i risultati.
    """
    try:
        print(f"Fetching records from API...")
        raw_data = fetch_data_from_api(api_url)
        
        print(f"Received {len(raw_data)} records from API")
        if raw_data:
            print("Sample data item:")
            print(json.dumps(raw_data[0], indent=2))
        else:
            print("Warning: API returned no data.")
            return

        print(f"Creating table '{table_name}' if it doesn't exist...")
        if table_name == "station":
            create_table_station()
            print(f"Table {table_name} created!")
        else:
            print(f"Nessuna funzione di creazione definita per la tabella '{table_name}'.")
            return

        print("Inserting data into table...")
        insert_data_into_table(table_name, raw_data)

        print("Process completed successfully!")

    except Exception as e:
        print(f"Error: {str(e)}")



In [None]:
# RUN TO CREATE AND LOAD DATA

# API URL
api_url = "https://www.dati.lombardia.it/resource/ib47-atvt.json"

# Define the table name for your data
table_name = "station"

# Create the table and load data (limit=1000, order_by="Data DESC")
load_data_from_api(api_url, table_name)


Fetching records from API...
Requesting data from: https://www.dati.lombardia.it/resource/ib47-atvt.json
[{'idsensore': '12691', 'nometiposensore': 'Arsenico', 'unitamisura': 'ng/m³', 'idstazione': '560', 'nomestazione': 'Varese v.Copelli', 'quota': '383', 'provincia': 'VA', 'comune': 'Varese', 'storico': 'N', 'datastart': '2008-04-01T00:00:00.000', 'utm_nord': '5073728', 'utm_est': '486035', 'lat': '45.81697450', 'lng': '8.82024911', 'location': {'type': 'Point', 'coordinates': [8.82024911, 45.8169745]}, ':@computed_region_6hky_swhk': '1', ':@computed_region_ttgh_9sm5': '1', ':@computed_region_af5v_nc64': '3'}, {'idsensore': '5712', 'nometiposensore': 'Ozono', 'unitamisura': 'µg/m³', 'idstazione': '510', 'nomestazione': 'Inzago v.le Gramsci', 'quota': '138', 'provincia': 'MI', 'comune': 'Inzago', 'storico': 'S', 'datastart': '2001-02-24T00:00:00.000', 'datastop': '2018-01-01T00:00:00.000', 'utm_nord': '5043030', 'utm_est': '538012', 'lat': '45.53976956', 'lng': '9.48689669', 'location