In [1]:
import requests
import json
import psycopg2
from psycopg2.extras import execute_values
from urllib.parse import urlencode

from datetime import datetime

In [2]:
# Connect to PostgreSQL database
def connect_to_postgres():
    """
    Open a new connection to the project's PostgreSQL database.

    Each setting is taken from the standard libpq environment variable
    (PGHOST, PGDATABASE, PGUSER, PGPASSWORD) when that variable is set, and
    falls back to the notebook's local-development value otherwise. This
    lets real credentials be supplied without writing them into the notebook.

    Returns:
        The connection object returned by psycopg2.connect. Closing it is
        the caller's responsibility.

    Any exception raised by psycopg2.connect propagates to the caller.
    """
    import os  # local import keeps this cell runnable on its own

    return psycopg2.connect(
        host=os.environ.get("PGHOST", "localhost"),
        database=os.environ.get("PGDATABASE", "lombardia_air_quality"),
        user=os.environ.get("PGUSER", "airdata_user"),
        password=os.environ.get("PGPASSWORD", "user"),
    )

In [None]:
# CREATE TABLE
def create_table_measurement(reset_table=False):
    """
    Create the 'measurement' table in the database if it does not exist.

    Args:
        reset_table (bool): When True, the existing table is dropped first
            and then recreated.

    Any database error propagates to the caller. The cursor and the
    connection are closed in every case, and the commit is skipped when a
    statement fails.
    """
    # One row per sensor reading; (idsensore, data) is the primary key.
    create_sql = """
        CREATE TABLE IF NOT EXISTS measurement (
            idsensore TEXT,
            data TIMESTAMP,
            valore NUMERIC,
            stato TEXT,
            idoperatore TEXT,
            PRIMARY KEY (idsensore, data)
        )
    """

    conn = connect_to_postgres()
    try:
        cur = conn.cursor()
        try:
            if reset_table:
                print("Resetting the table 'measurement'...")
                cur.execute("DROP TABLE IF EXISTS measurement;")

            cur.execute(create_sql)
            conn.commit()
        finally:
            # Runs on success and on failure, so the cursor never leaks.
            cur.close()
    finally:
        # Same guarantee for the connection itself.
        conn.close()


In [4]:
# INSERT DATA
def insert_data_into_table(table_name, data_list):
    """
    Bulk-insert a list of records into a table.

    The column list is the union of the keys of all records, in order of
    first appearance, so a field that is absent from the first record but
    present in a later one is still inserted. A record that lacks a column
    contributes None for it. The statement uses ON CONFLICT DO NOTHING.

    Database errors are printed and the transaction is rolled back; they are
    not re-raised.

    Args:
        table_name (str): Name of the target table. It is interpolated into
            the SQL text as-is, so it must come from trusted code.
        data_list (list of dict): Records to insert, keyed by column name.
            An empty list prints a notice and returns without connecting.
    """
    if not data_list:
        print("No data to insert")
        return

    # dict.fromkeys de-duplicates while keeping first-seen order.
    columns = list(dict.fromkeys(key for item in data_list for key in item))

    # One row per record, aligned with `columns`; missing keys become None.
    values = [[item.get(col) for col in columns] for item in data_list]

    # Column names come from the payload: double any embedded quote so a name
    # cannot terminate its own quoted identifier.
    quoted_columns = ", ".join(
        '"' + str(col).replace('"', '""') + '"' for col in columns
    )

    insert_query = f"""
    INSERT INTO {table_name} ({quoted_columns})
    VALUES %s
    ON CONFLICT DO NOTHING
    """

    conn = connect_to_postgres()
    cursor = conn.cursor()

    try:
        execute_values(cursor, insert_query, values)
        conn.commit()
        # This is the number of records submitted, not the number stored.
        print(f"Inserted {len(data_list)} records into '{table_name}'.")
    except Exception as e:
        print(f"Error inserting into '{table_name}':", e)
        conn.rollback()
    finally:
        cursor.close()
        conn.close()


In [5]:
# Fetch data from Dati Lombardia API
def fetch_data_from_api(api_url, limit=1000, order="Data DESC", timeout=60):
    """
    Fetch records from the API with the given limit and ordering.

    Parameters:
    - api_url: Base URL for the API
    - limit: Number of records to request (default: 1000)
    - order: Field and direction to sort by (default: "Data DESC")
    - timeout: Seconds passed to requests.get as its timeout (default: 60),
      so an unresponsive server cannot block the notebook indefinitely

    Returns:
    - The decoded JSON body of the response

    Raises:
    - Exception: if the response status code is not 200. Errors raised by
      requests itself (including timeouts) propagate unchanged.
    """
    # Query parameters sent as "$limit" and "$order"
    params = {
        "$limit": limit,
        "$order": order
    }

    # Build the URL by hand so the exact request can be shown to the user
    full_url = f"{api_url}?{urlencode(params)}"
    print(f"Requesting data from: {full_url}")

    response = requests.get(full_url, timeout=timeout)
    if response.status_code == 200:
        return response.json()
    else:
        raise Exception(f"API request failed with status code {response.status_code}")

In [None]:

def load_data_from_api(api_url, table_name="measurement", limit=1000, order_by="Data DESC"):
    """
    Download records from an API and insert them into a PostgreSQL table.

    The table name is validated before anything is downloaded: only
    'measurement' has a table-creation function, so any other name prints a
    notice and returns without contacting the API.

    Exceptions raised along the way are caught and printed as "Error: ...";
    this function does not raise them to the caller.

    Args:
        api_url (str): URL of the API to read from.
        table_name (str): Name of the destination table.
        limit (int): Maximum number of records to request.
        order_by (str): Field and direction used to order the results.
    """
    try:
        # Fail fast, before paying for a download that could not be stored.
        # The message reads: "No creation function defined for table ...".
        if table_name != "measurement":
            print(f"Nessuna funzione di creazione definita per la tabella '{table_name}'.")
            return

        print(f"Fetching latest {limit} records from API...")
        raw_data = fetch_data_from_api(api_url, limit, order_by)

        print(f"Received {len(raw_data)} records from API")
        if not raw_data:
            print("Warning: API returned no data.")
            return

        # Show one record so the reader can see the payload's fields.
        print("Sample data item:")
        print(json.dumps(raw_data[0], indent=2))

        print(f"Creating table '{table_name}' if it doesn't exist...")
        create_table_measurement()
        print(f"Table {table_name} created!")

        print("Inserting data into table...")
        insert_data_into_table(table_name, raw_data)

        # NOTE(review): insert_data_into_table reports its own database errors
        # by printing, so this line is reached even when the insert failed.
        print("Process completed successfully!")

    except Exception as e:
        print(f"Error: {str(e)}")



In [None]:
# RUN TO CREATE AND LOAD DATA

# Dati Lombardia API endpoint to read from
api_url = "https://www.dati.lombardia.it/resource/g2hp-ar79.json"

# Destination table in PostgreSQL
table_name = "measurement"

# Create the table if needed and load up to 5000 records
# (ordering is left at the function's default, "Data DESC")
load_data_from_api(api_url=api_url, table_name=table_name, limit=5000)

Fetching latest 500000 records from API...
Requesting data from: https://www.dati.lombardia.it/resource/g2hp-ar79.json?%24limit=500000&%24order=Data+DESC
Received 500000 records from API
Sample data item:
{
  "idsensore": "30166",
  "data": "2025-01-01T00:00:00.000",
  "valore": "2.5",
  "stato": "VA",
  "idoperatore": "1"
}
Creating table 'measurement' if it doesn't exist...
Table 'measurement' created!
Inserting data into table...
Inserted 500000 records into 'measurement'.
Process completed successfully!
