In [None]:
import requests
import json
import psycopg2
from psycopg2.extras import execute_values
from urllib.parse import urlencode

from datetime import datetime

In [None]:
# Connect to PostgreSQL database
def connect_to_postgres():
    conn = psycopg2.connect(
        host="localhost",       
        database="lombardia_air_quality", 
        user="airdata_user",    
        password="user"
    )
    return conn

In [None]:
# CREATE TABLE
def create_table_measurement():
    """
    Crea la tabella sensori_aria nel database se non esiste.
    """
    conn = connect_to_postgres()
    cur = conn.cursor()
    cur.execute("""
        DROP TABLE IF EXISTS measurement;
        CREATE TABLE IF NOT EXISTS measurement (
            idsensore TEXT,
            data TIMESTAMP,
            valore NUMERIC,
            stato TEXT,
            idoperatore TEXT,
            PRIMARY KEY (idsensore, data)
        )
    """)
    conn.commit()
    cur.close()
    conn.close()


In [None]:
# INSERT DATA
def insert_data_into_table(table_name, data_list):
    """
    Inserisce i dati nella tabella specificata.

    Args:
        table_name (str): Nome della tabella.
        data_list (list of dict): Lista dei dati da inserire.
    """
    if not data_list:
        print("No data to insert")
        return

    conn = connect_to_postgres()
    cursor = conn.cursor()

    # Ricava i nomi delle colonne
    columns = list(data_list[0].keys())

    # Prepara i valori
    values = [[item.get(col) for col in columns] for item in data_list]

    # Query SQL dinamica
    insert_query = f"""
    INSERT INTO {table_name} ({', '.join([f'"{col}"' for col in columns])})
    VALUES %s
    ON CONFLICT DO NOTHING
    """

    try:
        execute_values(cursor, insert_query, values)
        conn.commit()
        print(f"Inserted {len(data_list)} records into '{table_name}'.")
    except Exception as e:
        print(f"Error inserting into '{table_name}':", e)
        conn.rollback()
    finally:
        cursor.close()
        conn.close()


In [None]:
# Fetch data from Dati Lombardia API
def fetch_data_from_api(api_url, limit=1000, order="Data DESC"):
    """
    Fetch data from the API with specified limit and order
    
    Parameters:
    - api_url: Base URL for the API
    - limit: Number of records to return (default: 1000)
    - order: Field and direction to sort by (default: "datastop DESC" for latest records)
    """
    # Construct query parameters
    params = {
        "$limit": limit,
        "$order": order
    }
    
    # Append parameters to URL
    full_url = f"{api_url}?{urlencode(params)}"
    print(f"Requesting data from: {full_url}")
    
    response = requests.get(full_url)
    if response.status_code == 200:
        return response.json()
    else:
        raise Exception(f"API request failed with status code {response.status_code}")

In [None]:
# 5. Insert data into table
def insert_data(conn, table_name, data_list):
    cursor = conn.cursor()
    
    if not data_list:
        print("No data to insert")
        return
    
    # Get column names from the first data item
    columns = list(data_list[0].keys())
    
    # Prepare values for insertion
    values = [[item.get(col) for col in columns] for item in data_list]
    
    # Create the SQL query
    insert_query = f"""
    INSERT INTO {table_name} ({', '.join([f'"{col}"' for col in columns])})
    VALUES %s
    """
    
    # Execute the query with all values
    execute_values(cursor, insert_query, values)
    
    conn.commit()
    print(f"Inserted {len(data_list)} records into {table_name}")
    cursor.close()

In [23]:

# API URL
api_url = "https://www.dati.lombardia.it/resource/g2hp-ar79.json"

# Define the table name for your data
table_name = "measurement"

try:
    # Fetch data from API (latest 1000 records)
    print("Fetching latest 1000 records from API...")
    raw_data = fetch_data_from_api(api_url, limit=1000, order="Data DESC")
    
    # Debug: Inspect the data structure and count
    print(f"Received {len(raw_data)} records from API")
    print("Sample data item structure:")
    if raw_data:
        print(json.dumps(raw_data[0], indent=2))
    
    # Connect to PostgreSQL
    print("Connecting to PostgreSQL...")
    conn = connect_to_postgres()
    
    # Create table if it doesn't exist (using actual data to infer schema)
    print("Creating table if it doesn't exist...")
    if raw_data:
        create_table_if_not_exists(conn, table_name, raw_data[0])
    
    # Insert data into table
    print("Inserting data into table...")
    insert_data(conn, table_name, raw_data)
    
    # Close connection
    conn.close()
    print("Process completed successfully!")
    
except Exception as e:
    print(f"Error: {str(e)}")


Fetching latest 1000 records from API...
Requesting data from: https://www.dati.lombardia.it/resource/g2hp-ar79.json?%24limit=1000&%24order=Data+DESC
Received 1000 records from API
Sample data item structure:
{
  "idsensore": "30166",
  "data": "2025-01-01T00:00:00.000",
  "valore": "2.5",
  "stato": "VA",
  "idoperatore": "1"
}
Connecting to PostgreSQL...
Creating table if it doesn't exist...
Inserting data into table...
Error: 'list' object has no attribute 'keys'


In [42]:
# API URL
api_url = "https://www.dati.lombardia.it/resource/g2hp-ar79.json"

# Define the table name for your data
table_name = "measurement"

try:
    # Fetch data from API (latest 1000 records)
    print("Fetching latest 1000 records from API...")
    raw_data = fetch_data_from_api(api_url, 1000, "Data DESC")
    
    # Debug: Inspect the data structure and count
    print(f"Received {len(raw_data)} records from API")
    print("Sample data item structure:")
    if raw_data:
        print(json.dumps(raw_data[0], indent=2))
    
    # Connect to PostgreSQL
    print("Connecting to PostgreSQL...")
    conn = connect_to_postgres()
    
    # Create table if it doesn't exist (using actual data to infer schema)
    print("Creating table if it doesn't exist...")
    sensor_create_table()
    
    # Insert data into table
    print("Inserting data into table...")
    insert_data_into_table(table_name, raw_data)
    #insert_measurement_data(conn, raw_data)
    
    # Close connection
    conn.close()
    print("Process completed successfully!")
    
except Exception as e:
    print(f"Error: {str(e)}")


Fetching latest 1000 records from API...
Requesting data from: https://www.dati.lombardia.it/resource/g2hp-ar79.json?%24limit=1000&%24order=Data+DESC
Received 1000 records from API
Sample data item structure:
{
  "idsensore": "30166",
  "data": "2025-01-01T00:00:00.000",
  "valore": "2.5",
  "stato": "VA",
  "idoperatore": "1"
}
Connecting to PostgreSQL...
Creating table if it doesn't exist...
Creating table sensori_aria if it doesn't exist...
Table sensori_aria created or already exists.
Inserting data into table...
Inserted 1000 records into 'measurement'.
Process completed successfully!


In [None]:
def sensor_create_table():
    """
    Crea la tabella sensori_aria nel database se non esiste.
    """
    conn = connect_to_postgres()
    cur = conn.cursor()
    cur.execute("""
        DROP TABLE IF EXISTS measurement;
        CREATE TABLE IF NOT EXISTS measurement (
            idsensore TEXT,
            data TIMESTAMP,
            valore NUMERIC,
            stato TEXT,
            idoperatore TEXT,
            PRIMARY KEY (idsensore, data)
        )
    """)
    conn.commit()
    cur.close()
    conn.close()
    print("Table sensori_aria created or already exists.")


Creating table sensori_aria if it doesn't exist...
Table sensori_aria created or already exists.


In [None]:
from datetime import datetime

def insert_measurement_data(data):
    """
    Inserisce i dati nella tabella 'measurement'.

    Args:
        data (list of dict): Lista di dizionari con i dati da inserire.
                             Ogni dizionario deve contenere:
                             'idsensore', 'data', 'valore', 'stato', 'idoperatore'
    """

    conn = connect_to_postgres()
    cur = conn.cursor()
    insert_rows = []

    for r in data:
        try:
            dt = datetime.strptime(r.get('data'), "%Y-%m-%dT%H:%M:%S.%f")
            insert_rows.append((
                r.get('idsensore'),
                dt,
                r.get('valore'),
                r.get('stato'),
                r.get('idoperatore')
            ))
        except Exception as e:
            print(f"Errore nel parsing del record: {e} | Dato problematico: {r}")

    if insert_rows:
        try:
            cur.executemany("""
                INSERT INTO measurement (idsensore, data, valore, stato, idoperatore)
                VALUES (%s, %s, %s, %s, %s)
                ON CONFLICT (idsensore, data) DO NOTHING
            """, insert_rows)
            conn.commit()
            print(f"Inseriti {len(insert_rows)} record nella tabella 'measurement'.")
        except Exception as e:
            print("Errore durante l'inserimento dei dati:", e)
            conn.rollback()

    cur.close()
    conn.close()


In [None]:

def load_data_from_api(api_url, table_name="measurement", limit=1000, order_by="Data DESC"):
    """
    Carica i dati da un'API e li inserisce in una tabella PostgreSQL.

    Args:
        api_url (str): URL dell'API da cui prelevare i dati.
        table_name (str): Nome della tabella di destinazione.
        limit (int): Numero massimo di record da recuperare.
        order_by (str): Campo per ordinare i risultati.
    """
    try:
        print(f"Fetching latest {limit} records from API...")
        raw_data = fetch_data_from_api(api_url, limit, order_by)
        
        print(f"Received {len(raw_data)} records from API")
        if raw_data:
            print("Sample data item:")
            print(json.dumps(raw_data[0], indent=2))
        else:
            print("Warning: API returned no data.")
            return

        print(f"Creating table '{table_name}' if it doesn't exist...")
        if table_name == "measurement":
            create_table_measurement()
            print("Table 'measurement' created!")
        else:
            print(f"Nessuna funzione di creazione definita per la tabella '{table_name}'.")
            conn.close()
            return

        print("Inserting data into table...")
        insert_data_into_table(table_name, raw_data)

        conn.close()
        print("Process completed successfully!")

    except Exception as e:
        print(f"Error: {str(e)}")



In [53]:
# RUN TO CRAETE AND LOAD DATA

# API URL
api_url = "https://www.dati.lombardia.it/resource/g2hp-ar79.json"

# Define the table name for your data
table_name = "measurement"

load_data_from_api(api_url, table_name, 500)


Fetching latest 500 records from API...
Requesting data from: https://www.dati.lombardia.it/resource/g2hp-ar79.json?%24limit=500&%24order=Data+DESC
Received 500 records from API
Sample data item:
{
  "idsensore": "30166",
  "data": "2025-01-01T00:00:00.000",
  "valore": "2.5",
  "stato": "VA",
  "idoperatore": "1"
}
Creating table 'measurement' if it doesn't exist...
Table 'measurement' created!
Inserting data into table...
Inserted 500 records into 'measurement'.
Process completed successfully!
