In [None]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# dataset

> Create a dataset of storm matches

In [None]:
#| default_exp dataset

In [None]:
#| export
from pathlib import Path
import geopandas as gpd
import sqlite3
import datetime
import numpy as np
import gc


from tathu.io import spatialite
from tathu.constants import KM_PER_DEGREE

import exp4.core
from exp4.core import *
#from exp4.core import load_relato

## Loading PreVots data

In [None]:
#| hide

# PreVots hail reports (old and new) loaded from a single CSV.
# NOTE(review): every path in this cell is an absolute path on one Windows machine;
# the notebook cannot run elsewhere until these are made configurable.
pth_relatos = Path(r"C:\Users\caioa\TESE\Experimento_3\relatos\granizo_2018-2023_novos.csv")
# `load_relato` is not defined in this notebook; presumably it comes from the
# `from exp4.core import *` star import — TODO confirm.
relatos = load_relato(pth_relatos)


# Brazilian municipalities (IBGE 2022 shapefile), indexed by (municipality name, state code).
# NOTE(review): `shp_cities` is not referenced again in the visible cells.
pth_ibge = Path(r"C:\Users\caioa\TESE\Experimento_3\ibge")
shp_cities = gpd.read_file(pth_ibge / "BR_MUNICIPIOS_2022" / "BR_MUNICIPIOS_2022.shp")
shp_cities.set_index(["NM_MUN", "SIGLA_UF"], inplace=True)


# Directory holding the tracked-storm databases; the matching loop below picks
# the file whose name starts with the report's year.
pth_db = Path(r"C:\Users\caioa\TESE\Experimento_3\dbs_v2")
#db_name = "20230101_20231231_systems-db.sqlite"
# NOTE(review): `table_name` is not used by the visible queries, which hard-code `systems`.
table_name = "systems"


  df_relatos = pd.read_csv(pth,
  df_relatos = pd.read_csv(pth,


In [None]:
#| hide
# Strip runs of spaces at the start and end of the city and state strings.
relatos[["cidade ", "uf"]] = relatos[["cidade ", "uf"]].replace(r"^ +| +$", r"", regex=True) # The city column name itself carries a trailing space: 'cidade '
#relatos.head(4)
relatos.tail(4)

Unnamed: 0,date_time,qualidade,dt_min,lat,lon,dx_km,cidade,uf,tipo,intensidade,geometry,buffer
13147,2023-09-28 19:30:00,1,15.0,-19.9777,-42.1451,3.0,Santa Barbara do Leste,MG,GRA,1.0,POINT (-42.14510 -19.97770),"POLYGON ((-41.93849 -19.97770, -41.93948 -19.9..."
13148,2023-09-28 20:48:00,1,10.0,-18.1654,-47.9458,5.0,Catalão,GO,GRA,1.5,POINT (-47.94580 -18.16540),"POLYGON ((-47.72122 -18.16540, -47.72230 -18.1..."
13149,2023-09-30 19:45:00,1,15.0,-22.348,-46.938,5.0,Mogi Guaçu,SP,GRA,3.0,POINT (-46.93800 -22.34800),"POLYGON ((-46.71342 -22.34800, -46.71450 -22.3..."
13150,2023-09-30 20:00:00,1,15.0,-22.2783,-46.9536,5.0,Estiva Gerbi,SP,GRA,4.0,POINT (-46.95360 -22.27830),"POLYGON ((-46.72902 -22.27830, -46.73010 -22.3..."


In [None]:
#| hide
# Total number of hail reports; used in the progress message of the matching loop.
total_rel = len(relatos)
total_rel


13151

## Iterating to build the Database


In [None]:
#| hide

# SQL templates used against the yearly tracked-storm databases.
# They are plain strings (not f-strings): every value is supplied through `?`
# placeholders at execution time, so nothing is interpolated into the SQL text.

# Storm polygons that intersect the buffered report location inside the report's
# time window, ordered so that the largest overlap comes first.
# Placeholders, in order: lon, lat, buffer, date_start, date_end, lon, lat, buffer.
intersec_query = """
    SELECT
        name, min, mean, std, count, event, relationships,
        strftime('%Y-%m-%d %H:%M:00', date_time) as date,
        ST_AsBinary(geom) as geom,
        ST_Area(ST_Intersection(ST_Buffer(MakePoint(?, ?), ?), geom)) as intersection
    FROM
        systems
    WHERE
        date BETWEEN ? AND ?
        AND ST_Intersects(
            ST_Buffer(MakePoint(?, ?), ?), 
            geom
        )
    ORDER BY
        intersection DESC
"""

# All polygons related to a matched system, in chronological order.
# Placeholders, in order: name, name, relationships — i.e. the system itself,
# systems whose `relationships` points at it, and the system it points at.
name_query = """
    SELECT
        name, min, mean, std, count, event, relationships,
        strftime('%Y-%m-%d %H:%M:00', date_time) as date,
        ST_AsBinary(geom) as geom
    FROM
        systems
    WHERE
        name = ? OR relationships = ? OR name = ? 
    ORDER BY
        date ASC
"""

# All polygons of one system selected by name only, in chronological order.
# Placeholder: name.
justname_query = """
    SELECT
        name, min, mean, std, count, event, relationships,
        strftime('%Y-%m-%d %H:%M:00', date_time) as date,
        ST_AsBinary(geom) as geom
    FROM
        systems
    WHERE
        name = ?  
    ORDER BY
        date ASC
"""


In [None]:
#| export
from sqlalchemy.orm import declarative_base
import geoalchemy2
from geoalchemy2 import load_spatialite, WKTElement
from sqlalchemy import create_engine, Column, Integer, Float, String, DateTime, ForeignKey
from sqlalchemy.event import listen
import pandas as pd
from geoalchemy2.shape import from_shape
from sqlalchemy.orm import sessionmaker, relationship
import os

# Location of the SpatiaLite extension, read by geoalchemy2's `load_spatialite`.
# `setdefault` keeps a value already configured in the environment, so this
# exported module no longer overwrites it with one machine's path; the original
# path remains the fallback when the variable is unset.
os.environ.setdefault(
    'SPATIALITE_LIBRARY_PATH',
    "C:\\Users\\caioa\\mambaforge\\envs\\fast-env\\Library\\bin\\mod_spatialite.dll",
)
#SPATIALITE_LIBRARY_PATH = Path(r"C:\Users\caioa\mambaforge\envs\fast-env\Library\bin")


Base = declarative_base()

class Storm(Base):
    """
    Table with one row per unique storm.

    A storm groups every `StormEvent` that belongs to the same system, including
    its evolution through splits and merges. Related events are reachable through
    the `events` backref declared on `StormEvent.storm`.
    """
    __tablename__ = "storms"
    
    # Surrogate primary key.
    id = Column(Integer, primary_key=True)
    # Name of the tracked system this storm was created from; indexed because the
    # matching loop looks storms up by it.
    # NOTE(review): the column is not declared unique, so duplicate identifiers
    # are possible at the schema level.
    identifier = Column(String, index=True)
    ##TODO: Additional fields to track the first and last appearance


class StormEvent(Base):
    """
    Table with one row per storm observation, i.e. the storm's physical data
    in a single satellite scene.
    """
    __tablename__ = "storm_events"

    # Surrogate primary key.
    id = Column(Integer, primary_key=True)
    # Owning storm (storms.id).
    storm_id = Column(Integer, ForeignKey("storms.id"))
    # Timestamp of the scene.
    datetime = Column(DateTime)
    # Statistics of the system in this scene; filled from the source columns
    # `mean`, `min`, `std` and `count`.
    # NOTE(review): "bt" presumably stands for brightness temperature — confirm.
    mean_bt = Column(Float)
    min_bt = Column(Float)
    std_dev_bt = Column(Float)
    count = Column(Integer)
    # Lifecycle label taken from the source `event` column
    # (e.g. SPONTANEOUS_GENERATION, CONTINUITY).
    event_type = Column(String)    
    # Outline of the system in this scene.
    # NOTE(review): no SRID is declared here, while the example queries read the
    # geometries back as EPSG:4326 — confirm the intended SRID.
    geometry = Column(geoalchemy2.Geometry(geometry_type = "POLYGON"))
    # Many-to-one link to the storm; also creates the `Storm.events` backref.
    storm = relationship("Storm", backref="events")


class Intersection(Base):
    """
    Table with one row per match between a storm polygon and a hail report.
    """
    __tablename__ = "intersections"
    
    # Surrogate primary key.
    id = Column(Integer, primary_key=True)
    # Storm event whose polygon matched the report (storm_events.id).
    storm_event_id = Column(Integer, ForeignKey("storm_events.id"))
    #hail_report_id = Column(Integer, ForeignKey("hail_reports.id"))
    # Timestamp of the matched storm event.
    intersection_time = Column(DateTime)
    # Overlap between the storm polygon and the report's buffer.
    # NOTE(review): the column is typed POLYGON, but a geometric intersection can
    # also be a multi-part or empty geometry — confirm such results are acceptable.
    intersection_geom = Column(geoalchemy2.Geometry(geometry_type = "POLYGON"))
    # Many-to-one link to the event; also creates the `StormEvent.intersections` backref.
    storm_event = relationship("StormEvent", backref="intersections")
    

In [None]:
#| export

# Output database: a SQLite file created relative to the current working directory.
# NOTE(review): echo=True makes SQLAlchemy log every statement, which floods the
# notebook output during the matching loop.
engine = create_engine('sqlite:///full_database.db', echo=True)  # SQLite database file
# Run `load_spatialite` on every new DBAPI connection of this engine.
listen(engine, "connect", load_spatialite)

# Open a first connection; it is closed again at the top of the next cell.
# NOTE(review): this cell is exported, so importing the generated module opens
# the database as a side effect.
conn_w = engine.connect()

In [None]:
#| export
# Release the connection opened in the previous cell before creating the schema.
conn_w.close()

# Create the `storms`, `storm_events` and `intersections` tables declared on Base.
Base.metadata.create_all(engine)  # Create tables

2024-04-26 19:13:38,423 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-04-26 19:13:38,424 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("storms")
2024-04-26 19:13:38,425 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-04-26 19:13:38,425 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("storms")
2024-04-26 19:13:38,426 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-04-26 19:13:38,427 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("storm_events")
2024-04-26 19:13:38,427 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-04-26 19:13:38,428 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("storm_events")
2024-04-26 19:13:38,428 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-04-26 19:13:38,429 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("intersections")
2024-04-26 19:13:38,429 INFO sqlalchemy.engine.Engine [raw sql] ()
2024-04-26 19:13:38,430 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("intersections")
2024-04-26 19:13:38,431 INFO sqlalchemy.engine

In [None]:
#| hide

Session = sessionmaker(bind=engine)

# Timestamp format shared by the SQL queries and the parsing of their `date` column.
DATE_FMT = '%Y-%m-%d %H:%M:00'

# Number of leading fields (name, min, mean, std, count, event) compared to
# recognise the best-matching polygon among the related rows.
N_MATCH_FIELDS = 6

# For every hail report: find the storm polygon that best overlaps the report,
# fetch the whole lifecycle of that storm, and store it (plus the intersection)
# in the output database. Each report is written in a single transaction.
#
# NOTE(review): a storm matched by several reports has its events inserted once
# per report; there is no de-duplication of `storm_events` here.
# NOTE(review): `conn` is reopened whenever the year changes and is never closed
# explicitly; this assumes `relatos` is ordered by date — confirm.
year = 0
for rel_num, relato in relatos.iterrows():

    print(f"Processing Relato {rel_num} out of {total_rel} \n")

    # The tracked systems live in one database file per year: (re)connect only
    # when the report's year differs from the previous one.
    if year != relato["date_time"].year:
        year = relato["date_time"].year

        # glob() already yields paths that include `pth_db`, so the match is
        # used directly instead of being joined to `pth_db` a second time.
        db_file = next(pth_db.glob(f"{year}*"), None)
        if db_file is None:
            raise FileNotFoundError(
                f"No storm database starting with '{year}' found in {pth_db}"
            )
        conn = connect_db(db_file)

    # Event window: report time +- the report's time uncertainty (minutes)
    uncertainty = datetime.timedelta(minutes=int(relato["dt_min"]))
    date_start = (relato["date_time"] - uncertainty).strftime(DATE_FMT)
    date_end = (relato["date_time"] + uncertainty).strftime(DATE_FMT)

    # Buffer (10Km + event uncertainty) corrected to degree
    buffer_size = (relato["dx_km"] + 10) / KM_PER_DEGREE

    # Params for query-intersect, in placeholder order
    params = (relato["lon"], relato["lat"],
              buffer_size,
              date_start, date_end,
              relato["lon"], relato["lat"],
              buffer_size)

    gdf_filtered = query2gdf(conn, intersec_query, params, "geom")

    if gdf_filtered.empty: # In case there is no match for the hail report
        continue

    # The query orders by overlap area, so the first row is the best match
    best_match = gdf_filtered.iloc[0, :]

    # Case same name, case split of the storm, case match is the split of the storm
    name_params = (best_match["name"], best_match["name"], best_match["relationships"])

    # Query related storms (same storm lifecycle)
    gdf_related = query2gdf(conn, name_query, name_params, "geom")

    with Session() as session:
        for _, row in gdf_related.iterrows():

            # Resolve the unique storm this row belongs to. Candidates, in order:
            #   1. the row's own name (first storm event is always spontaneous),
            #   2. the row's origin, in case the origin of the split is already in the db,
            #   3. the best match's name, which is the identifier given to storms
            #      created below. Without this lookup every row of the parent
            #      system created another Storm with the same identifier.
            candidates = dict.fromkeys(
                (row['name'], row['relationships'], best_match['name'])
            )
            storm = None
            for identifier in candidates:
                storm = session.query(Storm).filter_by(identifier=identifier).first()
                if storm:
                    break

            if not storm:
                storm = Storm(identifier=best_match['name'])
                session.add(storm)
                session.flush()  # assigns storm.id without ending the transaction

            event_time = pd.to_datetime(row['date'], format=DATE_FMT)

            # Insert storm event
            storm_event = StormEvent(
                storm_id=storm.id,
                datetime=event_time,
                mean_bt=row['mean'],
                min_bt=row['min'],
                std_dev_bt=row['std'],
                count=row["count"],
                event_type=row['event'],
                geometry=WKTElement(row['geom'].wkt)
                #geometry=from_shape(row['geom'], srid=4326)  # Ensure SRID matches your data
            )
            session.add(storm_event)
            session.flush()  # ensures 'storm_event' has an 'id' before using it in 'Intersection'

            # Adding Prevots
            #hail_report = HailReport(datetime=relato['date_time'], location=from_shape(Point(relato['lon'], relato['lat']), srid=4326))
            #session.add(hail_report)
            #session.commit()  # Commit to get an ID for the hail report

            # Only the row that is the best match itself gets an intersection record
            if row.iloc[:N_MATCH_FIELDS].equals(best_match.iloc[:N_MATCH_FIELDS]):
                print("adding intesection \n")

                # NOTE(review): the result may be a multi-part or empty geometry,
                # while the target column is declared as POLYGON — confirm.
                intersec_geom = row["geom"].intersection(relato["buffer"])

                intersection = Intersection(
                    storm_event_id=storm_event.id,
                    intersection_time=event_time,
                    intersection_geom=WKTElement(intersec_geom.wkt)
                )
                session.add(intersection)

        # One commit per report: everything above is saved atomically, and an
        # exception rolls the whole report back when the session closes.
        session.commit()

2024-04-26 19:15:16,928 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-04-26 19:15:16,929 INFO sqlalchemy.engine.Engine SELECT storms.id AS storms_id, storms.identifier AS storms_identifier 
FROM storms 
WHERE storms.identifier = ?
 LIMIT ? OFFSET ?
2024-04-26 19:15:16,930 INFO sqlalchemy.engine.Engine [cached since 58.67s ago] ('f3d6b4c7-882b-429d-a377-263f52b318a5', 1, 0)
2024-04-26 19:15:16,931 INFO sqlalchemy.engine.Engine SELECT storms.id AS storms_id, storms.identifier AS storms_identifier 
FROM storms 
WHERE storms.identifier = ?
 LIMIT ? OFFSET ?
2024-04-26 19:15:16,932 INFO sqlalchemy.engine.Engine [cached since 58.67s ago] ('', 1, 0)
2024-04-26 19:15:16,933 INFO sqlalchemy.engine.Engine INSERT INTO storms (identifier) VALUES (?)
2024-04-26 19:15:16,933 INFO sqlalchemy.engine.Engine [cached since 58.67s ago] ('f3d6b4c7-882b-429d-a377-263f52b318a5',)
2024-04-26 19:15:16,934 INFO sqlalchemy.engine.Engine COMMIT
2024-04-26 19:15:16,935 INFO sqlalchemy.engine.Engine BEGIN (im

## Example Query 1: Retrieve all storm events (whole lifecycle) associated with the first storm in the database

In [None]:
#| hide
import geopandas as gpd
from shapely import wkt
from geoalchemy2.shape import to_shape


# Open a session explicitly instead of relying on the `session` name left over
# from the `with` block of the matching loop (which is already closed, and does
# not exist at all when no report matched). It stays open on purpose: the next
# example cells lazy-load relationships from objects attached to it.
session = Session()

# Order by primary key so that "first" is deterministic.
first_storm = session.query(Storm).order_by(Storm.id).first()

if first_storm:
    # One record per event of the storm's lifecycle
    data = [{
        "name": first_storm.identifier,
        "datetime": event.datetime,
        "mean_bt": event.mean_bt,
        "geometry": to_shape(event.geometry),
        "event_type": event.event_type
    } for event in first_storm.events]
else:
    print("No storms found in the database.")
    data = []

# Store the results in a GeoDataFrame. The columns are given explicitly so the
# frame can still be built (empty) when there is nothing to show.
gdf = gpd.GeoDataFrame(
    data,
    columns=["name", "datetime", "mean_bt", "geometry", "event_type"],
    geometry="geometry",
    crs="EPSG:4326",
)
#print(gdf) # Uncomment if working with scripts instead of notebook


gdf #Notebook only


2024-04-26 19:15:27,973 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2024-04-26 19:15:27,975 INFO sqlalchemy.engine.Engine SELECT storms.id AS storms_id, storms.identifier AS storms_identifier 
FROM storms
 LIMIT ? OFFSET ?
2024-04-26 19:15:27,975 INFO sqlalchemy.engine.Engine [generated in 0.00051s] (1, 0)
2024-04-26 19:15:27,977 INFO sqlalchemy.engine.Engine SELECT storm_events.id AS storm_events_id, storm_events.storm_id AS storm_events_storm_id, storm_events.datetime AS storm_events_datetime, storm_events.mean_bt AS storm_events_mean_bt, storm_events.min_bt AS storm_events_min_bt, storm_events.std_dev_bt AS storm_events_std_dev_bt, storm_events.count AS storm_events_count, storm_events.event_type AS storm_events_event_type, AsEWKB(storm_events.geometry) AS storm_events_geometry 
FROM storm_events 
WHERE ? = storm_events.storm_id
2024-04-26 19:15:27,978 INFO sqlalchemy.engine.Engine [generated in 0.00041s] (1,)


Unnamed: 0,name,datetime,mean_bt,geometry,event_type
0,a9b41e50-7ff7-40b8-bc0f-e488e357ae78,2018-06-05 17:45:00,231.896277,"POLYGON ((-48.70090 -26.84396, -48.61104 -26.8...",SPONTANEOUS_GENERATION
1,a9b41e50-7ff7-40b8-bc0f-e488e357ae78,2018-06-05 18:00:00,231.074087,"POLYGON ((-48.55713 -26.86192, -48.41337 -26.8...",CONTINUITY
2,a9b41e50-7ff7-40b8-bc0f-e488e357ae78,2018-06-05 18:15:00,230.789725,"POLYGON ((-48.46728 -27.05956, -48.57510 -27.0...",CONTINUITY
3,a9b41e50-7ff7-40b8-bc0f-e488e357ae78,2018-06-05 18:30:00,230.924504,"POLYGON ((-48.05396 -26.82599, -47.98208 -26.8...",CONTINUITY
4,a9b41e50-7ff7-40b8-bc0f-e488e357ae78,2018-06-05 18:45:00,230.933879,"POLYGON ((-47.76644 -26.86192, -47.64065 -26.8...",CONTINUITY
5,a9b41e50-7ff7-40b8-bc0f-e488e357ae78,2018-06-05 19:00:00,230.608942,"POLYGON ((-47.80238 -26.86192, -47.73050 -26.8...",CONTINUITY
6,a9b41e50-7ff7-40b8-bc0f-e488e357ae78,2018-06-05 19:15:00,230.790061,"POLYGON ((-47.62268 -26.84396, -47.53282 -26.8...",CONTINUITY
7,a9b41e50-7ff7-40b8-bc0f-e488e357ae78,2018-06-05 19:30:00,231.307595,"POLYGON ((-47.44297 -26.82599, -47.31718 -26.8...",CONTINUITY
8,a9b41e50-7ff7-40b8-bc0f-e488e357ae78,2018-06-05 19:45:00,231.368544,"POLYGON ((-47.33515 -26.82599, -47.31718 -26.8...",CONTINUITY


## Example Query 2: Retrieve all intersections with hailstorm reports of the first storm together with the specific storm event of intersection

In [None]:
# Uses `first_storm` from the previous example cell.
if first_storm:
    # One record per (storm event, intersection) pair of the first storm
    data = [{
        "name": first_storm.identifier,
        "datetime": event.datetime,
        "mean_bt": event.mean_bt,
        "event_type": event.event_type,
        "intersection_geometry": to_shape(intersection.intersection_geom)
    } for event in first_storm.events for intersection in event.intersections]
else:
    print("No storms found in the database.")
    data = []

# The columns are given explicitly so the frame is always defined, even when the
# database is empty or the storm has no recorded intersections.
intersec_gdf = gpd.GeoDataFrame(
    data,
    columns=["name", "datetime", "mean_bt", "event_type", "intersection_geometry"],
    geometry="intersection_geometry",
    crs="EPSG:4326",
)
#print(intersec_gdf) ## Uncomment if working with scripts instead of notebook

intersec_gdf # just for notebooks
#data

## Example Query 3: Retrieve all "storm_events" of the storm's whole lifecycle based on an intersection with a hail report

In [None]:
# Start from the first stored intersection (ordered by primary key so the choice
# is deterministic) and walk back to the storm it belongs to.
first_intersection = session.query(Intersection).order_by(Intersection.id).first()

if first_intersection:
    storm = first_intersection.storm_event.storm
    # One record per event of that storm's whole lifecycle
    data = [{
        "name": storm.identifier,
        "Event ID": event.id,
        "DateTime": event.datetime,
        "Geometry": to_shape(event.geometry)
    } for event in storm.events]
else:
    print("No intersections found in the database.")
    data = []

# Always (re)build `gdf` here so that the cell never displays the stale frame of
# an earlier example when no intersection exists.
gdf = gpd.GeoDataFrame(
    data,
    columns=["name", "Event ID", "DateTime", "Geometry"],
    geometry="Geometry",
    crs="EPSG:4326",
)
#print(gdf) ## Uncomment if working with scripts instead of notebook
gdf

## Testing area Databases

In [None]:
#| hide
"""
year = 2018
db_file = [db_name for db_name in pth_db.glob(f"{str(year)}*")][0]

relato_teste = relatos.iloc[10,:]
relato_teste
"""

In [None]:
#| hide
"""
# Update year
year = relato_teste["date_time"].year

# Path to the respective database and connect
db_file = [db_name for db_name in pth_db.glob(f"{str(year)}*")][0]
conn = connect_db(pth_db / db_file)

# Time uncertainty
time_dt = relato_teste["dt_min"] + 5#

# Janela temporal do evento (horário +- incerteza temporal do relato)
date_start = datetime.datetime.strftime(relato_teste["date_time"] - datetime.timedelta(minutes = int(time_dt)),
                       '%Y-%m-%d %H:%M:00')
date_end = datetime.datetime.strftime(relato_teste["date_time"] + datetime.timedelta(minutes = int(time_dt)),
                       '%Y-%m-%d %H:%M:00')

# Buffer de área do evento (20Km + incerteza espacial do evento) corrigida para grau
buffer_size = (relato_teste["dx_km"] + 20) / KM_PER_DEGREE


# Parametros passados para a query no banco de dados
params = (relato_teste.lon, relato_teste.lat, 
          buffer_size, 
          date_start, date_end, 
          relato_teste.lon, relato_teste.lat, 
          buffer_size)

gdf_filtered = query2gdf(conn, intersec_query, params, "geom")


best_match = gdf_filtered.iloc[0,:]
best_match
"""

'\n# Update year\nyear = relato_teste["date_time"].year\n\n# Path to the respective database and connect\ndb_file = [db_name for db_name in pth_db.glob(f"{str(year)}*")][0]\nconn = connect_db(pth_db / db_file)\n\n# Time uncertainty\ntime_dt = relato_teste["dt_min"] + 5#\n\n# Janela temporal do evento (horário +- incerteza temporal do relato)\ndate_start = datetime.datetime.strftime(relato_teste["date_time"] - datetime.timedelta(minutes = int(time_dt)),\n                       \'%Y-%m-%d %H:%M:00\')\ndate_end = datetime.datetime.strftime(relato_teste["date_time"] + datetime.timedelta(minutes = int(time_dt)),\n                       \'%Y-%m-%d %H:%M:00\')\n\n# Buffer de área do evento (20Km + incerteza espacial do evento) corrigida para grau\nbuffer_size = (relato_teste["dx_km"] + 20) / KM_PER_DEGREE\n\n\n# Parametros passados para a query no banco de dados\nparams = (relato_teste.lon, relato_teste.lat, \n          buffer_size, \n          date_start, date_end, \n          relato_teste.l

In [None]:
#| hide
"""
# Check names of match
name_params = (best_match["name"], best_match["name"], best_match["relationships"]) # Case same name, case split of the storm, case match is the split of the storm
#name_params
gdf_related = query2gdf(conn, name_query, name_params, "geom")
gdf_related
"""

'\n# Check names of match\nname_params = (best_match["name"], best_match["name"], best_match["relationships"]) # Case same name, case split of the storm, case match is the split of the storm\n#name_params\ngdf_related = query2gdf(conn, name_query, name_params, "geom")\ngdf_related\n'

## Testing Area

In [None]:
#| hide
"""
year = 0
for _, relato in relatos.iterrows():

    # Year for the database search and connection
    if year != relato["date_time"].year:
        
        # Update year
        year = relato["date_time"].year
        
        # Path to the respective database and connect
        db_file = [db_name for db_name in pth_db.glob(f"{str(year)}*")][0]
        conn = connect_db(pth_db / db_file)

    else:
        pass # skip in case year is the same 

    
    # Get reports specs for query
    dt = relato["dt_min"] # Report's time uncertainty

    # Event window (datetime +- PreVots time uncertainty)
    date_start = datetime.datetime.strftime(relato["date_time"] - datetime.timedelta(minutes = int(dt)),
                           '%Y-%m-%d %H:%M:00')
    
    date_end = datetime.datetime.strftime(relato["date_time"] + datetime.timedelta(minutes = int(dt)),
                           '%Y-%m-%d %H:%M:00')


    # Buffer (10Km + event uncertainty) corrected to degree
    buffer_size = (relato["dx_km"] + 10) / KM_PER_DEGREE


    # Params for query-intersect
    params = (relato["lon"], relato["lat"], 
              buffer_size, 
              date_start, date_end, 
              relato["lon"], relato["lat"], 
              buffer_size)

    gdf_filtered = query2gdf(conn, intersec_query, params, "geom")

    if gdf_filtered.empty:
        continue

    # Search for related storm polygons
    best_match = gdf_filtered.iloc[0,:]
    name_params = (best_match["name"], best_match["relationships"])

    # Check names of match
    name_params = (best_match["name"], best_match["name"], best_match["relationships"]) # Case same name, case split of the storm, case match is the split of the storm
  
"""

In [None]:
#| hide
# Disabled earlier sketch of the ORM models (including a HailReport table), kept
# as an inert string. It is delimited with ''' because the sketch contains
# """docstrings""", which would otherwise terminate the outer string early and
# make this cell a syntax error.
'''
from sqlalchemy.orm import DeclarativeBase#declarative_base
from geoalchemy2 import Geometry
from sqlalchemy import create_engine, Column, Integer, Float, String, DateTime, ForeignKey

#from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker
from geoalchemy2.shape import from_shape
from shapely.geometry import Point
import geopandas as gpd
from sqlalchemy.orm import sessionmaker, relationship

#Base = declarative_base()
class Base(DeclarativeBase):
    pass

class Storm(Base):
    """
        Table to store unique storms and their evolving identifiers through splits and merges
    """
    __tablename__ = "storms"
    
    id = Column(Integer, primary_key=True)
    identifier = Column(String, index=True)
    ##TODO: Additional fields to track the first and last appearance


class StormEvent(Base):
    """
        Table to store each event, corresponding to the storm's physical data in each satellite scene
    """
    __tablename__ = "storm_events"

    id = Column(Integer, primary_key=True)
    storm_id = Column(Integer, ForeignKey("storms.id"))
    event_type = Column(String)
    datetime = Column(DateTime)
    mean_bt = Column(Float)
    min_bt = Column(Float)
    std_dev_bt = Column(Float)
    count = Column(Integer)
    geometry = Column(Geometry("POLYGON"))
    storm = relationship("Storm", backref="events")


class HailReport(Base):
    """ 
        Table to store the hail reports from PreVots
    """
    __tablename__ = "hail_reports"
    
    id = Column(Integer, primary_key=True)
    datetime = Column(DateTime)
    location = Column(Geometry("POINT"))


class Intersection(Base):
    """
        Table to store the matches of storm polygons and hail reports
    """
    __tablename__ = "intersections"
    
    id = Column(Integer, primary_key=True)
    storm_event_id = Column(Integer, ForeignKey("storm_events.id"))
    #hail_report_id = Column(Integer, ForeignKey("hail_reports.id"))
    intersection_time = Column(DateTime)
    intersection_geom = Column(Geometry("POLYGON"))
    storm_event = relationship("StormEvent", backref="intersections")
'''

In [None]:
#| hide
# Disabled PostgreSQL setup, kept as an inert string.
# SECURITY: the connection URLs used to embed the database password in plain
# text. It is now read from the PGPASSWORD environment variable; the previously
# committed password must be considered leaked and should be rotated.
"""
# Database setup
#engine = create_engine(f"postgresql+psycopg2://postgres:{os.environ['PGPASSWORD']}@localhost:5432/mydatabase")
engine = create_engine(f"postgresql+psycopg2://postgres:{os.environ['PGPASSWORD']}@localhost:5432/postgres")
#Session = sessionmaker(bind=engine)
#session = Session()
"""

In [None]:
#| hide
"""
from sqlalchemy.exc import ProgrammingError
from sqlalchemy.sql import text

def create_database(engine, db_name):
    conn = engine.connect()
    conn.execute(text("COMMIT"))  # Required to execute the CREATE DATABASE command outside of a transaction
    try:
        conn.execute(text(f"CREATE DATABASE {db_name}"))
        print(f"Database '{db_name}' created successfully.")
    except ProgrammingError as pe:
        print(f"Database '{db_name}' already exists.")
    finally:
        conn.close()

# Example usage
create_database(engine, 'mydatabase')
"""

Database 'mydatabase' created successfully.


In [None]:
#| hide
# Disabled PostGIS setup, kept as an inert string.
# SECURITY: the connection URL used to embed the database password in plain
# text. It is now read from the PGPASSWORD environment variable; the previously
# committed password must be considered leaked and should be rotated.
"""
engine = create_engine(f"postgresql+psycopg2://postgres:{os.environ['PGPASSWORD']}@localhost:5432/mydatabase")

# Enable PostGIS
with engine.connect() as conn:
    conn.execute(text("COMMIT"))  # Make sure no transaction is in progress
    conn.execute(text("CREATE EXTENSION IF NOT EXISTS postgis"))  # Enable PostGIS
    print("PostGIS extension has been enabled")

## teste de conexão com o banco de dados
#connection = engine.connect()
#result = connection.execute(text("SELECT NOW()"))
#for row in result:
#    print(row)
#connection.close()
"""

PostGIS extension has been enabled


In [None]:
#| hide
"""
from geoalchemy2 import Geometry, WKTElement
from shapely.geometry import Polygon, mapping


Session = sessionmaker(bind=engine)
#session = Session()
with Session() as session:
    # Load and insert data
    for index, row in gdf_related.iterrows():
    
        # Check if the unique storm already exists
        storm = session.query(Storm).filter_by(identifier=row['name']).first() # First storm event is always spontaneous
        if not storm:
            storm = session.query(Storm).filter_by(identifier=row['relationships']).first() # In case the origin of the split is already in the db
    
        if not storm:
            storm = Storm(identifier=best_match['name'])
            session.add(storm)
            session.commit()
    
            # Insert storm event
        storm_event = StormEvent(
            storm_id = storm.id,
            datetime=row['date'],
            mean_bt=row['mean'],
            min_bt=row['min'],
            std_dev_bt=row['std'],
            count=row["count"],
            event_type=row['event'],
            geometry=WKTElement(row['geom'].wkt, srid=4326)
            #geometry=from_shape(row['geom'], srid=4326)  # Ensure SRID matches your data
        )
        session.add(storm_event)
    
    
        # Adding Prevots
        #hail_report = HailReport(datetime=relato['date_time'], location=from_shape(Point(relato['lon'], relato['lat']), srid=4326))
        #session.add(hail_report)
        #session.commit()  # Commit to get an ID for the hail report
        
        if row[:6].equals(best_match[:6]): 

            intersec_geom = row["geom"].intersection(relato_teste["buffer"])
    
            intersection = Intersection(
            storm_event_id = storm_event.id,
            #hail_report_id=hail_report.id,
            intersection_time = row['date'],
            intersection_geom = WKTElement(intersec_geom.wkt, srid=4326)
            )
            session.add(intersection)
    
    
    
    # Commit the session to save all changes
    session.commit()
    
"""

In [None]:
#| hide
"""
# Teste juntando todas tempestades relacionadas
for name in gdf_related["relationships"].unique():
    if not name:
        continue
    print(name)
    indirect_related = query2gdf(conn, justname_query, (str(name),), "geom")
    gdf_related = pd.concat([gdf_related, indirect_related])

# Removendo os registros que já estavam presentes
gdf_related = gdf_related.drop_duplicates()

# Ajustando a ordem no banco de acordo com o surgimetno de cada uma
gdf_related = gdf_related.sort_values(by="date", ignore_index=True)
gdf_related
"""

In [None]:
#| hide
"""
# Query to count the number of entries in the intersections table
intersection_count = session.query(Intersection).count()
print(f"Number of entries in the 'intersections' table: {intersection_count}")


# Query to fetch and print some sample entries from the events table
sample_events = session.query(StormEvent).limit(10).all()
for idx, event in enumerate(sample_events, start=1):
    print(f"Event {idx}: ID={event.id}, Storm ID={event.storm_id}, Datetime={event.datetime} \n")#Geometry={to_shape(event.geometry)} \n")

# Query to fetch and print some sample entries from the intersections table
sample_intersections = session.query(Intersection).limit(5).all()
for idx, intersection in enumerate(sample_intersections, start=1):
    print(f"Intersection {idx}: ID={intersection.id}, Storm Event ID={intersection.storm_event_id}, Datetime={intersection.intersection_time} \n")#, Geometry={to_shape(intersection.intersection_geom)} \n")
"""

In [None]:
#| hide
"""
with open('hail_log_database.txt', 'w') as f:
    f.write(f"Labeling hail \n")
"""

In [None]:
#| hide
import nbdev; nbdev.nbdev_export()