In [76]:
import pandas as pd
import numpy as np
import psycopg2
import psycopg2.extras as extras

In [None]:
# Read every JSON export into its own DataFrame.
# The files are read in the order listed; each result is bound to the name in
# the same position of the target tuple.

(
    accidents_df,
    injured_people_df,
    killed_people_df,
    vehicles_df,
    actors_df,
    causes_df,
) = map(
    pd.read_json,
    (
        "shapefiles/accidents_2015-2022_r1.json",
        "shapefiles/injured_people_2015-2022_r1.json",
        "shapefiles/killed_people_2015-2022_r2.json",
        "shapefiles/vehicles_2015-2022_r1.json",
        "shapefiles/actors_2015-2022_r1.json",
        "shapefiles/causes_2015-2022_r1.json",
    ),
)

In [63]:
# Make sure to create the postgres user "dev" with password "dev" and the database "accidents_smb" to make this work.
# We create the tables in the database.
#
# Schema notes:
# - Every statement uses CREATE TABLE IF NOT EXISTS so the cell can be re-run
#   without failing on tables that already exist.
# - PostgreSQL maps FLOAT(1)..FLOAT(24) to the 4-byte REAL type (roughly 6
#   significant decimal digits). That is too coarse for coordinates such as
#   -74.091857 and for FECHA_POSTERIOR_MUERTE, which holds epoch-millisecond
#   values (e.g. 1644970000000.0 in the killed-people data), so those columns
#   are declared DOUBLE PRECISION. EDAD keeps FLOAT(1).
# - siniestros owns the FORMULARIO primary key; every other table references it
#   with ON DELETE CASCADE, so siniestros must be created first.

# https://www.tutorialspoint.com/python_data_access/python_postgresql_database_connection.htm
commands = (
    """
    CREATE TABLE IF NOT EXISTS siniestros(
        OBJECTID INT,
        FORMULARIO VARCHAR(25) CONSTRAINT SINIESTROS_PK PRIMARY KEY,
        LOCALIDAD VARCHAR(50),
        CIV INT,
        PK_CALZADA INT,
        LONGITUD DOUBLE PRECISION,
        LATITUD DOUBLE PRECISION,
        CLASE_ACC VARCHAR(25),
        GRAVEDAD VARCHAR(25),
        FECHA_ACC VARCHAR(25),
        ANO_OCURRENCIA_ACC SMALLINT,
        MES_OCURRENCIA_ACC VARCHAR(25),
        MES_NRO_OCURRENCIA_ACC SMALLINT,
        DIA_OCURRENCIA_ACC VARCHAR(25),
        DIA_NRO_OCURRENCIA_ACC SMALLINT,
        DIA_MES_OCURRENCIA_ACC SMALLINT,
        HORA_OCURRENCIA_ACC SMALLINT
    )
    """,

    """
    CREATE TABLE IF NOT EXISTS conheridos(
        OBJECTID INT,
        FORMULARIO VARCHAR(25),
        LOCALIDAD VARCHAR(50),
        LONGITUD DOUBLE PRECISION,
        LATITUD DOUBLE PRECISION,
        CLASE_ACC VARCHAR(25),
        CONDICION VARCHAR(25),
        GENERO VARCHAR(25),
        EDAD FLOAT(1),
        FECHA_ACC VARCHAR(25),
        ANO_OCURRENCIA_ACC SMALLINT,
        MES_OCURRENCIA_ACC VARCHAR(25),
        MES_NRO_OCURRENCIA_ACC SMALLINT,
        DIA_OCURRENCIA_ACC VARCHAR(25),
        DIA_NRO_OCURRENCIA_ACC SMALLINT,
        DIA_MES_OCURRENCIA_ACC SMALLINT,
        HORA_OCURRENCIA_ACC SMALLINT,
        CONSTRAINT CONHERIDOS_FK
            FOREIGN KEY(FORMULARIO)
                REFERENCES siniestros(FORMULARIO)
                ON DELETE CASCADE
    )
    """,

    """
    CREATE TABLE IF NOT EXISTS confallecidos(
        OBJECTID INT,
        FORMULARIO VARCHAR(25),
        LOCALIDAD VARCHAR(50),
        LONGITUD DOUBLE PRECISION,
        LATITUD DOUBLE PRECISION,
        CLASE_ACC VARCHAR(25),
        CONDICION VARCHAR(25),
        GENERO VARCHAR(25),
        EDAD FLOAT(1),
        MUERTE_POSTERIOR VARCHAR(5),
        FECHA_POSTERIOR_MUERTE DOUBLE PRECISION,
        FECHA_ACC VARCHAR(25),
        ANO_OCURRENCIA_ACC SMALLINT,
        MES_OCURRENCIA_ACC VARCHAR(25),
        MES_NRO_OCURRENCIA_ACC SMALLINT,
        DIA_OCURRENCIA_ACC VARCHAR(25),
        DIA_NRO_OCURRENCIA_ACC SMALLINT,
        DIA_MES_OCURRENCIA_ACC SMALLINT,
        HORA_OCURRENCIA_ACC SMALLINT,
        CONSTRAINT CONFALLECIDOS_FK
            FOREIGN KEY(FORMULARIO)
                REFERENCES siniestros(FORMULARIO)
                ON DELETE CASCADE
    )
    """,

    """
    CREATE TABLE IF NOT EXISTS vehiculos(
        OBJECTID INT,
        FORMULARIO VARCHAR(25),
        CODIGO_VEHICULO SMALLINT,
        CLASE VARCHAR(25),
        SERVICIO VARCHAR(25),
        MODALIDAD VARCHAR(50),
        ENFUGA VARCHAR(5),
        CONSTRAINT VEHICULOS_FK
            FOREIGN KEY(FORMULARIO)
                REFERENCES siniestros(FORMULARIO)
                ON DELETE CASCADE
    )
    """,

    """
    CREATE TABLE IF NOT EXISTS actores(
        OBJECTID INT,
        FORMULARIO VARCHAR(25),
        CODIGO_VICTIMA SMALLINT,
        CODIGO_VEHICULO SMALLINT,
        CONDICION VARCHAR(25),
        GENERO VARCHAR(25),
        EDAD FLOAT(1),
        ESTADO VARCHAR(25),
        MUERTE_POSTERIOR VARCHAR(5),
        FECHA_POSTERIOR_MUERTE DOUBLE PRECISION,
        ESTADO_FINAL VARCHAR(25),
        CONSTRAINT ACTORES_FK
            FOREIGN KEY(FORMULARIO)
                REFERENCES siniestros(FORMULARIO)
                ON DELETE CASCADE
    )
    """,

    """
    CREATE TABLE IF NOT EXISTS causas(
        OBJECTID INT,
        FORMULARIO VARCHAR(25),
        CODIGO_VEHICULO SMALLINT,
        CODIGO_CAUSA VARCHAR(10),
        NOMBRE VARCHAR(75),
        TIPO VARCHAR(25),
        TIPO_CAUSA VARCHAR(25),
        CONSTRAINT CAUSAS_FK
            FOREIGN KEY(FORMULARIO)
                REFERENCES siniestros(FORMULARIO)
                ON DELETE CASCADE
    )
    """
)

db_conn = psycopg2.connect(
   database = "accidents_smb", user = "dev", password = "dev", host = "127.0.0.1", port = "5432"
)
try:
    # `with db_conn` commits when every statement succeeds and rolls back on an
    # exception; it does not close the connection, hence the try/finally.
    # The cursor context manager closes the cursor on exit.
    with db_conn, db_conn.cursor() as cursor:
        for command in commands:
            cursor.execute(command)
finally:
    db_conn.close()

In [99]:
# https://www.geeksforgeeks.org/how-to-insert-a-pandas-dataframe-to-an-existing-postgresql-table/

def execute_values(conn, df, table):
    """Bulk-insert every row of ``df`` into ``table`` and commit.

    Parameters
    ----------
    conn : open psycopg2 connection. It is committed on success and rolled
        back on failure; it is never closed here, so the caller owns it.
    df : pandas.DataFrame whose column names match the target table's columns.
    table : name of the target table.

    Returns
    -------
    ``None`` when the insert was committed, ``1`` when it failed (the error is
    printed and the transaction rolled back).

    Notes
    -----
    ``table`` and the column names are interpolated into the SQL text as-is, so
    they must come from trusted code, never from user input. They are left
    unquoted on purpose: quoting would make the identifiers case-sensitive.
    """
    # Cast to object so each cell is a plain Python scalar that psycopg2 can
    # adapt (numpy integer scalars cannot be adapted), then replace missing
    # values with None so they are stored as SQL NULL rather than float NaN.
    cleaned = df.astype(object).where(df.notna(), None)
    tuples = list(cleaned.itertuples(index=False, name=None))

    cols = ','.join(list(df.columns))
    # SQL query to execute; the trailing %s is the placeholder that
    # extras.execute_values expands into the VALUES list.
    query = "INSERT INTO %s(%s) VALUES %%s" % (table, cols)
    cursor = conn.cursor()
    try:
        # Nothing to send for an empty frame; skip the insert call entirely.
        if tuples:
            extras.execute_values(cursor, query, tuples)
        conn.commit()
    except Exception as error:
        print("Error: %s" % error)
        conn.rollback()
        return 1
    finally:
        # Runs on both the success and the failure path.
        cursor.close()
    print("the dataframe is inserted")

In [86]:
# Make some adjustments on accidents_df:
#   - derive FECHA_ACC (calendar date) from the epoch-millisecond column FECHA_HORA_ACC_r,
#   - drop the raw timestamp columns,
#   - rename LONGITUDE/LATITUDE to LONGITUD/LATITUD.
# The frame is rebound instead of mutated in place and the derivation is guarded,
# so running this cell a second time is a no-op instead of a KeyError.

if "FECHA_HORA_ACC_r" in accidents_df.columns:
    accidents_df = accidents_df.assign(
        FECHA_ACC = pd.to_datetime(accidents_df["FECHA_HORA_ACC_r"], unit = "ms").dt.date
    )
accidents_df = (
    accidents_df
    .drop(columns = ["FECHA_HORA_ACC", "FECHA_HORA_ACC_r"], errors = "ignore")
    .rename(columns = {"LONGITUDE": "LONGITUD", "LATITUDE": "LATITUD"})
)
accidents_df.head()

Unnamed: 0,OBJECTID,FORMULARIO,LOCALIDAD,CIV,PK_CALZADA,CLASE_ACC,GRAVEDAD,LONGITUD,LATITUD,ANO_OCURRENCIA_ACC,MES_OCURRENCIA_ACC,MES_NRO_OCURRENCIA_ACC,DIA_OCURRENCIA_ACC,DIA_NRO_OCURRENCIA_ACC,DIA_MES_OCURRENCIA_ACC,HORA_OCURRENCIA_ACC,FECHA_ACC
0,398271,A10979,SUBA,11009611,202458,CHOQUE,SOLO DANOS,-74.0526,4.7173,2015,JULIO,7,LUNES,1,27,14,2015-07-27
1,397828,A000040348,KENNEDY,8006024,245295,ATROPELLO,CON HERIDOS,-74.1712,4.6241,2015,ENERO,1,DOMINGO,7,4,16,2015-01-04
2,397852,A2454,KENNEDY,8005274,197189,CHOQUE,CON HERIDOS,-74.1422,4.6272,2015,FEBRERO,2,JUEVES,4,26,18,2015-02-26
3,397889,A000277508,KENNEDY,50008335,271869,CHOQUE,CON HERIDOS,-74.1399,4.6184,2015,NOVIEMBRE,11,LUNES,1,23,17,2015-11-23
4,397891,A000238124,KENNEDY,8004223,197043,CHOQUE,SOLO DANOS,-74.1352,4.6322,2015,SEPTIEMBRE,9,VIERNES,5,25,14,2015-09-25


In [91]:
# Insert the accidents DataFrame into the siniestros table

db_conn = psycopg2.connect(
   database = "accidents_smb", user = "dev", password = "dev", host = "127.0.0.1", port = "5432"
)
try:
    execute_values(db_conn, accidents_df, "siniestros")
finally:
    # execute_values never closes the connection; close it here so each run of
    # this cell does not leave a session open on the server.
    db_conn.close()

the dataframe is inserted


In [101]:
# Make some adjustments on injured_people_df:
#   - derive FECHA_ACC (calendar date) from the epoch-millisecond column FECHA_HORA_ACC_r,
#   - drop the raw timestamp columns,
#   - rename LONGITUDE/LATITUDE to LONGITUD/LATITUD.
# The frame is rebound instead of mutated in place and the derivation is guarded,
# so running this cell a second time is a no-op instead of a KeyError.

if "FECHA_HORA_ACC_r" in injured_people_df.columns:
    injured_people_df = injured_people_df.assign(
        FECHA_ACC = pd.to_datetime(injured_people_df["FECHA_HORA_ACC_r"], unit = "ms").dt.date
    )
injured_people_df = (
    injured_people_df
    .drop(columns = ["FECHA_HORA_ACC", "FECHA_HORA_ACC_r"], errors = "ignore")
    .rename(columns = {"LONGITUDE": "LONGITUD", "LATITUDE": "LATITUD"})
)
injured_people_df.head()

Unnamed: 0,OBJECTID,FORMULARIO,LOCALIDAD,CLASE_ACC,CONDICION,GENERO,EDAD,LONGITUD,LATITUD,ANO_OCURRENCIA_ACC,MES_OCURRENCIA_ACC,MES_NRO_OCURRENCIA_ACC,DIA_OCURRENCIA_ACC,DIA_NRO_OCURRENCIA_ACC,DIA_MES_OCURRENCIA_ACC,HORA_OCURRENCIA_ACC,FECHA_ACC
0,750107,A284367,TEUSAQUILLO,CHOQUE,CICLISTA,MASCULINO,44.0,-74.091857,4.62946,2015,OCTUBRE,10,JUEVES,4,15,6,2015-10-15
1,731755,A5846,SUBA,CHOQUE,PASAJERO,FEMENINO,23.0,-74.074398,4.73915,2015,MAYO,5,MARTES,2,26,11,2015-05-26
2,731754,A5846,SUBA,CHOQUE,MOTOCICLISTA,MASCULINO,26.0,-74.074398,4.73915,2015,MAYO,5,MARTES,2,26,11,2015-05-26
3,760009,A000239038,KENNEDY,ATROPELLO,PEATON,FEMENINO,79.0,-74.169008,4.618647,2015,SEPTIEMBRE,9,MIÉRCOLES,3,16,11,2015-09-16
4,749002,A9675,FONTIBON,ATROPELLO,PEATON,MASCULINO,21.0,-74.138451,4.678535,2015,JULIO,7,JUEVES,4,16,22,2015-07-16


In [104]:
# Insert the injured people DataFrame into the conheridos table

db_conn = psycopg2.connect(
   database = "accidents_smb", user = "dev", password = "dev", host = "127.0.0.1", port = "5432"
)
try:
    execute_values(db_conn, injured_people_df, "conheridos")
finally:
    # execute_values never closes the connection; close it here so each run of
    # this cell does not leave a session open on the server.
    db_conn.close()

the dataframe is inserted


In [107]:
# Make some adjustments on killed_people_df:
#   - derive FECHA_ACC (calendar date) from the epoch-millisecond column FECHA_HORA_ACC_r,
#   - drop the raw timestamp columns,
#   - rename LONGITUDE/LATITUDE to LONGITUD/LATITUD.
# The frame is rebound instead of mutated in place and the derivation is guarded,
# so running this cell a second time is a no-op instead of a KeyError.

if "FECHA_HORA_ACC_r" in killed_people_df.columns:
    killed_people_df = killed_people_df.assign(
        FECHA_ACC = pd.to_datetime(killed_people_df["FECHA_HORA_ACC_r"], unit = "ms").dt.date
    )
killed_people_df = (
    killed_people_df
    .drop(columns = ["FECHA_HORA_ACC", "FECHA_HORA_ACC_r"], errors = "ignore")
    .rename(columns = {"LONGITUDE": "LONGITUD", "LATITUDE": "LATITUD"})
)
killed_people_df.head()

Unnamed: 0,OBJECTID,FORMULARIO,LOCALIDAD,CLASE_ACC,CONDICION,GENERO,EDAD,MUERTE_POSTERIOR,FECHA_POSTERIOR_MUERTE,LONGITUD,LATITUD,ANO_OCURRENCIA_ACC,MES_OCURRENCIA_ACC,MES_NRO_OCURRENCIA_ACC,DIA_OCURRENCIA_ACC,DIA_NRO_OCURRENCIA_ACC,DIA_MES_OCURRENCIA_ACC,HORA_OCURRENCIA_ACC,FECHA_ACC
0,1580567,A001342911,KENNEDY,CHOQUE,MOTOCICLISTA,FEMENINO,42.0,N,,-74.139547,4.655748,2021,SEPTIEMBRE,9,MARTES,2,28,5,2021-09-28
1,1572714,A001341550,KENNEDY,CHOQUE,CICLISTA,MASCULINO,38.0,N,,-74.125406,4.62958,2021,SEPTIEMBRE,9,JUEVES,4,2,20,2021-09-02
2,1560220,A001390796,KENNEDY,ATROPELLO,PEATON,MASCULINO,22.0,S,1644970000000.0,-74.139016,4.595142,2022,FEBRERO,2,SÁBADO,6,5,4,2022-02-05
3,1305496,A001390796,KENNEDY,ATROPELLO,MOTOCICLISTA,MASCULINO,35.0,N,,-74.139016,4.595142,2022,FEBRERO,2,SÁBADO,6,5,4,2022-02-05
4,1306185,A001447711,FONTIBON,CHOQUE,MOTOCICLISTA,MASCULINO,28.0,N,,-74.139,4.697,2022,ABRIL,4,LUNES,1,4,18,2022-04-04


In [115]:
# Insert the killed people DataFrame into the confallecidos table

db_conn = psycopg2.connect(
   database = "accidents_smb", user = "dev", password = "dev", host = "127.0.0.1", port = "5432"
)
try:
    execute_values(db_conn, killed_people_df, "confallecidos")
finally:
    # execute_values never closes the connection; close it here so each run of
    # this cell does not leave a session open on the server.
    db_conn.close()

the dataframe is inserted


In [119]:
# Insert the vehicles DataFrame into the vehiculos table

db_conn = psycopg2.connect(
   database = "accidents_smb", user = "dev", password = "dev", host = "127.0.0.1", port = "5432"
)
try:
    execute_values(db_conn, vehicles_df, "vehiculos")
finally:
    # execute_values never closes the connection; close it here so each run of
    # this cell does not leave a session open on the server.
    db_conn.close()

the dataframe is inserted


In [123]:
# Insert the actors DataFrame into the actores table

db_conn = psycopg2.connect(
   database = "accidents_smb", user = "dev", password = "dev", host = "127.0.0.1", port = "5432"
)
try:
    execute_values(db_conn, actors_df, "actores")
finally:
    # execute_values never closes the connection; close it here so each run of
    # this cell does not leave a session open on the server.
    db_conn.close()

the dataframe is inserted


In [127]:
# Insert the causes DataFrame into the causas table

db_conn = psycopg2.connect(
   database = "accidents_smb", user = "dev", password = "dev", host = "127.0.0.1", port = "5432"
)
try:
    execute_values(db_conn, causes_df, "causas")
finally:
    # execute_values never closes the connection; close it here so each run of
    # this cell does not leave a session open on the server.
    db_conn.close()

the dataframe is inserted
