# Terremotos en México

Proceso ETL:

+ Extracción de datos desde el Servicio Sismológico Nacional de la UNAM: http://www.ssn.unam.mx
+ Transformación necesaria de los datos.
+ Carga de los datos en la base de datos y en el mapa.

In [1]:
import os
import re
import warnings
from urllib.parse import quote_plus

warnings.simplefilter('ignore')

import pandas as pd
import requests as req
import xmltodict

**Extracción**

In [2]:
# RSS feed with the latest earthquakes published by the SSN.
url='http://www.ssn.unam.mx/rss/ultimos-sismos.xml'

# Use a timeout so a hung connection cannot block the notebook forever, and
# fail on HTTP errors instead of handing an error page to the XML parser.
respuesta=req.get(url, timeout=30)
respuesta.raise_for_status()
data=respuesta.content

In [3]:
# Peek at the first entry of the parsed feed to see which fields it carries.
xmltodict.parse(data)['rss']['channel']['item'][0]

OrderedDict([('title', '4.0, 132 km al NORESTE de  GUERRERO NEGRO, BCS'),
             ('description',
              '<p>Fecha:2021-03-15 00:34:31 (Hora de M&eacute;xico)<br/>Lat/Lon: 28.97/-113.35<br/>Profundidad: 10.0 km </p>'),
             ('link',
              'http://www2.ssn.unam.mx:8080/jsp/localizacion-de-sismo.jsp?latitud=28.97&longitud=-113.35&prf=10.0 km&ma=4.0&fecha=2021-03-15&hora=00:34:31&loc=132 km al NORESTE de  GUERRERO NEGRO, BCS        &evento=1'),
             ('geo:lat', '28.97'),
             ('geo:long', '-113.35')])

In [4]:
# Number of entries under the feed's 'item' key.
len(xmltodict.parse(data)['rss']['channel']['item'])

15

In [5]:
# Title string of the first entry.
xmltodict.parse(data)['rss']['channel']['item'][0]['title']

'4.0, 132 km al NORESTE de  GUERRERO NEGRO, BCS'

In [6]:
def get_data(url='http://www.ssn.unam.mx/rss/ultimos-sismos.xml', timeout=30):
    """Download the earthquake RSS feed and parse it with xmltodict.

    Args:
        url: Feed address. Defaults to the SSN "ultimos sismos" feed.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The mapping produced by ``xmltodict.parse`` for the response body.

    Raises:
        Whatever ``req.get`` raises on connection problems or timeout, and the
        error raised by ``raise_for_status`` for 4xx/5xx responses.
    """
    respuesta=req.get(url, timeout=timeout)
    # Stop here on an HTTP error rather than trying to parse an error page.
    respuesta.raise_for_status()

    return xmltodict.parse(respuesta.content)

**Transformación**

In [7]:
def get_dictio():
    """Turn the feed entries into flat records ready to be loaded.

    Every value is read from the query string of the entry's ``link``.

    Returns:
        A list with one dict per entry, with keys ``Fecha``, ``Hora``,
        ``Latitud``, ``Longitud``, ``Profundidad`` (all strings, the depth
        without its " km" suffix) and ``Intensidad`` (float). The list is
        empty when the feed has no entries.

    Raises:
        IndexError: if a link lacks one of the expected parameters.
    """
    d=get_data()

    canal=d['rss']['channel'] or {}
    lst=canal.get('item') or []
    # xmltodict yields a single mapping instead of a list when the feed
    # contains exactly one <item>.
    if not isinstance(lst, list):
        lst=[lst]

    # Raw strings keep the regex escapes intact. Numbers may be signed and
    # may omit the decimal part.
    patrones={'Fecha': r'fecha=(\d+-\d+-\d+)',
              'Hora': r'hora=(\d+:\d+:\d+)',
              'Latitud': r'latitud=(-?\d+(?:\.\d+)?)',
              'Longitud': r'longitud=(-?\d+(?:\.\d+)?)',
              'Profundidad': r'prf=(\d+(?:\.\d+)?)',
              'Intensidad': r'ma=(\d+(?:\.\d+)?)'}

    res=[]

    for e in lst:
        link=e['link']

        # With a single capture group, findall returns the captured values.
        data={clave: re.findall(patron, link)[0]
              for clave, patron in patrones.items()}
        data['Intensidad']=float(data['Intensidad'])

        res.append(data)

    return res
        

In [8]:
# First record produced by the transformation step.
get_dictio()[0]

{'Fecha': '2021-03-15',
 'Hora': '00:34:31',
 'Latitud': '28.97',
 'Longitud': '-113.35',
 'Profundidad': '10.0',
 'Intensidad': 4.0}

In [9]:
# Alternative to the regexes: split the first link's query string by hand.
primer_link=xmltodict.parse(data)['rss']['channel']['item'][0]['link']
consulta=primer_link.split('?')[1]
lst=consulta.split('&')

# Print each parameter as a [name, value] pair.
for e in lst:
    print(e.split('='))

['latitud', '28.97']
['longitud', '-113.35']
['prf', '10.0 km']
['ma', '4.0']
['fecha', '2021-03-15']
['hora', '00:34:31']
['loc', '132 km al NORESTE de  GUERRERO NEGRO, BCS        ']
['evento', '1']


In [10]:
from urllib.parse import urlparse

# Let the standard library isolate the query string of the first item's link.
primer_link=xmltodict.parse(data)['rss']['channel']['item'][0]['link']
u=urlparse(primer_link)
u.query

'latitud=28.97&longitud=-113.35&prf=10.0 km&ma=4.0&fecha=2021-03-15&hora=00:34:31&loc=132 km al NORESTE de  GUERRERO NEGRO, BCS        &evento=1'

**Carga en base de datos PostgreSQL**

In [11]:
from sqlalchemy import create_engine, Column, Float, Integer, JSON, DateTime, Text
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import DDL

from sqlalchemy.schema import Sequence

In [12]:
# Name of the PostgreSQL schema that holds the tables; the connection string
# below also uses it as the database name.
SCHEMA='terremotos'

In [13]:
# Connection settings are read from the environment (PGUSER, PGPASSWORD,
# PGHOST, PGPORT) so real credentials never need to be written in the
# notebook; the fallbacks are the previous local-development values.
# User and password are URL-quoted so special characters cannot break the URL.
_pg_user=quote_plus(os.environ.get('PGUSER', 'iudh'))
_pg_pass=quote_plus(os.environ.get('PGPASSWORD', 'password'))
_pg_host=os.environ.get('PGHOST', 'localhost')
_pg_port=os.environ.get('PGPORT', '5432')

str_conn=f'postgresql://{_pg_user}:{_pg_pass}@{_pg_host}:{_pg_port}/{SCHEMA}'

In [14]:
class Conector:
    """Pair a SQLAlchemy engine with a session bound to it.

    Attributes are ``motor`` (the engine) and ``sesion`` (the session).
    """

    def __init__(self, str_conn):
        """Create the engine for ``str_conn`` and open a session on it."""
        print('Conectando...')

        motor=create_engine(str_conn)
        fabrica_sesiones=sessionmaker(bind=motor)

        self.motor=motor
        self.sesion=fabrica_sesiones()

In [15]:
# Declarative base class for the ORM model below.
Base=declarative_base()


class Mexico(Base):
    """ORM mapping for the ``mexico`` table inside the ``SCHEMA`` schema."""

    __tablename__='mexico'
    __table_args__={'schema': SCHEMA}

    # Integer primary key fed by the explicit ``mexico_id_seq`` sequence.
    _id=Column(Integer,
               Sequence('mexico_id_seq', start=1, increment=1),
               primary_key=True)

    # Date and time are stored as plain text.
    fecha=Column(Text)
    hora=Column(Text)

    # Coordinates.
    lat=Column(Float)
    lng=Column(Float)

    # Depth is stored as text; the value under 'Intensidad' goes into inten.
    prof=Column(Text)
    inten=Column(Float)

In [16]:
class Prueba:
    """Create, fill, drop and read the ``mexico`` table.

    Attributes are ``motor`` (engine) and ``sesion`` (session), both taken
    from a ``Conector`` built in the constructor.
    """

    def __init__(self, str_conn):
        """Connect and make sure the target schema exists.

        Args:
            str_conn: Connection URL handed to ``Conector``.
        """
        conn=Conector(str_conn)
        self.motor=conn.motor
        self.sesion=conn.sesion

        # NOTE(review): Engine.execute is version-dependent in SQLAlchemy;
        # confirm it is still available before upgrading the library.
        self.motor.execute(DDL(f'create schema if not exists {SCHEMA}'))

    def _tabla_existe(self):
        """Return whether the ``mexico`` table exists in ``SCHEMA``."""
        # NOTE(review): passing the engine to dialect.has_table depends on the
        # installed SQLAlchemy version accepting it; confirm before upgrading.
        return self.motor.dialect.has_table(self.motor, Mexico.__tablename__, schema=SCHEMA)

    def crea_tablas(self):
        """Create the ``mexico`` table unless it already exists."""
        if not self._tabla_existe():
            print('Creando tabla...')
            Mexico.__table__.create(self.motor)
        else:
            print('Tabla ya existe.')

    def rellena_tablas(self, datos):
        """Insert one row per record and commit them together.

        Args:
            datos: Iterable of mappings with the keys ``Fecha``, ``Hora``,
                ``Latitud``, ``Longitud``, ``Profundidad`` and ``Intensidad``.
                Any other keys are ignored.

        Raises:
            KeyError: if a record lacks an expected key. Nothing is added to
                the session in that case.
            Exception: any error raised while adding or committing is
                re-raised after the session has been rolled back.
        """
        # Build every row first so a malformed record cannot leave a
        # half-filled session behind.
        items=[Mexico(fecha=e['Fecha'],
                      hora=e['Hora'],
                      lat=e['Latitud'],
                      lng=e['Longitud'],
                      prof=e['Profundidad'],
                      inten=e['Intensidad'])
               for e in datos]

        try:
            self.sesion.add_all(items)
            self.sesion.commit()
        except Exception:
            # Roll back so the session remains usable after a failure.
            self.sesion.rollback()
            raise

        print('Comiteado')

    def borra_tablas(self):
        """Drop the ``mexico`` table if it exists."""
        if self._tabla_existe():
            print('Borrando tabla...')
            Mexico.__table__.drop(self.motor)

    def show_df(self):
        """Return the full contents of the ``mexico`` table as a DataFrame.

        Column names come from the query result itself, so they always match
        the selected data.
        """
        return pd.read_sql(f'select * from {SCHEMA}.mexico', self.motor)

In [17]:
# Open the connection; the constructor also issues "create schema if not exists".
pa_sql=Prueba(str_conn)

Conectando...


In [18]:
# Drop the mexico table if it already exists.
pa_sql.borra_tablas()

Borrando tabla...


In [19]:
# Create the mexico table when it is missing.
pa_sql.crea_tablas()

Creando tabla...


In [21]:
# Load the records stored in the CSV file. rellena_tablas only inserts, so
# re-running this cell appends the same rows again.
pa_sql.rellena_tablas(pd.read_csv('../datasets/ssnUnam.csv').to_dict('records'))

Comiteado


In [22]:
# Append the records currently returned by the live feed.
pa_sql.rellena_tablas(get_dictio())

Comiteado


In [23]:
# Read the whole table back as a DataFrame.
pa_sql.show_df()

Unnamed: 0,_id,fecha,hora,lat,lng,prof,inten
0,1,2020-11-12,05:58:17,15.82,-95.04,16.0 km,4.3
1,2,2020-11-12,04:48:05,16.21,-96.46,65.0 km,3.7
2,3,2020-11-12,04:21:59,16.68,-97.47,13.0 km,3.1
3,4,2020-11-12,04:14:19,15.37,-94.77,16.0 km,3.8
4,5,2020-11-12,04:13:53,17.36,-101.65,5.0 km,3.4
...,...,...,...,...,...,...,...
70,71,2021-03-14,16:02:25,14.16,-93.34,16.0,3.9
71,72,2021-03-14,15:48:59,18.11,-104.14,16.0,3.6
72,73,2021-03-14,15:29:18,17.02,-96.34,72.0,3.5
73,74,2021-03-14,15:18:24,15.93,-98.93,17.0,3.6


**Mapa (heatmap)**

In [24]:
import folium
from folium import plugins

In [25]:
def load_map(intensidad=0):
    """Show and save a heatmap of the stored earthquakes above a threshold.

    Args:
        intensidad: Only rows whose ``inten`` is strictly greater than this
            value are plotted.

    Side effects:
        Displays the first filtered rows and the map, and writes the map to
        ``../images/quakes_map_mex.html``.
    """
    # Query once: the mask and the frame it filters must come from the same
    # snapshot of the table.
    todos=pa_sql.show_df()
    data=todos[todos.inten>intensidad]
    display(data.head())

    # Rows without coordinates cannot be placed on the map.
    value_data=data[['lat', 'lng']].dropna().values

    mapa=folium.Map([19.42, -99.12], zoom_start=4)

    mapa.add_child(plugins.HeatMap(value_data, radius=15))

    mapa.save('../images/quakes_map_mex.html')

    display(mapa)

In [26]:
# Default threshold (intensidad=0): plots rows with inten > 0.
load_map()

Unnamed: 0,_id,fecha,hora,lat,lng,prof,inten
0,1,2020-11-12,05:58:17,15.82,-95.04,16.0 km,4.3
1,2,2020-11-12,04:48:05,16.21,-96.46,65.0 km,3.7
2,3,2020-11-12,04:21:59,16.68,-97.47,13.0 km,3.1
3,4,2020-11-12,04:14:19,15.37,-94.77,16.0 km,3.8
4,5,2020-11-12,04:13:53,17.36,-101.65,5.0 km,3.4


In [27]:
# Plot only the rows with inten > 3.
load_map(intensidad=3)

Unnamed: 0,_id,fecha,hora,lat,lng,prof,inten
0,1,2020-11-12,05:58:17,15.82,-95.04,16.0 km,4.3
1,2,2020-11-12,04:48:05,16.21,-96.46,65.0 km,3.7
2,3,2020-11-12,04:21:59,16.68,-97.47,13.0 km,3.1
3,4,2020-11-12,04:14:19,15.37,-94.77,16.0 km,3.8
4,5,2020-11-12,04:13:53,17.36,-101.65,5.0 km,3.4


In [28]:
# Plot only the rows with inten > 4.
load_map(intensidad=4)

Unnamed: 0,_id,fecha,hora,lat,lng,prof,inten
0,1,2020-11-12,05:58:17,15.82,-95.04,16.0 km,4.3
15,16,2020-11-20,02:43:53,16.18,-94.54,87.0 km,4.9
16,17,2020-11-20,00:09:52,15.67,-96.29,19.0 km,4.2
23,24,2020-11-19,14:58:25,14.68,-95.25,15.0 km,4.1
30,31,2020-11-20,18:05:49,24.11,-108.96,16.0 km,4.2


**Heatmap animado**

In [29]:
from folium.plugins import HeatMapWithTime as HMWT

In [30]:
def animated_map(intensidad=0):
    """Show and save an animated heatmap with one frame per hour of day.

    Args:
        intensidad: Only rows whose ``inten`` is strictly greater than this
            value are used.

    Side effects:
        Displays the first rows of the prepared frame and the map, and writes
        the map to ``../images/animated_quake_map_mex.html``.
    """
    # Query once so the mask and the filtered frame come from the same data.
    todos=pa_sql.show_df()

    # assign() returns a new frame, so the added columns never write into a
    # slice of the query result.
    data=todos[todos.inten>intensidad].assign(
        count=1,
        hour=lambda df: df.hora.str.split(':').str[0].astype(int))

    # One frame per hour present in the data, in ascending order. Each frame
    # lists [lat, lng, number of rows at that point].
    value_data=[data.loc[data.hour==h, ['lat', 'lng', 'count']]
                    .groupby(['lat', 'lng'])
                    .sum()
                    .reset_index()
                    .values
                    .tolist()
                for h in sorted(data.hour.unique())]

    display(data.head())

    mapa=folium.Map([19.42, -99.12], zoom_start=4)

    HMWT(value_data,
         radius=5,
         gradient={0.2:'blue', 0.4:'lime', 0.6:'orange', 1:'red'},
         min_opacity=0.5,
         max_opacity=0.8,
         use_local_extrema=True).add_to(mapa)

    mapa.save('../images/animated_quake_map_mex.html')

    display(mapa)

In [31]:
# Default threshold (intensidad=0): animates rows with inten > 0.
animated_map()

Unnamed: 0,_id,fecha,hora,lat,lng,prof,inten,count,hour
0,1,2020-11-12,05:58:17,15.82,-95.04,16.0 km,4.3,1,5
1,2,2020-11-12,04:48:05,16.21,-96.46,65.0 km,3.7,1,4
2,3,2020-11-12,04:21:59,16.68,-97.47,13.0 km,3.1,1,4
3,4,2020-11-12,04:14:19,15.37,-94.77,16.0 km,3.8,1,4
4,5,2020-11-12,04:13:53,17.36,-101.65,5.0 km,3.4,1,4


In [32]:
# Animate only the rows with inten > 4.
animated_map(intensidad=4)

Unnamed: 0,_id,fecha,hora,lat,lng,prof,inten,count,hour
0,1,2020-11-12,05:58:17,15.82,-95.04,16.0 km,4.3,1,5
15,16,2020-11-20,02:43:53,16.18,-94.54,87.0 km,4.9,1,2
16,17,2020-11-20,00:09:52,15.67,-96.29,19.0 km,4.2,1,0
23,24,2020-11-19,14:58:25,14.68,-95.25,15.0 km,4.1,1,14
30,31,2020-11-20,18:05:49,24.11,-108.96,16.0 km,4.2,1,18
