# Terremotos en México

Proceso ETL:

+ Extracción de datos desde el Servicio Sismológico Nacional (SSN) de la UNAM: http://www.ssn.unam.mx
+ Transformación necesaria de los datos.
+ Carga de los datos en bases de datos (MongoDB y PostgreSQL) y en un mapa.

In [1]:
import warnings
warnings.simplefilter('ignore')

import os
import re

import requests as req
import xmltodict

import pandas as pd

**Extracción**

In [4]:
# RSS feed with the latest earthquakes published by the SSN.
url='http://www.ssn.unam.mx/rss/ultimos-sismos.xml'

# A timeout keeps the notebook from hanging if the server does not answer, and
# raise_for_status() stops an HTTP error page from being parsed as the feed.
resp=req.get(url, timeout=30)
resp.raise_for_status()

# Raw XML bytes of the feed; the exploration cells below parse this.
data=resp.content

In [14]:
# Inspect the first <item> of the parsed feed to see which fields it carries.
xmltodict.parse(data)['rss']['channel']['item'][0]

OrderedDict([('title', '4.3, 92 km al SUROESTE de  TONALA, CHIS'),
             ('description',
              '<p>Fecha:2020-12-14 11:23:30 (Hora de M&eacute;xico)<br/>Lat/Lon: 15.35/-94.15<br/>Profundidad: 37.0 km </p>'),
             ('link',
              'http://www2.ssn.unam.mx:8080/jsp/localizacion-de-sismo.jsp?latitud=15.35&longitud=-94.15&prf=37.0 km&ma=4.3&fecha=2020-12-14&hora=11:23:30&loc=92 km al SUROESTE de  TONALA, CHIS        &evento=1'),
             ('geo:lat', '15.35'),
             ('geo:long', '-94.15')])

In [17]:
# Number of <item> entries present in the downloaded feed.
len(xmltodict.parse(data)['rss']['channel']['item'])

15

In [15]:
# Title of the first item.
xmltodict.parse(data)['rss']['channel']['item'][0]['title']

'4.3, 92 km al SUROESTE de  TONALA, CHIS'

In [16]:
def get_data(url='http://www.ssn.unam.mx/rss/ultimos-sismos.xml', timeout=30):
    """Download the SSN "latest earthquakes" RSS feed and parse it.

    Parameters
    ----------
    url : str, optional
        Address of the RSS feed. Defaults to the SSN feed.
    timeout : float, optional
        Seconds to wait for the server before giving up.

    Returns
    -------
    dict
        The feed as parsed by ``xmltodict.parse``.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems, timeouts or a non-2xx HTTP status.
    """
    # Without a timeout requests can block forever on an unresponsive server.
    resp=req.get(url, timeout=timeout)

    # Do not try to parse an HTTP error page as if it were the feed.
    resp.raise_for_status()

    return xmltodict.parse(resp.content)

**Transformación**

In [55]:
def get_dictio(feed=None):
    """Flatten the SSN RSS feed into a list of earthquake records.

    Every feed item carries its data as query parameters of its ``link``
    (``fecha``, ``hora``, ``latitud``, ``longitud``, ``prf`` and ``ma``);
    those values are extracted here.

    Parameters
    ----------
    feed : dict, optional
        Parsed feed, shaped like the return value of ``get_data()``. When
        omitted, the feed is downloaded with ``get_data()``.

    Returns
    -------
    list of dict
        One dict per item with the keys ``Fecha``, ``Hora``, ``Latitud``,
        ``Longitud``, ``Profundidad`` (strings, exactly as found in the link)
        and ``Intensidad`` (float).

    Raises
    ------
    ValueError
        If the link of an item lacks one of the expected parameters.
    """
    if feed is None:
        feed=get_data()

    # xmltodict returns a single dict (not a list) when the channel has only
    # one <item>, and leaves the key out when there are none.
    lst=feed['rss']['channel'].get('item') or []
    if isinstance(lst, dict):
        lst=[lst]

    # Optional sign and optional decimals: the values are not guaranteed to be
    # written as 'digits.digits', nor longitudes to be negative.
    number=r'-?\d+(?:\.\d+)?'

    # (output key, query parameter, pattern of the value)
    fields=(('Fecha', 'fecha', r'\d+-\d+-\d+'),
            ('Hora', 'hora', r'\d+:\d+:\d+'),
            ('Latitud', 'latitud', number),
            ('Longitud', 'longitud', number),
            ('Profundidad', 'prf', number),   # 'prf=37.0 km' -> '37.0'
            ('Intensidad', 'ma', number))

    res=[]

    for e in lst:

        link=e['link']
        data={}

        for key, param, pattern in fields:
            # Anchor on '?' or '&' so that e.g. 'ma=' cannot match the tail of
            # a longer parameter name.
            found=re.search(r'[?&]'+param+'=('+pattern+')', link)
            if found is None:
                raise ValueError(f'Falta el parámetro {param!r} en el link: {link}')
            data[key]=found.group(1)

        data['Intensidad']=float(data['Intensidad'])

        res.append(data)

    return res
        

In [56]:
# Quick check: first record produced by the transformation.
get_dictio()[0]

{'Fecha': '2020-12-14',
 'Hora': '11:23:30',
 'Latitud': '15.35',
 'Longitud': '-94.15',
 'Profundidad': '37.0',
 'Intensidad': 4.3}

In [57]:
# Exploration: take the part of the first item's link after '?' and split it
# into 'key=value' chunks on '&'.
lst=xmltodict.parse(data)['rss']['channel']['item'][0]['link'].split('?')[1].split('&')

# Print every chunk as a [key, value] list.
for e in lst:
    print(e.split('='))

['latitud', '15.35']
['longitud', '-94.15']
['prf', '37.0 km']
['ma', '4.3']
['fecha', '2020-12-14']
['hora', '11:23:30']
['loc', '92 km al SUROESTE de  TONALA, CHIS        ']
['evento', '1']


In [58]:
# Alternative exploration: let urlparse split the link of the first item and
# show its query-string component.
from urllib.parse import urlparse
u=urlparse(xmltodict.parse(data)['rss']['channel']['item'][0]['link'])
u.query

'latitud=15.35&longitud=-94.15&prf=37.0 km&ma=4.3&fecha=2020-12-14&hora=11:23:30&loc=92 km al SUROESTE de  TONALA, CHIS        &evento=1'

**Carga en la base de datos MongoDB**

In [59]:
import pymongo

In [60]:
# MongoClient() is called without arguments, so pymongo's default connection
# settings are used.
cliente=pymongo.MongoClient()
# Handle to the 'earthquake' database used by the Mongo load and the maps.
db=cliente.earthquake

In [61]:
def load_mongo(data=None, collection=None):
    """Upsert earthquake records into MongoDB and print each one.

    Parameters
    ----------
    data : iterable of dict, optional
        Records to store. When omitted they are fetched with ``get_dictio()``.
    collection : optional
        Target collection; any object with a pymongo-style
        ``replace_one(filter, replacement, upsert=...)`` method. Defaults to
        ``db.ssnUnam``.
    """
    if data is None:
        data=get_dictio()

    if collection is None:
        collection=db.ssnUnam

    for e in data:
        # The record itself is the filter: an identical document is replaced
        # by itself, a new one is inserted, so re-runs do not duplicate data.
        # Collection.update() was removed in PyMongo 4; replace_one() is the
        # supported call for a whole-document upsert.
        collection.replace_one(e, e, upsert=True)   # (original element, replacement element)
        print(e)

In [64]:
# Fetch the current feed and upsert its records into MongoDB.
load_mongo()

{'Fecha': '2020-12-14', 'Hora': '11:23:30', 'Latitud': '15.35', 'Longitud': '-94.15', 'Profundidad': '37.0', 'Intensidad': 4.3}
{'Fecha': '2020-12-14', 'Hora': '04:53:40', 'Latitud': '18.34', 'Longitud': '-100.79', 'Profundidad': '57.0', 'Intensidad': 3.8}
{'Fecha': '2020-12-14', 'Hora': '04:45:40', 'Latitud': '16.15', 'Longitud': '-96.36', 'Profundidad': '43.0', 'Intensidad': 3.8}
{'Fecha': '2020-12-14', 'Hora': '04:42:12', 'Latitud': '16.42', 'Longitud': '-95.49', 'Profundidad': '3.0', 'Intensidad': 3.4}
{'Fecha': '2020-12-14', 'Hora': '04:21:52', 'Latitud': '16.05', 'Longitud': '-97.65', 'Profundidad': '30.0', 'Intensidad': 3.3}
{'Fecha': '2020-12-14', 'Hora': '04:20:56', 'Latitud': '16.41', 'Longitud': '-95.06', 'Profundidad': '11.0', 'Intensidad': 3.4}
{'Fecha': '2020-12-14', 'Hora': '04:16:54', 'Latitud': '16.55', 'Longitud': '-98.43', 'Profundidad': '9.0', 'Intensidad': 3.3}
{'Fecha': '2020-12-14', 'Hora': '04:15:16', 'Latitud': '16.57', 'Longitud': '-98.48', 'Profundidad': '6.0

**Carga en la base de datos PostgreSQL**

In [71]:
from sqlalchemy import create_engine, Column, Float, Integer, JSON, DateTime, Text
from sqlalchemy.orm import sessionmaker
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import DDL

from sqlalchemy.schema import Sequence

In [72]:
# Used twice below: as the database name in the connection URL and as the
# schema that holds the 'mexico' table.
SCHEMA='terremotos'

In [73]:
# Connection URL for Postgres. Set the TERREMOTOS_PG_URL environment variable
# to supply real credentials without writing them into the notebook; the
# fallback only suits a throwaway local instance.
str_conn=os.environ.get('TERREMOTOS_PG_URL',
                        f'postgresql://postgres:password@localhost:5432/{SCHEMA}')

In [74]:
class Conector:
    """Bundle a SQLAlchemy engine with a session bound to it.

    Attributes set by the constructor:
      motor  -- engine created from the connection string.
      sesion -- session obtained from a sessionmaker bound to that engine.
    """

    def __init__(self, str_conn):
        print('Conectando...')

        engine=create_engine(str_conn)
        session_factory=sessionmaker(bind=engine)

        self.motor=engine
        self.sesion=session_factory()

In [93]:
# Declarative base class that the ORM model below inherits from.
Base=declarative_base()

class Mexico(Base):
    """ORM model for the 'mexico' table, created inside the SCHEMA schema.

    One row per earthquake record.
    """
    __tablename__='mexico'
    __table_args__={'schema': SCHEMA}
    
    # Surrogate primary key fed by the 'mexico_id_seq' sequence (starts at 1,
    # increments by 1).
    _id=Column(Integer(), 
                    Sequence('mexico_id_seq', start=1, increment=1),   
                    primary_key=True)
    # Date and time are kept as plain text columns.
    fecha=Column(Text())
    hora=Column(Text())
    # Numeric columns: latitude, longitude, depth and magnitude/intensity,
    # filled from the Latitud/Longitud/Profundidad/Intensidad record keys.
    lat=Column(Float())
    lng=Column(Float())
    prof=Column(Float())
    inten=Column(Float())

In [94]:
class Prueba:
    """Small helper around the Postgres 'mexico' table.

    Opens the connection through ``Conector``, makes sure the schema exists
    and offers create / fill / drop / read operations for the table mapped by
    ``Mexico``.

    All statements run on explicit connections instead of
    ``Engine.execute()`` (removed in SQLAlchemy 2.0), so the class works on
    both 1.x and 2.x.
    """

    def __init__(self, str_conn):
        """Connect using ``str_conn`` and create the schema if it is missing."""

        conn=Conector(str_conn)
        self.motor=conn.motor
        self.sesion=conn.sesion

        # begin() opens a transaction that is committed when the block exits,
        # so the DDL is persisted regardless of the autocommit behaviour.
        with self.motor.begin() as con:
            con.execute(DDL(f'create schema if not exists {SCHEMA}'))


    def _tabla_existe(self):
        """Return True when the 'mexico' table exists in the schema."""
        # The dialect check needs a real connection, not the engine itself.
        with self.motor.connect() as con:
            return self.motor.dialect.has_table(con, Mexico.__tablename__, schema=SCHEMA)


    def crea_tablas(self):
        """Create the 'mexico' table unless it already exists."""
        if not self._tabla_existe():
            print('Creando tabla...')
            Mexico.__table__.create(self.motor)
        else:
            print('Tabla ya existe.')


    def rellena_tablas(self, datos):
        """Insert the records in ``datos`` and commit them in one transaction.

        ``datos`` is an iterable of dicts with the keys ``Fecha``, ``Hora``,
        ``Latitud``, ``Longitud``, ``Profundidad`` and ``Intensidad``.
        Any error rolls the session back and is re-raised.
        """
        try:
            for e in datos:
                item=Mexico(fecha=e['Fecha'],
                            hora=e['Hora'],
                            lat=e['Latitud'],
                            lng=e['Longitud'],
                            prof=e['Profundidad'],
                            inten=e['Intensidad'])

                self.sesion.add(item)

            self.sesion.commit()
        except Exception:
            # Without the rollback the session stays in a failed state and
            # every later call on it raises as well.
            self.sesion.rollback()
            raise

        print('Comiteado')


    def borra_tablas(self):
        """Drop the 'mexico' table if it exists."""
        if self._tabla_existe():
            print('Borrando tabla...')
            Mexico.__table__.drop(self.motor)


    def show_df(self):
        """Return the whole 'mexico' table as a pandas DataFrame."""
        with self.motor.connect() as con:
            # Selecting through the mapped table gives rows and column names
            # from the same result, in the same order; no information_schema
            # query is needed.
            result=con.execute(Mexico.__table__.select())
            columns=list(result.keys())
            data=[tuple(row) for row in result.fetchall()]

        return pd.DataFrame(data, columns=columns)

In [95]:
# Connect to Postgres; the constructor also creates the schema if missing.
pa_sql=Prueba(str_conn)

Conectando...


In [96]:
# Drop the table (if it exists) so the load below starts from an empty table.
pa_sql.borra_tablas()

Borrando tabla...


In [97]:
# Create the 'mexico' table.
pa_sql.crea_tablas()

Creando tabla...


In [100]:
# Download and transform the current feed, then insert its records.
pa_sql.rellena_tablas(get_dictio())

Comiteado


In [101]:
# Read the table back as a DataFrame to verify the load.
pa_sql.show_df()

Unnamed: 0,_id,fecha,hora,lat,lng,prof,inten
0,1,2020-12-14,11:23:30,15.35,-94.15,37.0,4.3
1,2,2020-12-14,04:53:40,18.34,-100.79,57.0,3.8
2,3,2020-12-14,04:45:40,16.15,-96.36,43.0,3.8
3,4,2020-12-14,04:42:12,16.42,-95.49,3.0,3.4
4,5,2020-12-14,04:21:52,16.05,-97.65,30.0,3.3
5,6,2020-12-14,04:20:56,16.41,-95.06,11.0,3.4
6,7,2020-12-14,04:16:54,16.55,-98.43,9.0,3.3
7,8,2020-12-14,04:15:16,16.57,-98.48,6.0,3.7
8,9,2020-12-14,04:13:53,16.42,-95.06,4.0,3.5
9,10,2020-12-14,03:56:46,16.57,-98.48,6.0,3.5


**Mapa (heatmap)**

In [102]:
import folium
from folium import plugins

In [103]:
def load_map(query={'Intensidad':{'$gt':0}}, path='../images/quakes_map_mex.html'):
    """Draw a heatmap of the earthquakes stored in MongoDB.

    Parameters
    ----------
    query : dict, optional
        MongoDB filter passed to ``db.ssnUnam.find``. The default selects
        records whose ``Intensidad`` is greater than 0. The default dict is
        only read, never modified.
    path : str, optional
        File the HTML map is saved to.

    Shows the first rows of the selection and the map. Returns None.
    """
    lst=list(db.ssnUnam.find(query))

    # An empty result gives a DataFrame without columns, so the column
    # selection below would fail with a KeyError.
    if not lst:
        print('No hay sismos que cumplan la consulta.')
        return

    data=pd.DataFrame(lst)
    display(data.head())

    # Coordinates are stored as strings; hand real numbers to the plugin.
    value_data=data[['Latitud', 'Longitud']].astype(float).values

    mapa=folium.Map([19.42, -99.12], zoom_start=4)

    mapa.add_child(plugins.HeatMap(value_data, radius=15))

    mapa.save(path)

    display(mapa)

In [104]:
# Heatmap with the default filter (Intensidad > 0).
load_map()

Unnamed: 0,_id,Fecha,Hora,Latitud,Longitud,Profundidad,Intensidad
0,5fad4ed179cda8b3b00b4f22,2020-11-12,05:58:17,15.82,-95.04,16.0 km,4.3
1,5fad4ed179cda8b3b00b4f24,2020-11-12,04:48:05,16.21,-96.46,65.0 km,3.7
2,5fad4ed179cda8b3b00b4f26,2020-11-12,04:21:59,16.68,-97.47,13.0 km,3.1
3,5fad4ed179cda8b3b00b4f28,2020-11-12,04:14:19,15.37,-94.77,16.0 km,3.8
4,5fad4ed179cda8b3b00b4f2a,2020-11-12,04:13:53,17.36,-101.65,5.0 km,3.4


In [105]:
# Heatmap restricted to records with Intensidad > 3.
load_map({'Intensidad':{'$gt':3}})

Unnamed: 0,_id,Fecha,Hora,Latitud,Longitud,Profundidad,Intensidad
0,5fad4ed179cda8b3b00b4f22,2020-11-12,05:58:17,15.82,-95.04,16.0 km,4.3
1,5fad4ed179cda8b3b00b4f24,2020-11-12,04:48:05,16.21,-96.46,65.0 km,3.7
2,5fad4ed179cda8b3b00b4f26,2020-11-12,04:21:59,16.68,-97.47,13.0 km,3.1
3,5fad4ed179cda8b3b00b4f28,2020-11-12,04:14:19,15.37,-94.77,16.0 km,3.8
4,5fad4ed179cda8b3b00b4f2a,2020-11-12,04:13:53,17.36,-101.65,5.0 km,3.4


In [106]:
# Heatmap restricted to records with Intensidad > 4.
load_map({'Intensidad':{'$gt':4}})

Unnamed: 0,_id,Fecha,Hora,Latitud,Longitud,Profundidad,Intensidad
0,5fad4ed179cda8b3b00b4f22,2020-11-12,05:58:17,15.82,-95.04,16.0 km,4.3
1,5fb7a09bc699c142faee8721,2020-11-20,02:43:53,16.18,-94.54,87.0 km,4.9
2,5fb7a09bc699c142faee8723,2020-11-20,00:09:52,15.67,-96.29,19.0 km,4.2
3,5fb7a09bc699c142faee8731,2020-11-19,14:58:25,14.68,-95.25,15.0 km,4.1
4,5fb8ff5da7e595d3ec5f255c,2020-11-20,18:05:49,24.11,-108.96,16.0 km,4.2


**Heatmap animado**

In [107]:
from folium.plugins import HeatMapWithTime as HMWT

In [108]:
def animated_map(query={'Intensidad':{'$gt':0}}, path='../images/animated_quake_map_mex.html'):
    """Draw an animated heatmap with one frame per hour of the day.

    Parameters
    ----------
    query : dict, optional
        MongoDB filter passed to ``db.ssnUnam.find``. The default selects
        records whose ``Intensidad`` is greater than 0. The default dict is
        only read, never modified.
    path : str, optional
        File the HTML map is saved to.

    Shows the first rows of the selection and the map. Returns None.
    """
    lst=list(db.ssnUnam.find(query))

    # An empty result gives a DataFrame without a 'Hora' column, so the hour
    # extraction below would fail.
    if not lst:
        print('No hay sismos que cumplan la consulta.')
        return

    data=pd.DataFrame(lst)

    # Coordinates are stored as strings; convert them so the plugin receives
    # numbers instead of quoted text.
    data[['Latitud', 'Longitud']]=data[['Latitud', 'Longitud']].astype(float)

    data['count']=1
    # Hour of the day taken from the 'HH:MM:SS' text.
    data['hour']=data.Hora.apply(lambda x: int(x.split(':')[0]))

    # One frame per hour present in the data, in ascending order; each frame
    # is a list of [lat, lng, number of events at that point].
    value_data=[data.loc[data.hour==h,
                        ['Latitud', 'Longitud', 'count']]\
               .groupby(['Latitud', 'Longitud'])\
               .sum()\
               .reset_index().values.tolist()
               for h in data.hour.sort_values().unique()]

    display(data.head())

    mapa=folium.Map([19.42, -99.12], zoom_start=4)

    HMWT(value_data,
         radius=5,
         gradient={0.2:'blue', 0.4:'lime', 0.6:'orange', 1:'red'},
         min_opacity=0.5,
         max_opacity=0.8,
         use_local_extrema=True).add_to(mapa)


    mapa.save(path)

    display(mapa)

In [109]:
# Animated heatmap with the default filter (Intensidad > 0).
animated_map()

Unnamed: 0,_id,Fecha,Hora,Latitud,Longitud,Profundidad,Intensidad,count,hour
0,5fad4ed179cda8b3b00b4f22,2020-11-12,05:58:17,15.82,-95.04,16.0 km,4.3,1,5
1,5fad4ed179cda8b3b00b4f24,2020-11-12,04:48:05,16.21,-96.46,65.0 km,3.7,1,4
2,5fad4ed179cda8b3b00b4f26,2020-11-12,04:21:59,16.68,-97.47,13.0 km,3.1,1,4
3,5fad4ed179cda8b3b00b4f28,2020-11-12,04:14:19,15.37,-94.77,16.0 km,3.8,1,4
4,5fad4ed179cda8b3b00b4f2a,2020-11-12,04:13:53,17.36,-101.65,5.0 km,3.4,1,4


In [111]:
# Animated heatmap restricted to records with Intensidad > 4.
animated_map({'Intensidad':{'$gt':4}})

Unnamed: 0,_id,Fecha,Hora,Latitud,Longitud,Profundidad,Intensidad,count,hour
0,5fad4ed179cda8b3b00b4f22,2020-11-12,05:58:17,15.82,-95.04,16.0 km,4.3,1,5
1,5fb7a09bc699c142faee8721,2020-11-20,02:43:53,16.18,-94.54,87.0 km,4.9,1,2
2,5fb7a09bc699c142faee8723,2020-11-20,00:09:52,15.67,-96.29,19.0 km,4.2,1,0
3,5fb7a09bc699c142faee8731,2020-11-19,14:58:25,14.68,-95.25,15.0 km,4.1,1,14
4,5fb8ff5da7e595d3ec5f255c,2020-11-20,18:05:49,24.11,-108.96,16.0 km,4.2,1,18
