##  Earthquake detection using social media

### <a name="service"></a>Service Definition

In [None]:
# Service metadata: display title, short abstract and service identifier.
service = {
    'title': 'ETHZ-03-03-01 - Earthquake detection using social media',
    'abstract': 'This application takes Twitter to generate a heatmap for earthquake detection',
    'id': 'ewf-ethz-03-03-01',
}

In [None]:
# Runtime parameter: end of the temporal interval of interest.
toDate = {
    'id': 'toDate',
    'value': '2020-05-27T00:00:00Z',
    'title': 'Final Date',
    'abstract': 'Final date of the temporal interval of interest with the format [YYYY-MM-DDThh:mm:ssZ]',
}

In [None]:
# Runtime parameter: length, in days, of the interval of interest that ends at
# toDate (events are only reported when they start inside it).
dDays = {
    'id': 'dDays',
    'value': '1',
    'title': 'Delta days',
    'abstract': 'Number of days of interest - start of temporal interval of interest = toDate - dDays',
}

In [None]:
# Runtime parameter: number of days before toDate for which tweets are fetched
# so that the LTA rolling mean can be computed.
ltaDays = {
    'id': 'ltaDays',
    'value': '15',
    'title': 'LTA days',
    'abstract': 'Number of days before final date used for LTA computation.',
}

### <a name="runtime"></a>Runtime parameter definition

**Input reference**

In [None]:
# Placeholder value: nothing in the visible workflow reads input_reference.
input_reference = 'dummy'

### <a name="workflow"></a>Workflow

#### Import the packages required for processing the data

In [None]:
import os
import sys
import shutil

sys.path.append(os.getcwd())
sys.path.append('/application/notebook/libexec/')

import json
import time
import datetime

import requests

import pandas as pd
import numpy as np
import re
from collections import Counter

import osgeo.ogr as ogr
import osgeo.osr as osr

from zipfile import ZipFile

### Set constants

In [None]:
# Tweet provider endpoint; the fromDate/toDate query parameters are appended to it.
CERTH_API = 'http://160.40.49.181:4000/tweet_provider_api?productType=japanEarthquakeTweets'

BIN_INTERVAL = 5  # width of one time bin, in seconds
BIN_UNIT = 60  # seconds per minute: BIN_UNIT / BIN_INTERVAL scales a per-bin count to tweets per minute
STA_INTERVAL = 60  # length of the STA rolling-mean window, in seconds
LTA_INTERVAL = 60 * 60  # length of the LTA rolling-mean window, in seconds
# Coefficients of the detection index C = STA / (m * LTA + b).
m = 4
b = 10

# Area of interest (WKT polygon): tweet locations are kept only when they
# intersect it, and it is written as the geometry of the output properties files.
AREA_WKT = "POLYGON((141.8583984375 45.81673343092707,145.83544921875 43.88133803549814,143.99926757812497 39.93772807580545,140.73010253906247 33.79767248255881,129.111328125 30.038050457654403,129.60717773437497 37.279120989381234,141.8583984375 45.81673343092707))"

# Directory searched first for the tmp.qml style file copied next to the shapefile.
etc_path = "/application/notebook/etc"

In [None]:
# Derive the time-window boundaries from the runtime parameters.

# End of the interval of interest, parsed from the toDate parameter.
tf = datetime.datetime.strptime(toDate['value'], '%Y-%m-%dT%H:%M:%SZ')

# Look-back lengths expressed as timedeltas.
ltadays = datetime.timedelta(days=int(ltaDays['value']))
ddays = datetime.timedelta(days=int(dDays['value']))

ti = tf - ltadays  # start of the tweet query window
tid = tf - ddays   # start of the days of interest

# Same one-key dict shape as the parameter dicts, so downstream code can read ['value'].
fromDate = dict(value=ti.strftime("%Y-%m-%dT%H:%M:%SZ"))
startDOI = dict(value=tid.strftime("%Y-%m-%dT%H:%M:%SZ"))

print(fromDate['value'], toDate['value'], startDOI['value'])

In [None]:
# Request the tweets between fromDate and toDate from the provider.
query = "{}&fromDate={}&toDate={}".format(CERTH_API, fromDate['value'], toDate['value'])
# A (generous) timeout keeps the notebook from hanging forever on an
# unresponsive server.
r = requests.get(query, timeout=300)
# Stop here on an HTTP error rather than on a confusing JSON/KeyError below.
r.raise_for_status()
response = r.json()

# The provider reports the total number of matching tweets; when fewer items
# are returned the data set is incomplete. This case used to leave `results`
# undefined (NameError further down, or stale data when the cell is re-run),
# so fail explicitly instead.
if response['total_results'] != len(response['results']):
    raise RuntimeError(
        'Tweet provider returned {} of {} results for {}'.format(
            len(response['results']), response['total_results'], query))
results = response['results']

# pandas timestamps of the query window and of the start of the days of interest.
FROM_DATE = pd.to_datetime(fromDate['value'])
TO_DATE = pd.to_datetime(toDate['value'])

START_DOI = pd.to_datetime(startDOI['value'])

## Compute earthquake detection index
#### Function definition

In [None]:
def create_bins(lower_bound, upper_bound, width):
    """Split [lower_bound, upper_bound] into consecutive equal-width intervals.

    Edges are generated every ``width`` seconds starting at ``lower_bound``;
    each pair of neighbouring edges becomes one ``(start, end)`` tuple, so
    ``bins[i][1] == bins[i + 1][0]``. A trailing remainder shorter than
    ``width`` is not covered by any bin.

    Returns an ascending list of ``(start, end)`` tuples; the list is empty
    when fewer than two edges fit in the range.
    """
    edges = pd.date_range(start=lower_bound, end=upper_bound,
                          freq="{}s".format(width))
    # Pair every edge with its successor.
    return list(zip(edges[:-1], edges[1:]))

def find_bin(value, bins):
    """Return the index of the first bin that contains ``value``.

    ``bins`` is a sequence of ``(lower, upper)`` pairs such as
    ``[(0, 20), (20, 40), (40, 60)]``; a bin contains ``value`` when
    ``lower <= value < upper``. Returns -1 when no bin matches.
    """
    matches = (position for position, interval in enumerate(bins)
               if interval[0] <= value < interval[1])
    return next(matches, -1)

def get_coeficient_array(tweet_df, bins):
    """Compute the detection index C for every time bin.

    The tweets in ``tweet_df`` are counted per ``bin_index``, scaled to tweets
    per minute, and smoothed with two trailing rolling means (STA and LTA).
    The index is ``C = STA / (m * LTA + b)``.

    Rows with a negative ``bin_index`` are ignored: find_bin uses -1 for a
    tweet that falls outside every bin, and using it as a list index would
    overwrite the count of the last bin.

    Args:
        tweet_df: DataFrame with an integer ``bin_index`` column.
        bins: list of ``(start, end)`` tuples, one per time bin.

    Returns:
        pandas Series named ``'C'`` with one value per bin. Leading values are
        NaN until the LTA window is full (rolling default ``min_periods``).
    """
    per_minute_factor = int(BIN_UNIT / BIN_INTERVAL)

    tweet_per_minute = [0] * len(bins)
    for bin_index, count in Counter(tweet_df['bin_index']).items():
        if bin_index < 0:
            # Tweet outside the binned time range: not part of any bin.
            continue
        tweet_per_minute[bin_index] = count * per_minute_factor

    df = pd.DataFrame({'time_window': bins, 'tweets_per_minute': tweet_per_minute})

    # Window lengths expressed in number of bins.
    sta_window = int(STA_INTERVAL / BIN_INTERVAL) + 1
    lta_window = int(LTA_INTERVAL / BIN_INTERVAL) + 1

    rate = df['tweets_per_minute']
    df['STA'] = rate.rolling(window=sta_window).mean()
    df['LTA'] = rate.rolling(window=lta_window).mean()

    df['C'] = df['STA'] / (m * df['LTA'] + b)

    return df['C']

def get_wkt(coordinates):
    """Turn a ``"<lat> <long>"`` string into the WKT text ``"POINT (<long> <lat>)"``.

    The input must consist of exactly two tokens separated by a single space;
    the tokens are swapped and inserted verbatim.
    """
    latitude, longitude = coordinates.split(" ")
    return "POINT ({} {})".format(longitude, latitude)

def date_to_str_iso(date):
    """Format ``date`` as ``YYYY-MM-DDThh:mm:ssZ``.

    The trailing ``Z`` is a literal character; no time-zone conversion is
    performed here.
    """
    iso_pattern = '%Y-%m-%dT%H:%M:%SZ'
    return date.strftime(iso_pattern)

def date_to_str(date):
    """Format ``date`` as the compact stamp ``YYYYMMDDhhmm`` (seconds dropped)."""
    compact_pattern = '%Y%m%d%H%M'
    return date.strftime(compact_pattern)

def create_zip_file(files, output_path):
    """Create the zip archive ``output_path`` containing ``files``.

    Every file is stored under its base name only, so the directory part of
    its path is dropped inside the archive. Returns ``output_path``.
    """
    with ZipFile(output_path, 'w') as archive:
        for source_path in files:
            member_name = os.path.basename(source_path)
            archive.write(source_path, arcname=member_name)
    return output_path

def write_properties_file(output_name, first_date, last_date, region_of_interest):
    """Write the metadata file ``<output_name>.properties`` for an output.

    The file has three lines: ``title=Output <output_name>``,
    ``date=<first_date>/<last_date>`` and ``geometry=<region_of_interest>``
    (the last one without a trailing newline).

    Args:
        output_name: path of the output the properties describe.
        first_date: start of the covered interval, already formatted as text.
        last_date: end of the covered interval, already formatted as text.
        region_of_interest: geometry text (the callers pass a WKT polygon).
    """
    lines = [
        'title=Output %s\n' % output_name,
        'date=%s/%s\n' % (first_date, last_date),
        'geometry=%s' % (region_of_interest),
    ]
    # Text mode: the lines are str, and writing str to a file opened with 'wb'
    # raises TypeError on Python 3.
    with open(output_name + '.properties', 'w') as properties_file:
        properties_file.writelines(lines)

#### Tweet dataframe setup

In [None]:
# Time bins covering the whole query window.
bins = create_bins(lower_bound=FROM_DATE, upper_bound=TO_DATE, width=BIN_INTERVAL)

# One entry per tweet: parsed timestamp, index of the bin it falls in
# (-1 when it is outside every bin) and its location as a WKT point.
tweet_time = [pd.to_datetime(result['timestamp']) for result in results]
binned_weights = [find_bin(stamp, bins) for stamp in tweet_time]
wkt = [get_wkt(result['coordinates']) for result in results]

# No tweet belongs to an event yet; 0 means "no event".
event_number = [0] * len(binned_weights)

tweet_product = {
    'bin_index': binned_weights,
    'wkt': wkt,
    'tweet_time': tweet_time,
    'event_number': event_number,
}
tweet_df = pd.DataFrame(tweet_product)

#### Metric computation

In [None]:
# Detect events from the index C: an event opens when C reaches 1 and closes
# when C falls to 0.25 or below. Tweets in bins visited while an event is open
# are tagged with its number in tweet_df['event_number'].
C = get_coeficient_array(tweet_df, bins)

start_time = []
stop_time = []

event_number = 0
event = False
closed_at = None  # index of the bin in which the latest event was closed

for idx, value in enumerate(C):

    opening = value >= 1 and not event
    awaiting_stop = len(start_time) > len(stop_time) and not event
    if not (opening or event or awaiting_stop):
        # Idle bin: nothing to open, tag or close, so skip the tweet lookup.
        continue

    condition = tweet_df['bin_index'] == idx
    bin_times = tweet_df.loc[condition, 'tweet_time']
    has_tweets = len(bin_times) > 0

    if opening:
        if len(start_time) > len(stop_time):
            # The previous event closed in a bin without tweets and no tweet
            # has arrived since, so it still lacks a stop time. Close it with
            # the end of the bin where it was closed; otherwise start_time and
            # stop_time would stop lining up.
            stop_time.append(date_to_str_iso(bins[closed_at][1]))

        event = True
        event_number = event_number + 1

        # Start = earliest tweet of the bin; fall back to the bin start when
        # the bin is empty (min() of an empty sequence raises ValueError).
        first_seen = min(bin_times) if has_tweets else bins[idx][0]
        start_time.append(date_to_str_iso(first_seen))

    if event:
        tweet_df.loc[condition, 'event_number'] = event_number

    if value <= 0.25 and event:
        event = False
        closed_at = idx

    # Stop = latest tweet of the closing bin or, when that bin is empty, of
    # the next bin that contains tweets.
    if len(start_time) > len(stop_time) and not event and has_tweets:
        stop_time.append(date_to_str_iso(max(bin_times)))

# An event still without a stop time at the end of the window ends at TO_DATE.
if len(start_time) == len(stop_time) + 1:
    stop_time.append(date_to_str_iso(TO_DATE))

#### Earthquake events dataframe setup

In [None]:
# Area of interest as an OGR geometry, used to filter tweet locations.
poly_area = ogr.CreateGeometryFromWkt(AREA_WKT)

# Rows of the events CSV: one row per distinct location of each reported event.
wkt_csv, start_csv, stop_csv = [], [], []

# Every in-area location of the reported events, duplicates included (heatmap input).
wkt_shp = []

for n_event, event_start in enumerate(start_time):

    # Only events that start after the beginning of the days of interest are reported.
    if datetime.datetime.strptime(event_start, '%Y-%m-%dT%H:%M:%SZ') <= tid:
        continue

    # Event numbers are 1-based; 0 marks tweets outside any event.
    event_df = tweet_df[tweet_df['event_number'] == n_event + 1]

    inside_area = [
        location for location in event_df['wkt']
        if poly_area.Intersects(ogr.CreateGeometryFromWkt(location))
    ]
    wkt_shp.extend(inside_area)

    # Distinct locations, in order of first appearance.
    distinct_locations = []
    for location in inside_area:
        if location not in distinct_locations:
            distinct_locations.append(location)

    n_rows = len(distinct_locations)
    wkt_csv.extend(distinct_locations)
    start_csv.extend([event_start] * n_rows)
    stop_csv.extend([stop_time[n_event]] * n_rows)

#### Results creation and publishing

In [None]:
# Outputs are only produced when at least one event location was found.
if wkt_csv:

    # Compact stamps go into the file names, ISO stamps into the .properties files.
    from_date = date_to_str(START_DOI)
    to_date = date_to_str(TO_DATE)

    from_date_iso = date_to_str_iso(START_DOI)
    to_date_iso = date_to_str_iso(TO_DATE)

    filename_template = '{}_{}_earthquake_{}'

    ## csv file ##

    csv_product = {'start_time': start_csv, 'stop_time': stop_csv, 'wkt': wkt_csv}
    csv_df = pd.DataFrame(csv_product)

    filename = filename_template.format(from_date, to_date, 'events')
    csv_df.to_csv(filename + '.csv', index=False)

    write_properties_file(filename + '.csv', from_date_iso, to_date_iso, AREA_WKT)

    ## shapefile ##

    driver = ogr.GetDriverByName("ESRI Shapefile")

    filename = filename_template.format(from_date, to_date, 'heatmap')
    # create the data source
    data_source = driver.CreateDataSource(filename + ".shp")
    if data_source is None:
        # Without GDAL exceptions enabled a failed creation returns None;
        # report it here instead of failing later with an AttributeError.
        raise RuntimeError('Could not create shapefile {}.shp'.format(filename))

    # create the spatial reference, WGS84
    srs = osr.SpatialReference()
    srs.ImportFromEPSG(4326)

    # create the layer
    layer = data_source.CreateLayer("earthquakes", srs, ogr.wkbPoint)
    layer.CreateField(ogr.FieldDefn('weight', ogr.OFTInteger))

    # one point feature of weight 1 per tweet location
    for wkt in wkt_shp:
        feature = ogr.Feature(layer.GetLayerDefn())
        feature.SetField("weight", 1)
        point = ogr.CreateGeometryFromWkt(wkt)
        feature.SetGeometry(point)
        layer.CreateFeature(feature)
        feature = None
    # Save and close the data source
    data_source = None

    ## style file ##

    tmpqml_rel_path = 'tmp.qml'

    tmpqml_path = os.path.join(etc_path, tmpqml_rel_path)

    if not os.path.isfile(tmpqml_path):  # when running locally
        tmpqml_path = os.path.join(os.path.dirname(os.getcwd()), 'etc', 'tmp.qml')

    shutil.copy(tmpqml_path, filename + '.qml')

    ## zip archive ##

    # Entries of the current directory only (first item yielded by os.walk).
    dirpath, dirnames, filenames = next(os.walk('.'))

    # The archive and its properties file may survive from an earlier run with
    # the same dates. They match the name filter below, so without this
    # exclusion they would be zipped into the new archive and then removed,
    # deleting the freshly written output.
    zip_name = filename + '.zip'
    previous_outputs = (zip_name, zip_name + '.properties')

    zip_list = []
    for file in filenames:
        if file in previous_outputs:
            continue
        if filename in file and '.csv' not in file:
            zip_list.append(os.path.join(dirpath, file))
    create_zip_file(zip_list, zip_name)

    # The zipped files are no longer needed outside the archive.
    for file in zip_list:
        os.remove(file)

    write_properties_file(zip_name, from_date_iso, to_date_iso, AREA_WKT)
    