# Geospatio-temporal analysis 

In [117]:
import requests
import numpy as np
import pandas as pd
import boto3
import os
from boto3.dynamodb.conditions import Attr
from datetime import datetime, timedelta
from haversine import haversine
import folium
import warnings
warnings.filterwarnings('ignore')

## Export env variables

In [2]:
# Export every KEY=VALUE line of the local env file into os.environ so that
# later cells can read credentials from the environment.
with open("./app.env") as env_file:
    for env in env_file.read().splitlines():
        # Blank lines and '#' comments carry no assignment; unpacking them
        # below would raise ValueError.
        if not env.strip() or env.lstrip().startswith('#'):
            continue
        # Split on the first '=' only: values (e.g. base64-padded secrets or
        # URLs with query strings) may themselves contain '='.
        key, value = env.split('=', 1)
        os.environ[key] = value

## Parameters

In [158]:
# Reference timestamp in "%Y-%m-%d %H:%M:%S" format; its first 15 characters
# are used as a substring filter on 'timestamp' in the DynamoDB scan below.
START_TIME = '2019-04-22 19:28:01'
# Area of interest as two (latitude, longitude) corners; transform_pm_data
# keeps only sensors lying strictly between these corners.
BOUNDING_BOX = [(48.97, 8.5), (48.6, 9.4)] # NW, SE

## Query data (Raw data source 1)

In [51]:
def add_10min(timestamp):
    """
    Adds 10 min to a timestamp string.

    Args:
        timestamp (str): Input timestamp in "%Y-%m-%d %H:%M:%S" format that
            the time is added to.
    Returns:
        str: Timestamp 10 minutes later, in the same format.
    Raises:
        ValueError: If ``timestamp`` does not match the expected format.
    """
    ts_format = "%Y-%m-%d %H:%M:%S"
    # Parse the argument itself rather than the START_TIME global, so that
    # chained calls (add_10min(add_10min(t))) keep advancing in time.
    later = datetime.strptime(timestamp, ts_format) + timedelta(minutes=10)
    return later.strftime(ts_format)

# Further timestamps derived via add_10min; together with START_TIME their
# first 15 characters serve as 'timestamp' filters in the DynamoDB scan below.
INT_TS1 = add_10min(START_TIME)
INT_TS2 = add_10min(INT_TS1)

### DynamoDB query

In [141]:
# Open the 'luftdaten' table with the credentials exported from the env file.
dynamodb = boto3.resource(
    'dynamodb',
    region_name='us-west-1',
    aws_access_key_id=os.environ['ACCESS_KEY'],
    aws_secret_access_key=os.environ['SECRET'],
)
table = dynamodb.Table('luftdaten')

# Keep items whose timestamp contains one of the three 15-character prefixes
# (i.e. everything up to the tens digit of the minutes).
timestamp_filter = (
    Attr('timestamp').contains(START_TIME[:15])
    | Attr('timestamp').contains(INT_TS1[:15])
    | Attr('timestamp').contains(INT_TS2[:15])
)
# Only these attributes are fetched; '#t' and '#l' are aliased through
# ExpressionAttributeNames in the scan calls.
projected_fields = 'sensordatavalues, #t, sensor.id, #l.longitude, #l.latitude, #l.altitude'

# First page of the scan.
response = table.scan(
    FilterExpression=timestamp_filter,
    ProjectionExpression=projected_fields,
    ExpressionAttributeNames={'#t': 'timestamp', '#l': 'location'},
)
response_items = response['Items']

# Follow the pagination key, fetching at most 10 additional pages.
i = 0
while 'LastEvaluatedKey' in response and i < 10:
    response = table.scan(
        ExclusiveStartKey=response['LastEvaluatedKey'],
        FilterExpression=timestamp_filter,
        ProjectionExpression=projected_fields,
        ExpressionAttributeNames={'#t': 'timestamp', '#l': 'location'},
    )
    response_items.extend(response['Items'])
    i += 1

## Download live data (Raw data source 2)

In [151]:
# Endpoint requested below; its JSON payload is passed to filter_pm_data.
API = 'http://api.luftdaten.info/static/v1/data.json'


def filter_pm_data(json):
    """
    Keeps the sensor entries that report particulate matter and have an altitude.

    An entry is kept when at least one of its 'sensordatavalues' dicts holds
    'P1' or 'P2' among its values and its location altitude is truthy.

    Args:
        json (list): List of sensor data jsons.
    Returns (list): List of filtered sensor data jsons.
    """
    pm_markers = ('P1', 'P2')
    kept = []
    for entry in json:
        # One flag per reading: does it carry a P1 or P2 marker?
        reading_flags = [
            any(marker in reading.values() for marker in pm_markers)
            for reading in entry['sensordatavalues']
        ]
        if any(reading_flags) and entry['location']['altitude']:
            kept.append(entry)
    return kept

# Fetch the live snapshot. Without a timeout requests waits indefinitely on a
# stalled connection, and raise_for_status() surfaces HTTP errors here instead
# of as a confusing JSON decoding failure.
live_response = requests.get(API, timeout=60)
live_response.raise_for_status()
res = live_response.json()
# NOTE: this rebinds response_items, replacing any items collected earlier
# (e.g. from the DynamoDB scan) with the filtered live data.
response_items = filter_pm_data(res)

## Data preprocessing

In [157]:
response_items[0]

{'id': 3659354165,
 'sampling_rate': None,
 'timestamp': '2019-05-17 20:27:52',
 'location': {'id': 523,
  'latitude': '51.512',
  'longitude': '7.478',
  'altitude': '98.4',
  'country': 'DE',
  'exact_location': 0,
  'indoor': 0},
 'sensor': {'id': 1068,
  'pin': '1',
  'sensor_type': {'id': 14, 'name': 'SDS011', 'manufacturer': 'Nova Fitness'}},
 'sensordatavalues': [{'id': 7759582299, 'value': '20.60', 'value_type': 'P1'},
  {'id': 7759582300, 'value': '19.30', 'value_type': 'P2'}]}

In [152]:
len(response_items)

17717

In [161]:
def transform_pm_data(response_items, bounding_box=None):
    """
    Transforms pm data to the target format with time, location and pm values.

    Only sensors lying strictly inside the bounding box are kept.

    Args:
        response_items (list): List of pm data from luftdaten.info.
        bounding_box (list, optional): Two (latitude, longitude) corners,
            north-west first and south-east second. Defaults to the
            module-level BOUNDING_BOX.

    Returns (list): Transformed data, one dict per kept sensor with the keys
        'latitude', 'longitude', 'altitude', 'sensor_id', 'timestamp' and,
        when reported, 'P1' / 'P2'.
    """
    if bounding_box is None:
        # Resolved at call time so later changes to the global are honoured.
        bounding_box = BOUNDING_BOX
    (north, west), (south, east) = bounding_box

    pd_items = []
    for item in response_items:
        location = item['location']
        # Convert each coordinate once; longitude is checked first so that
        # latitude is only parsed for items that pass the longitude test.
        longitude = float(location['longitude'])
        if not west < longitude < east:
            continue
        latitude = float(location['latitude'])
        if not south < latitude < north:
            continue

        base_dict = {'latitude': latitude,
                     'longitude': longitude,
                     'altitude': float(location['altitude']),
                     'sensor_id': item['sensor']['id'],
                     'timestamp': datetime.strptime(item['timestamp'], "%Y-%m-%d %H:%M:%S")}
        # Copy only the particulate matter readings; other value types are dropped.
        for sensorvalue in item['sensordatavalues']:
            if sensorvalue['value_type'] in ('P1', 'P2'):
                base_dict[sensorvalue['value_type']] = float(sensorvalue['value'])
        pd_items.append(base_dict)
    return pd_items

In [162]:
# Build the measurement frame and expose each row's index label as an
# explicit 'id' column.
pm_records = transform_pm_data(response_items)
base_df = pd.DataFrame(pm_records).assign(id=lambda frame: frame.index)

In [163]:
base_df

Unnamed: 0,P1,P2,altitude,latitude,longitude,sensor_id,timestamp,id
0,8.90,4.35,240.9,48.738,9.320,21564,2019-05-17 20:27:52,0
1,4.30,4.00,414.2,48.758,8.888,7707,2019-05-17 20:27:52,1
2,3.23,2.93,272.6,48.780,9.212,10477,2019-05-17 20:27:52,2
3,3.50,3.20,345.7,48.680,9.244,11741,2019-05-17 20:27:53,3
4,6.13,5.53,471.5,48.684,9.028,8413,2019-05-17 20:27:53,4
5,6.90,6.40,265.7,48.916,9.124,775,2019-05-17 20:27:53,5
6,17.07,13.70,283.9,48.876,9.086,13837,2019-05-17 20:27:54,6
7,5.07,2.80,480.7,48.718,9.100,8175,2019-05-17 20:27:54,7
8,5.73,4.40,372.7,48.784,9.272,8149,2019-05-17 20:27:54,8
9,5.08,4.30,271.2,48.840,9.188,11598,2019-05-17 20:27:54,9


## Measurements map

In [167]:
# Map object that plotDot adds the sensor markers to.
data_map = folium.Map(prefer_canvas=True)

def color_producer(pm_value):
    """
    Maps a pm sensor value to a marker color.

    Values below 10 are 'green', values from 10 up to (but excluding) 40 are
    'orange', everything else is 'red'.

    Args:
        pm_value (float): PM sensor value.

    Returns (str): String with the color used to fill in points.
    """
    if pm_value < 10:
        return 'green'
    return 'orange' if pm_value < 40 else 'red'

def plotDot(point):
    """
    Adds one circle marker for a measurement to the module-level data_map.

    The marker is placed at the row's latitude/longitude and colored by
    color_producer according to the row's 'P1' value.

    Args:
        point (pandas.DataFrame.Row): Data Frame row containing longitude and latitude of a point.
    """
    coordinates = [point['latitude'], point['longitude']]
    marker = folium.CircleMarker(location=coordinates,
                                 radius=2,
                                 color=color_producer(point['P1']))
    marker.add_to(data_map)

# Call plotDot once per row; it is used for its side effect of adding a
# marker to data_map, the returned Series is discarded.
base_df.apply(plotDot, axis = 1)


# Fit the view to the bounds reported by the map itself.
data_map.fit_bounds(data_map.get_bounds())

# Last expression of the cell.
data_map

## Create distance matrix

In [96]:
# Constant column that every row shares; merging base_df with itself on it
# pairs each row with every row.
base_df['key'] = 1

In [83]:
# Self-join on the constant key to pair every row with every row, then keep
# only the pairs with id_x < id_y (drops self-pairs and mirrored duplicates).
merged_df = base_df.merge(base_df, on='key').loc[lambda pairs: pairs['id_x'] < pairs['id_y']]

In [113]:
# Take an explicit copy: adding columns to a bare column selection of
# merged_df is chained assignment (SettingWithCopyWarning, which the global
# warnings filter in this notebook would hide).
dist_df = merged_df[['id_x', 'id_y']].copy()
# Haversine distance between the two sensors of each pair, from their
# (latitude, longitude) tuples.
# NOTE(review): haversine() reports kilometres by default - confirm for the installed version.
dist_df['dist'] = merged_df.apply(
    lambda row: haversine((row['latitude_x'], row['longitude_x']),
                          (row['latitude_y'], row['longitude_y'])),
    axis=1)
# Signed time gap in seconds (x minus y), computed column-wise instead of
# through a per-row Python lambda.
dist_df['time_diff'] = (merged_df['timestamp_x'] - merged_df['timestamp_y']).dt.total_seconds()
# Signed differences of the particulate matter readings (x minus y).
dist_df['P1_diff'] = merged_df['P1_x'] - merged_df['P1_y']
dist_df['P2_diff'] = merged_df['P2_x'] - merged_df['P2_y']

In [114]:
dist_df

Unnamed: 0,id_x,id_y,dist,time_diff,P1_diff,P2_diff
1,0,1,595.827909,27.0,-36.08,-22.80
2,0,2,1117.063201,167.0,-3.65,-2.20
3,0,3,471.343173,9.0,-1.87,-1.03
4,0,4,460.733988,155.0,-4.50,-3.82
5,0,5,217.805403,98.0,-1.63,-0.40
6,0,6,1273.998530,189.0,-57.20,-29.52
7,0,7,322.827339,88.0,-11.87,-5.30
8,0,8,306.973214,137.0,-2.45,0.22
9,0,9,98.026644,214.0,-2.02,-0.32
10,0,10,192.717115,172.0,-14.03,-1.60


## Semivariance matrix

In [115]:
# Take an explicit copy so the new columns are written to an independent
# frame rather than to a column selection of dist_df (chained assignment).
semivar_df = dist_df[['id_x', 'id_y', 'dist', 'time_diff']].copy()
# Per-pair semivariance: half of the squared difference between the readings.
semivar_df['P1_semivar'] = np.square(dist_df['P1_diff'])/2
semivar_df['P2_semivar'] = np.square(dist_df['P2_diff'])/2

In [116]:
semivar_df

Unnamed: 0,id_x,id_y,dist,time_diff,P1_semivar,P2_semivar
1,0,1,595.827909,27.0,650.88320,259.92000
2,0,2,1117.063201,167.0,6.66125,2.42000
3,0,3,471.343173,9.0,1.74845,0.53045
4,0,4,460.733988,155.0,10.12500,7.29620
5,0,5,217.805403,98.0,1.32845,0.08000
6,0,6,1273.998530,189.0,1635.92000,435.71520
7,0,7,322.827339,88.0,70.44845,14.04500
8,0,8,306.973214,137.0,3.00125,0.02420
9,0,9,98.026644,214.0,2.04020,0.05120
10,0,10,192.717115,172.0,98.42045,1.28000
