In [1]:
%load_ext autoreload
%autoreload 2



# load dependencies
import pandas as pd
import geopandas as gpd
import numpy as np
import seaborn as sns


from math import ceil, floor
from shapely.geometry import Polygon, MultiPoint
import datetime

from envirocar import TrackAPI, DownloadClient, BboxSelector, ECConfig#, TrackConverter
from eda_quality import inspection as inspect

import warnings
# Suppress warnings globally: no category is passed, so the 'ignore' action
# applies to every warning class. The narrower FutureWarning-only variant is
# kept in the trailing comment for reference.
# NOTE(review): this also hides deprecation warnings raised by the libraries
# used below -- consider restricting the filter.
warnings.simplefilter(action='ignore')#, category=FutureWarning)


# Build the API client step by step: an (optional) default configuration,
# the download client that uses it, and the track API on top of that client.
config = ECConfig()
download_client = DownloadClient(config=config)
track_api = TrackAPI(api_client=download_client)

In [2]:
# Bounding box of the query area, given as [min_x, min_y, max_x, max_y].
bbox = BboxSelector([
    7.004130554199218,  # min_x
    52.50590322041212,  # min_y
    7.990351104736328,  # max_x
    52.99874790276371   # max_y
])

# Number of tracks requested from the API (passed as num_results below).
NUM_TRACKS = 2

# issue a query: request NUM_TRACKS tracks inside the bbox
track_df = track_api.get_tracks(bbox=bbox, num_results=NUM_TRACKS)
# track_df

In [3]:

# NOTE(review): this %prun is given no statement to profile; the actual
# profiling calls are issued in the cells further down. Presumably a leftover
# -- confirm and remove.
%prun 

def spatioTemporalAggregation(df, field, summary, gridSize):
    """
    Aggregates the given field per grid cell on an hour and weekday basis.
    Prepares data for a mosaic plot.

    The input is reprojected to EPSG:3857, binned into a regular square grid
    and summarised per (grid cell, weekday, hour). Every grid cell that
    contains data is then split into a 7 x 24 mosaic of sub-cells (one column
    per weekday, one row per hour) shrunk by a 5 % margin on each side, and
    each sub-cell carries the summary value of its weekday/hour.

    FOR THIS TO WORK YOU NEED TO INSTALL RTree or Rtree-linux!!!

    Parameters
    ----------
    df : geopandas dataframe
        Needs a crs, the column ``field`` and a 'time' column holding strings
        formatted as '%Y-%m-%dT%H:%M:%S+00:00'. The frame itself is not
        modified (all work happens on the reprojected copy).
    field : string
        field to be summarized.
    summary : string
        type of summary to be computed, e.g. min, max, sum, median
    gridSize : float
        edge length of a grid cell, expressed in EPSG:3857 coordinate units
        (the data is reprojected before the grid is built).

    Returns
    -------
    grid : geodataframe
        The grid cells that contain data ('geometry', 'gridId'), in the CRS
        of the input.
    subgrid_gpd : geodataframe
        The weekday/hour sub-cells (for visualization purpose only) with the
        columns 'geometry', 'gridId' ("<grid>_<weekday>_<hour>"), 'Weekday',
        'hour' and '<field>_<summary>', in the CRS of the input. Sub-cells
        without a value are dropped.

    """
    def round_down(num, divisor):
        return floor(num / divisor) * divisor

    def round_up(num, divisor):
        return ceil(num / divisor) * divisor

    # Remember the input CRS so the results can be returned in it.
    sourceCRS = df.crs
    targetCRS = "epsg:3857"
    # Reproject to Mercator. to_crs returns a new frame, so the helper
    # columns added further down never reach the caller's dataframe.
    df = df.to_crs(targetCRS)

    # Snap the data extent outwards to whole multiples of the grid size.
    xmin, ymin, xmax, ymax = df.total_bounds
    height, width = gridSize, gridSize
    top, left = round_up(ymax, height), round_down(xmin, width)
    bottom, right = round_down(ymin, height), round_up(xmax, width)

    rows = int((top - bottom) / height)+1
    cols = int((right - left) / width)+1

    # Build the grid column by column (west to east), every column from the
    # top row downwards. The position in `polygons` is the cell's gridId.
    XleftOrigin = left
    XrightOrigin = left + width
    YtopOrigin = top
    YbottomOrigin = top - height
    polygons = []

    for i in range(cols):
        Ytop = YtopOrigin
        Ybottom = YbottomOrigin
        for j in range(rows):
            polygons.append(Polygon(
                [(XleftOrigin, Ytop), (XrightOrigin, Ytop),
                 (XrightOrigin, Ybottom), (XleftOrigin, Ybottom)]))
            Ytop = Ytop - height
            Ybottom = Ybottom - height
        XleftOrigin = XleftOrigin + width
        XrightOrigin = XrightOrigin + width

    grid = gpd.GeoDataFrame({'geometry': polygons})
    grid.crs = (targetCRS)

    # Assign gridid
    grid['gridId'] = list(range(len(grid)))

    # Parse the timestamps once (vectorised) and derive both time keys.
    timestamps = pd.to_datetime(
        df['time'], format='%Y-%m-%dT%H:%M:%S+00:00')
    df['hour'] = timestamps.dt.hour
    df['weekday'] = timestamps.dt.dayofweek

    # Identify gridId for each point. Newer geopandas releases name the
    # keyword `predicate`; older ones only know `op`.
    try:
        points_identified = gpd.sjoin(df, grid, predicate='within')
    except TypeError:
        points_identified = gpd.sjoin(df, grid, op='within')

    # delete if field already exists
    if field in grid.columns:
        del grid[field]

    # Aggregate by weekday, hour and grid. Flattening the two-level column
    # index yields 'gridId_', 'weekday_', 'hour_' and '<field>_<summary>'.
    grouped = points_identified.groupby(
        ['gridId', 'weekday', 'hour']).agg({field: [summary]})
    grouped = grouped.reset_index()
    grouped.columns = grouped.columns.map("_".join)
    modified_fieldname = field+"_"+summary

    # The group keys are unique, so one dictionary lookup per sub-cell
    # replaces re-filtering the whole aggregated frame for every sub-cell.
    summary_lookup = dict(zip(
        zip(grouped['gridId_'], grouped['weekday_'], grouped['hour_']),
        grouped[modified_fieldname]))

    # Create Subgrids
    subgrid, subgridId, rowNum, columnNum, value = [], [], [], [], []
    unikGrid = grouped['gridId_'].unique()
    print('running; wait till you see "finished"')
    for currentGrid in unikGrid:
        xmin, ymin, xmax, ymax = polygons[currentGrid].bounds
        # Shrink the cell by 5 % on every side before splitting it up.
        xminn, xmaxx = xmin + (xmax-xmin)*0.05, xmax-(xmax-xmin)*0.05
        yminn, ymaxx = ymin + (ymax-ymin)*0.05, ymax-(ymax-ymin)*0.05
        rowOffset = (ymaxx-yminn)/24.0
        colOffset = (xmaxx - xminn)/7.0
        for i in range(7):          # weekday -> column
            leftx, rightx = xminn+i*colOffset, xminn+(i+1)*colOffset
            for j in range(24):     # hour -> row, counted from the top
                topy, bottomy = ymaxx-j*rowOffset, ymaxx-(j+1)*rowOffset
                subgrid.append(
                    Polygon([(leftx, topy), (rightx, topy),
                             (rightx, bottomy), (leftx, bottomy)]))
                subgridId.append(
                    str(currentGrid)+"_"+str(i)+"_"+str(j))
                rowNum.append(j)
                columnNum.append(i)
                value.append(
                    summary_lookup.get((currentGrid, i, j), np.nan))

    subgrid_gpd = gpd.GeoDataFrame({'geometry': subgrid})
    subgrid_gpd.crs = targetCRS
    # Reproject back to the CRS of the input
    subgrid_gpd = subgrid_gpd.to_crs(sourceCRS)
    subgrid_gpd['gridId'] = subgridId
    subgrid_gpd['Weekday'] = columnNum
    subgrid_gpd['hour'] = rowNum
    subgrid_gpd[modified_fieldname] = value
    # Keep only the sub-cells that actually carry a value.
    subgrid_gpd = subgrid_gpd.dropna()
    grid = grid.to_crs(sourceCRS)
    grid = grid[grid['gridId'].isin(unikGrid)]
    print('finished')
    return grid, subgrid_gpd

#%prun spatioTemporalAggregation(track_df, "Speed.value","mean",1000)

 

### Full Script Profile
Here we measure the performance of the function 'spatioTemporalAggregation' from the module 'inspection' with Python's built-in code profiler. This opens the pager (a window) with a table that indicates where the execution spends the most time, ordered by the total time spent in each function call.

In [4]:
# Function-by-function profile of a single aggregation run: the "mean" of
# "Speed.value" with gridSize 1000.
%prun spatioTemporalAggregation(track_df, "Speed.value","mean",1000)

running; wait till you see "finished"
finished
 

### Line-by-Line Profile
With the package line_profiler we can get a line-by-line report instead of a function-by-function report. For this, install line_profiler with pip and load the line_profiler extension. You need to explicitly specify which function you are interested in.

In [5]:
# Load the line_profiler extension, which provides the %lprun magic.
%load_ext line_profiler

# -f names the function whose lines are to be timed; the statement after it
# is the call that gets executed (same arguments as the %prun run).
%lprun -f spatioTemporalAggregation spatioTemporalAggregation(track_df, "Speed.value","mean",1000)

running; wait till you see "finished"
finished
