In [17]:
import transbigdata as tbd
import pandas as pd
import numpy as np
import geopandas as gpd
from transbigdata.grids import (
    area_to_params,
    GPS_to_grid,
    grid_to_polygon,
    grid_to_centre,
)
from transbigdata.odprocess import (
    odagg_grid
)

In [11]:
import os
import pandas as pd

# Directory holding the preprocessed data files: the `preprocess/` folder
# located under the parent of the current working directory.
parent_dir = os.path.abspath('..')
file_path = parent_dir + '/preprocess/'

In [12]:
# Echo the resolved preprocess directory as a quick sanity check.
# NOTE(review): the saved output of this cell is an absolute path inside a
# user's home directory; consider clearing it before sharing the notebook.
file_path

'/home/lizhenyu/Project/E2DTC/preprocess/'

In [18]:
def _global_warming_color_range():
    '''Return a fresh copy of the 'Global Warming' sequential colour range.'''
    return {
        'name': 'Global Warming',
        'type': 'sequential',
        'category': 'Uber',
        'colors': ['#5A1846',
                   '#900C3F',
                   '#C70039',
                   '#E3611C',
                   '#F1920E',
                   '#FFC300']}


def visualization_data(data, col=['lon', 'lat'], accuracy=500, height=500,
                       maptype='point', zoom='auto'):
    '''
    Aggregate data points onto a grid and visualize the result with keplergl.

    The input DataFrame is never modified: all cleaning and aggregation is
    done on a private copy of the columns named in ``col``.

    Parameters
    -------
    data : DataFrame
        The data points
    col : List
        The column names. Either ``[longitude, latitude]``, in which case
        every point counts as 1 and the per-grid total is stored in a column
        named ``'count'``, or ``[longitude, latitude, count]``, in which case
        the given weight column is summed per grid
    accuracy : number
        Grid size, passed to ``area_to_params``
    height : number
        The height of the map frame
    maptype : str
        Map type, 'heatmap' for a heatmap layer; any other value (default
        'point') draws the grid polygons coloured by count
    zoom : number
        Map zoom level (Optional). Default value: auto. With 'auto' the
        heatmap zoom is derived from the 5%-95% longitude span of the data
        and the grid map opens at zoom level 10

    Returns
    -------
    vmap : keplergl.keplergl.KeplerGl
        Visualizations provided by keplergl

    Raises
    -------
    ValueError
        If ``col`` does not contain 2 or 3 names, or if no point with a valid
        longitude/latitude remains after cleaning
    ImportError
        If keplergl is not installed
    '''
    # Validate first: previously any other length fell through both branches
    # and failed much later with a NameError.
    if len(col) not in (2, 3):
        raise ValueError(
            "col must be [longitude, latitude] or "
            "[longitude, latitude, count]")

    try:
        from keplergl import KeplerGl
    except ImportError:  # pragma: no cover
        raise ImportError(  # pragma: no cover
            "Please install keplergl, run "
            "the following code in cmd: pip install keplergl")

    weighted = len(col) == 3
    if weighted:
        lon, lat, count = col
    else:
        lon, lat = col
        count = 'count'

    # Copy only the columns that are needed so the caller's frame is left
    # untouched (the aggregation below discards every other column anyway).
    points = data[list(col)].copy()
    points[lon] = points[lon].astype('float')
    points[lat] = points[lat].astype('float')

    # clean data: `between` is inclusive and False for NaN, so this drops
    # both missing and out-of-range coordinates in one pass.
    in_range = (points[lon].between(-180, 180)
                & points[lat].between(-90, 90))
    points = points[in_range].copy()
    if points.empty:
        raise ValueError(
            "No point with a valid longitude/latitude left after cleaning")

    bounds = [points[lon].min(), points[lat].min(),
              points[lon].max(), points[lat].max()]
    lon_center, lat_center = points[lon].mean(), points[lat].mean()

    if zoom == 'auto':
        # The grid (non-heatmap) map has always opened at zoom level 10.
        point_zoom = 10
        lon_span = points[lon].quantile(0.95) - points[lon].quantile(0.05)
        if lon_span > 0:
            heatmap_zoom = 8.5-np.log(lon_span)/np.log(2)
        else:
            # Degenerate span (e.g. a single location): log(0) would give an
            # infinite zoom, so fall back to the fixed default.
            heatmap_zoom = point_zoom
    else:
        # An explicit zoom is honoured by both map types.
        heatmap_zoom = point_zoom = zoom

    # Snap every point to its grid cell and total the weight per cell.
    params = area_to_params(bounds, accuracy=accuracy)
    points['LONCOL'], points['LATCOL'] = GPS_to_grid(
        points[lon], points[lat], params)
    if not weighted:
        points[count] = 1
    grid = points.groupby(['LONCOL', 'LATCOL'])[count].sum().reset_index()
    grid['geometry'] = grid_to_polygon(
        [grid['LONCOL'], grid['LATCOL']], params)
    grid[lon], grid[lat] = grid_to_centre(
        [grid['LONCOL'], grid['LATCOL']], params)
    grid = gpd.GeoDataFrame(grid)

    if maptype == 'heatmap':
        vmap = KeplerGl(config={  # pragma: no cover
            'version': 'v1',
            'config': {
                'visState': {
                    'filters': [],
                    'layers': [
                        {'id': 'vpefba0o',
                         'type': 'heatmap',
                         'config': {
                             'dataId': 'data',
                             'label': 'Point',
                             'color': [18, 147, 154],
                             'highlightColor': [252, 242, 26, 255],
                             'columns': {'lat': lat, 'lng': lon},
                             'isVisible': True,
                             'visConfig': {
                                 'opacity': 0.8,
                                 'colorRange': _global_warming_color_range(),
                                 'radius': 23},
                             'hidden': False,
                             'textLabel': [{'field': None,
                                            'color': [255, 255, 255],
                                            'size': 18,
                                            'offset': [0, 0],
                                            'anchor': 'start',
                                            'alignment': 'center'}]},
                         'visualChannels': {
                             'weightField': {'name': count, 'type': 'integer'},
                             'weightScale': 'linear'}}],
                    'interactionConfig': {
                        'tooltip': {
                            'fieldsToShow': {
                                'data': [{
                                    'name': count,
                                    'format': None}]},
                            'compareMode': False,
                            'compareType': 'absolute',
                            'enabled': True},
                        'brush': {'size': 0.5, 'enabled': False},
                        'geocoder': {'enabled': False},
                        'coordinate': {'enabled': False}},
                    'layerBlending': 'normal',
                    'splitMaps': [],
                    'animationConfig': {'currentTime': None, 'speed': 1}},
                'mapState':
                {
                    'bearing': 0,
                    'dragRotate': True,
                    # Centre on the mean of the cleaned raw points.
                    'latitude': lat_center,
                    'longitude': lon_center,
                    'pitch': 0,
                    'zoom': heatmap_zoom,
                    'isSplit': False
                },
                'mapStyle':
                {
                    'styleType': 'dark',
                    'topLayerGroups':
                    {},
                    'visibleLayerGroups':
                    {
                        'label': True,
                        'road': True,
                        'border': False,
                        'building': True,
                        'water': True,
                        'land': True,
                        '3d building': False
                    },
                    'threeDBuildingColor': [9.665468314072013,
                                            17.18305478057247,
                                            31.1442867897876
                                            ],
                    'mapStyles':
                    {}
                }}}, data={'data': grid.to_json()}, height=height)
    else:
        vmap = KeplerGl(config={
            'version': 'v1',
            'config': {
                'visState': {
                    'filters': [],
                    'layers': [{
                        'id': 'ytak0zp',
                        'type': 'geojson',
                        'config': {
                            'dataId': count,
                            'label': count,
                            'color': [77, 193, 156],
                            'highlightColor': [252, 242, 26, 255],
                            'columns': {'geojson': '_geojson'},
                            'isVisible': True,
                            'visConfig': {
                                'opacity': 0.8,
                                'strokeOpacity': 0.8,
                                'thickness': 0.5,
                                'strokeColor': [218, 112, 191],
                                'colorRange': _global_warming_color_range(),
                                'strokeColorRange':
                                    _global_warming_color_range(),
                                'radius': 10,
                                'sizeRange': [0, 10],
                                'radiusRange': [0, 50],
                                'heightRange': [0, 500],
                                'elevationScale': 5,
                                'enableElevationZoomFactor': True,
                                'stroked': False,
                                'filled': True,
                                'enable3d': False,
                                'wireframe': False},
                            'hidden': False,
                            'textLabel': [{'field': None,
                                           'color': [255, 255, 255],
                                           'size': 18,
                                           'offset': [0, 0],
                                           'anchor': 'start',
                                           'alignment': 'center'}]},
                        'visualChannels': {
                            'colorField': {'name': count, 'type': 'integer'},
                            'colorScale': 'quantile',
                            'strokeColorField': None,
                            'strokeColorScale': 'quantile',
                            'sizeField': None,
                            'sizeScale': 'linear',
                            'heightField': None,
                            'heightScale': 'linear',
                            'radiusField': None,
                            'radiusScale': 'linear'}}],
                    'layerBlending': 'normal',
                    'splitMaps': [],
                    'animationConfig': {'currentTime': None, 'speed': 1}},
                'mapState': {'bearing': 0,
                             'dragRotate': False,
                             # Centre on the mean of the grid-cell centres.
                             'latitude': grid[lat].mean(),
                             'longitude': grid[lon].mean(),
                             'pitch': 0,
                             'zoom': point_zoom,
                             'isSplit': False}}},
            data={count: grid.to_json()}, height=height)

    return vmap

In [13]:
# Load the labelled trajectory table and report how many rows carry each label.
labeled_traj_file = file_path + 'all_traj_labeled_σ:0.8_λ:0.7.h5'
all_traj = pd.read_hdf(labeled_traj_file)
label_counts = all_traj['label'].value_counts()
print(label_counts)

0     542
3     350
5     221
1     136
8     127
10     92
2      76
11     45
9      43
6      43
4      31
7      25
Name: label, dtype: int64


In [8]:
# Keep only the trajectories whose label is 0 and display them.
has_label_0 = all_traj['label'] == 0
traj_0 = all_traj[has_label_0]
traj_0

Unnamed: 0,trajectory,user_index,year,month,day,label
0,22042 [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD...,0,2009,3,31,0
1,[PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD...,0,2009,4,3,0
2,15374 15429 15665 15466 15874 15502 15653 1554...,0,2009,4,4,0
3,[PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD...,0,2009,4,5,0
4,[PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD...,0,2009,4,6,0
...,...,...,...,...,...,...
1721,18621 18546 18840 19032 18995 18353 18747 [PAD...,167,2008,12,27,0
1723,[PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD...,179,2008,8,24,0
1725,17656 18029 17780 17517 17659 17657 17719 1761...,179,2008,9,12,0
1726,[PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD] [PAD...,179,2008,9,13,0


In [19]:
# NOTE(review): interactive IPython help lookup left over from development; it
# contributes no analysis result and can be removed before sharing. It also
# relies on `data`, which is only assigned in a cell further down the notebook.
?data.to_json

In [7]:
# Read the table stored under the 'data' key of the Geolife HDF5 file.
df_all = pd.read_hdf(
    './preprocess/Geolife_all_user.h5',
    key='data',
)


Unnamed: 0,time,lat,lon,alt,label,user
0,2000-01-01 23:12:19,39.988992,116.327023,128.937005,0,163
1,2000-01-01 23:13:21,39.990964,116.327041,221.128615,0,163
2,2000-01-01 23:15:23,39.993207,116.326827,217.191591,0,163
0,2007-04-12 09:31:32,39.974233,116.330383,823.490814,0,142
1,2007-04-12 09:39:37,39.974317,116.330450,823.490814,0,142
...,...,...,...,...,...,...
291,2012-07-27 08:31:03,39.979481,116.303499,131.760207,0,163
292,2012-07-27 08:31:05,39.979475,116.303497,131.097241,0,163
293,2012-07-27 08:31:10,39.979374,116.303522,126.235679,0,163
294,2012-07-27 08:31:15,39.979190,116.303587,119.301427,0,163


In [10]:
# Select the records that belong to user 0 and display them.
is_user_0 = df_all['user'] == 0
data = df_all[is_user_0]
data

Unnamed: 0,time,lat,lon,alt,label,user
0,2008-10-23 02:53:04,39.984702,116.318417,492.0,0,0
1,2008-10-23 02:53:10,39.984683,116.318450,492.0,0,0
2,2008-10-23 02:53:15,39.984686,116.318417,492.0,0,0
3,2008-10-23 02:53:20,39.984688,116.318385,492.0,0,0
4,2008-10-23 02:53:25,39.984655,116.318263,492.0,0,0
...,...,...,...,...,...,...
2307,2009-07-05 07:44:55,40.000403,116.327255,149.0,0,0
2308,2009-07-05 07:45:00,40.000433,116.327209,150.0,0,0
2309,2009-07-05 07:45:05,40.000443,116.327186,150.0,0,0
2310,2009-07-05 07:45:10,40.000522,116.327132,149.0,0,0


In [13]:
# Print a summary of the full table, using 'user' and 'time' as the summary
# columns and including the sampling-interval statistics.
tbd.data_summary(
    df_all,
    col=['user', 'time'],
    show_sample_duration=True,
)

Amount of data
-----------------
Total number of data items:  24876978
Total number of individuals:  182
Data volume of individuals(Mean):  136686.6923
Data volume of individuals(Upper quartile):  143041.5
Data volume of individuals(Median):  35181.5
Data volume of individuals(Lower quartile):  3359.0

Data time period
-----------------
Start time:  2000-01-01 23:12:19
End time:  2012-07-27 08:31:20

Sampling interval
-----------------
Mean:  118.8427 s
Upper quartile:  5.0 s
Median:  2.0 s
Lower quartile:  1.0 s


In [14]:
# NOTE(review): interactive IPython help lookup left over from development; it
# contributes no analysis result and can be removed before sharing.
?tbd.taxigps_to_od

In [16]:
# Attempt to derive origin-destination records from the full point table.
# NOTE(review): per this cell's saved output the call fails with
# "ValueError: not enough values to unpack (expected 5, got 4)", which suggests
# `col` must name five columns while only four are passed here. Presumably the
# missing fifth one is a passenger/occupancy status column, which the table
# displayed above does not appear to contain — TODO confirm against the
# transbigdata documentation before relying on `oddata`.
oddata = tbd.taxigps_to_od(df_all, col=['user','time','lon','lat'])
oddata

ValueError: not enough values to unpack (expected 5, got 4)

In [None]:
# Visualize the selected user's records using their 'lon'/'lat' columns.
# NOTE(review): this calls the library's `tbd.visualization_data`, not the
# `visualization_data` function defined earlier in this notebook — confirm
# which of the two is intended.
tbd.visualization_data(data,col=['lon', 'lat'])