## Prepare activities with geometry info

In [37]:
import pandas as pd
import geopandas as gpd


### Load datasets

In [38]:
# Load data
acts = pd.read_csv("../data/activities.csv", low_memory=False)
acts.shape

(164719, 10)

In [39]:
acts.head()

Unnamed: 0,TRIPID,PERSID,HHID,TRIPNO,ActLocation,ActLocationType,ActType,ActStartTime,ActEndTime,ActDuration
0,Y12H0000101P01T01,Y12H0000101P01,Y12H0000101,1,Whittlesea (C),Workplace,Work,545,1050.0,505.0
1,Y12H0000101P01T02,Y12H0000101P01,Y12H0000101,2,Whittlesea (C),Accommodation,HmE,1075,1965.0,890.0
2,Y12H0000101P02T01,Y12H0000101P02,Y12H0000101,1,Whittlesea (C),Place of Education,PuDo,520,540.0,20.0
3,Y12H0000101P02T02,Y12H0000101P02,Y12H0000101,2,Whittlesea (C),Shops,Shop,550,600.0,50.0
4,Y12H0000101P02T03,Y12H0000101P02,Y12H0000101,3,Whittlesea (C),Accommodation,Hm,610,890.0,280.0


In [40]:
acts.isna().sum()

TRIPID              0
PERSID              0
HHID                0
TRIPNO              0
ActLocation        68
ActLocationType     0
ActType             0
ActStartTime        0
ActEndTime          4
ActDuration         4
dtype: int64

In [41]:
len(acts.ActLocation.unique()), len(vic_lga.LGA_NAME20.unique())

(67, 82)

In [42]:
set(acts.ActLocation.unique()) - set(vic_lga.LGA_NAME20.unique())

{'Kingston (C)', 'Latrobe (C)', 'Melton (S)', 'Unincorporated Victoria', nan}

In [43]:
set(vic_lga.LGA_NAME20.unique()) - set(acts.ActLocation.unique())

{'Alpine (S)',
 'Buloke (S)',
 'Central Goldfields (S)',
 'Gannawarra (S)',
 'Hindmarsh (S)',
 'Horsham (RC)',
 'Kingston (C) (Vic.)',
 'Latrobe (C) (Vic.)',
 'Loddon (S)',
 'Melton (C)',
 'Migratory - Offshore - Shipping (Vic.)',
 'Mildura (RC)',
 'No usual address (Vic.)',
 'Northern Grampians (S)',
 'Pyrenees (S)',
 'Southern Grampians (S)',
 'Towong (S)',
 'Unincorporated Vic',
 'West Wimmera (S)',
 'Wodonga (C)'}

### TODO: MATCHING LGA NAMES

In [44]:
# Load LGA geo dataframe
lga = gpd.read_file("zip://../data/lga_2020_aust_shp.zip", low_memory=False)
lga.shape

(562, 6)

In [45]:
# Get LGA for Victoria
vic_lga = lga[lga.STE_NAME16.isin(['Victoria'])]
vic_lga.shape

(82, 6)

In [46]:
vic_lga.head()

Unnamed: 0,LGA_CODE20,LGA_NAME20,STE_CODE16,STE_NAME16,AREASQKM20,geometry
131,20110,Alpine (S),2,Victoria,4788.1568,"POLYGON ((146.67057 -36.56828, 146.67056 -36.5..."
132,20260,Ararat (RC),2,Victoria,4211.1171,"POLYGON ((143.18569 -37.18385, 143.18598 -37.1..."
133,20570,Ballarat (C),2,Victoria,739.0321,"POLYGON ((143.85331 -37.68123, 143.85320 -37.6..."
134,20660,Banyule (C),2,Victoria,62.5402,"POLYGON ((145.08875 -37.69136, 145.08925 -37.6..."
135,20740,Bass Coast (S),2,Victoria,865.8095,"MULTIPOLYGON (((145.11016 -38.51961, 145.10991..."


In [47]:
# Join LGA geometry into activities
acts = acts.merge(vic_lga, how='left', left_on='ActLocation', right_on='LGA_NAME20')
acts.shape

(164719, 16)

In [48]:
acts[acts.isna()]

Unnamed: 0,TRIPID,PERSID,HHID,TRIPNO,ActLocation,ActLocationType,ActType,ActStartTime,ActEndTime,ActDuration,LGA_CODE20,LGA_NAME20,STE_CODE16,STE_NAME16,AREASQKM20,geometry
0,,,,,,,,,,,,,,,,
1,,,,,,,,,,,,,,,,
2,,,,,,,,,,,,,,,,
3,,,,,,,,,,,,,,,,
4,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
164714,,,,,,,,,,,,,,,,
164715,,,,,,,,,,,,,,,,
164716,,,,,,,,,,,,,,,,
164717,,,,,,,,,,,,,,,,


In [49]:
acts.ActLocation.value_counts()

Melbourne (C)          10242
Boroondara (C)          8189
Greater Geelong (C)     7951
Whitehorse (C)          7837
Casey (C)               7753
                       ...  
Corangamite (S)            1
Strathbogie (S)            1
Glenelg (S)                1
Indigo (S)                 1
Moyne (S)                  1
Name: ActLocation, Length: 66, dtype: int64

In [50]:
# Drop NA
acts = acts.dropna()

In [51]:
acts.shape

(155596, 16)

In [52]:
acts.head()

Unnamed: 0,TRIPID,PERSID,HHID,TRIPNO,ActLocation,ActLocationType,ActType,ActStartTime,ActEndTime,ActDuration,LGA_CODE20,LGA_NAME20,STE_CODE16,STE_NAME16,AREASQKM20,geometry
0,Y12H0000101P01T01,Y12H0000101P01,Y12H0000101,1,Whittlesea (C),Workplace,Work,545,1050.0,505.0,27070,Whittlesea (C),2,Victoria,489.6939,"POLYGON ((145.13242 -37.42004, 145.13277 -37.4..."
1,Y12H0000101P01T02,Y12H0000101P01,Y12H0000101,2,Whittlesea (C),Accommodation,HmE,1075,1965.0,890.0,27070,Whittlesea (C),2,Victoria,489.6939,"POLYGON ((145.13242 -37.42004, 145.13277 -37.4..."
2,Y12H0000101P02T01,Y12H0000101P02,Y12H0000101,1,Whittlesea (C),Place of Education,PuDo,520,540.0,20.0,27070,Whittlesea (C),2,Victoria,489.6939,"POLYGON ((145.13242 -37.42004, 145.13277 -37.4..."
3,Y12H0000101P02T02,Y12H0000101P02,Y12H0000101,2,Whittlesea (C),Shops,Shop,550,600.0,50.0,27070,Whittlesea (C),2,Victoria,489.6939,"POLYGON ((145.13242 -37.42004, 145.13277 -37.4..."
4,Y12H0000101P02T03,Y12H0000101P02,Y12H0000101,3,Whittlesea (C),Accommodation,Hm,610,890.0,280.0,27070,Whittlesea (C),2,Victoria,489.6939,"POLYGON ((145.13242 -37.42004, 145.13277 -37.4..."


### Generate Points

In [65]:
import numpy as np
import random
from shapely.geometry import Polygon, Point

def random_point(poly):
    min_x, min_y, max_x, max_y = poly.bounds
    while True:
        rand_point = Point([random.uniform(min_x, max_x), random.uniform(min_y, max_y)])
        if rand_point.within(poly):
            return rand_point

In [82]:
p2 = acts.loc[1].geometry
type(p2)

shapely.geometry.point.Point

In [84]:
p2.x, p2.y

(145.068866298965, -37.606366968046515)

In [75]:
%%time
acts.geometry = acts.geometry.apply(random_point)
acts.shape

CPU times: user 1min 17s, sys: 49.4 ms, total: 1min 17s
Wall time: 1min 17s


(155596, 16)

In [76]:
acts.head()

Unnamed: 0,TRIPID,PERSID,HHID,TRIPNO,ActLocation,ActLocationType,ActType,ActStartTime,ActEndTime,ActDuration,LGA_CODE20,LGA_NAME20,STE_CODE16,STE_NAME16,AREASQKM20,geometry
0,Y12H0000101P01T01,Y12H0000101P01,Y12H0000101,1,Whittlesea (C),Workplace,Work,545,1050.0,505.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.084565245357 -37.59963437468534)
1,Y12H0000101P01T02,Y12H0000101P01,Y12H0000101,2,Whittlesea (C),Accommodation,HmE,1075,1965.0,890.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.068866298965 -37.60636696804652)
2,Y12H0000101P02T01,Y12H0000101P02,Y12H0000101,1,Whittlesea (C),Place of Education,PuDo,520,540.0,20.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.1932421664102 -37.48051082063128)
3,Y12H0000101P02T02,Y12H0000101P02,Y12H0000101,2,Whittlesea (C),Shops,Shop,550,600.0,50.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.1720398412042 -37.48443669916352)
4,Y12H0000101P02T03,Y12H0000101P02,Y12H0000101,3,Whittlesea (C),Accommodation,Hm,610,890.0,280.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.1385023599605 -37.46866615952665)


##### Get lon, lat for each point

In [85]:
%%time
acts['lon'] = acts.geometry.apply(lambda p:p.x)
acts['lat'] = acts.geometry.apply(lambda p:p.y)

CPU times: user 1.71 s, sys: 19 µs, total: 1.71 s
Wall time: 1.71 s


In [86]:
acts.head()

Unnamed: 0,TRIPID,PERSID,HHID,TRIPNO,ActLocation,ActLocationType,ActType,ActStartTime,ActEndTime,ActDuration,LGA_CODE20,LGA_NAME20,STE_CODE16,STE_NAME16,AREASQKM20,geometry,lon,lat
0,Y12H0000101P01T01,Y12H0000101P01,Y12H0000101,1,Whittlesea (C),Workplace,Work,545,1050.0,505.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.084565245357 -37.59963437468534),145.084565,-37.599634
1,Y12H0000101P01T02,Y12H0000101P01,Y12H0000101,2,Whittlesea (C),Accommodation,HmE,1075,1965.0,890.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.068866298965 -37.60636696804652),145.068866,-37.606367
2,Y12H0000101P02T01,Y12H0000101P02,Y12H0000101,1,Whittlesea (C),Place of Education,PuDo,520,540.0,20.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.1932421664102 -37.48051082063128),145.193242,-37.480511
3,Y12H0000101P02T02,Y12H0000101P02,Y12H0000101,2,Whittlesea (C),Shops,Shop,550,600.0,50.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.1720398412042 -37.48443669916352),145.17204,-37.484437
4,Y12H0000101P02T03,Y12H0000101P02,Y12H0000101,3,Whittlesea (C),Accommodation,Hm,610,890.0,280.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.1385023599605 -37.46866615952665),145.138502,-37.468666


In [95]:
acts = gpd.GeoDataFrame(acts)
acts.crs = vic_lga.crs

### Generate Datetime

In [105]:
from datetime import datetime

In [106]:
datetime.now()

datetime.datetime(2021, 3, 18, 14, 34, 0, 731825)

In [113]:
def get_datetime(stime):
    h = stime // 60
    if h > 23:
        h = h - 24
    m = stime % 60
    now = datetime.now()
    d = datetime(now.year, now.month, now.day, h, m)
    return d
    
get_datetime(520)

datetime.datetime(2021, 3, 18, 8, 40)

In [114]:
1490//60

24

In [115]:
%%time
acts['datetime'] = acts.ActStartTime.apply(get_datetime)

CPU times: user 91.1 ms, sys: 12.1 ms, total: 103 ms
Wall time: 103 ms


In [117]:
acts.head()

Unnamed: 0,TRIPID,PERSID,HHID,TRIPNO,ActLocation,ActLocationType,ActType,ActStartTime,ActEndTime,ActDuration,LGA_CODE20,LGA_NAME20,STE_CODE16,STE_NAME16,AREASQKM20,geometry,lon,lat,datetime
0,Y12H0000101P01T01,Y12H0000101P01,Y12H0000101,1,Whittlesea (C),Workplace,Work,545,1050.0,505.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.08457 -37.59963),145.084565,-37.599634,2021-03-18 09:05:00
1,Y12H0000101P01T02,Y12H0000101P01,Y12H0000101,2,Whittlesea (C),Accommodation,HmE,1075,1965.0,890.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.06887 -37.60637),145.068866,-37.606367,2021-03-18 17:55:00
2,Y12H0000101P02T01,Y12H0000101P02,Y12H0000101,1,Whittlesea (C),Place of Education,PuDo,520,540.0,20.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.19324 -37.48051),145.193242,-37.480511,2021-03-18 08:40:00
3,Y12H0000101P02T02,Y12H0000101P02,Y12H0000101,2,Whittlesea (C),Shops,Shop,550,600.0,50.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.17204 -37.48444),145.17204,-37.484437,2021-03-18 09:10:00
4,Y12H0000101P02T03,Y12H0000101P02,Y12H0000101,3,Whittlesea (C),Accommodation,Hm,610,890.0,280.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.13850 -37.46867),145.138502,-37.468666,2021-03-18 10:10:00


In [118]:
# Save into geojson file
acts.to_file("../data/activities.geojson", driver="GeoJSON")