## Prepare activities with geometry info

In [1]:
import pandas as pd
import geopandas as gpd


### Load datasets

In [2]:
# Load data
acts = pd.read_csv("../data/activities.csv", low_memory=False)
acts.shape

(164719, 10)

In [3]:
acts.head()

Unnamed: 0,TRIPID,PERSID,HHID,TRIPNO,ActLocation,ActLocationType,ActType,ActStartTime,ActEndTime,ActDuration
0,Y12H0000101P01T01,Y12H0000101P01,Y12H0000101,1,Whittlesea (C),Workplace,Work,545,1050.0,505.0
1,Y12H0000101P01T02,Y12H0000101P01,Y12H0000101,2,Whittlesea (C),Accommodation,Home,1075,,
2,Y12H0000101P02T01,Y12H0000101P02,Y12H0000101,1,Whittlesea (C),Place of Education,PuDo,520,540.0,20.0
3,Y12H0000101P02T02,Y12H0000101P02,Y12H0000101,2,Whittlesea (C),Shops,Shop,550,600.0,50.0
4,Y12H0000101P02T03,Y12H0000101P02,Y12H0000101,3,Whittlesea (C),Accommodation,Home,610,890.0,280.0


In [4]:
acts.isna().sum()

TRIPID                 0
PERSID                 0
HHID                   0
TRIPNO                 0
ActLocation           68
ActLocationType        0
ActType                0
ActStartTime           0
ActEndTime         46082
ActDuration        46082
dtype: int64

In [9]:
len(acts.ActLocation.unique()), len(vic_lga.LGA_NAME20.unique())

(67, 82)

In [10]:
set(acts.ActLocation.unique()) - set(vic_lga.LGA_NAME20.unique())

{'Kingston (C)', 'Latrobe (C)', 'Melton (S)', 'Unincorporated Victoria', nan}

In [11]:
set(vic_lga.LGA_NAME20.unique()) - set(acts.ActLocation.unique())

{'Alpine (S)',
 'Buloke (S)',
 'Central Goldfields (S)',
 'Gannawarra (S)',
 'Hindmarsh (S)',
 'Horsham (RC)',
 'Kingston (C) (Vic.)',
 'Latrobe (C) (Vic.)',
 'Loddon (S)',
 'Melton (C)',
 'Migratory - Offshore - Shipping (Vic.)',
 'Mildura (RC)',
 'No usual address (Vic.)',
 'Northern Grampians (S)',
 'Pyrenees (S)',
 'Southern Grampians (S)',
 'Towong (S)',
 'Unincorporated Vic',
 'West Wimmera (S)',
 'Wodonga (C)'}

### TODO: MATCHING LGA NAMES

In [12]:
# Load LGA geo dataframe
lga = gpd.read_file("zip://../data/lga_2020_aust_shp.zip", low_memory=False)
lga.shape

(562, 6)

In [13]:
# Get LGA for Victoria
vic_lga = lga[lga.STE_NAME16.isin(['Victoria'])]
vic_lga.shape

(82, 6)

In [14]:
vic_lga.head()

Unnamed: 0,LGA_CODE20,LGA_NAME20,STE_CODE16,STE_NAME16,AREASQKM20,geometry
131,20110,Alpine (S),2,Victoria,4788.1568,"POLYGON ((146.67057 -36.56828, 146.67056 -36.5..."
132,20260,Ararat (RC),2,Victoria,4211.1171,"POLYGON ((143.18569 -37.18385, 143.18598 -37.1..."
133,20570,Ballarat (C),2,Victoria,739.0321,"POLYGON ((143.85331 -37.68123, 143.85320 -37.6..."
134,20660,Banyule (C),2,Victoria,62.5402,"POLYGON ((145.08875 -37.69136, 145.08925 -37.6..."
135,20740,Bass Coast (S),2,Victoria,865.8095,"MULTIPOLYGON (((145.11016 -38.51961, 145.10991..."


In [15]:
# Join LGA geometry into activities
acts = acts.merge(vic_lga, how='left', left_on='ActLocation', right_on='LGA_NAME20')
acts.shape

(164719, 16)

In [16]:
acts[acts.isna()]

Unnamed: 0,TRIPID,PERSID,HHID,TRIPNO,ActLocation,ActLocationType,ActType,ActStartTime,ActEndTime,ActDuration,LGA_CODE20,LGA_NAME20,STE_CODE16,STE_NAME16,AREASQKM20,geometry
0,,,,,,,,,,,,,,,,
1,,,,,,,,,,,,,,,,
2,,,,,,,,,,,,,,,,
3,,,,,,,,,,,,,,,,
4,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
164714,,,,,,,,,,,,,,,,
164715,,,,,,,,,,,,,,,,
164716,,,,,,,,,,,,,,,,
164717,,,,,,,,,,,,,,,,


In [17]:
acts.ActLocation.value_counts()

Melbourne (C)          10242
Boroondara (C)          8189
Greater Geelong (C)     7951
Whitehorse (C)          7837
Casey (C)               7753
                       ...  
Yarriambiack (S)           1
Campaspe (S)               1
Indigo (S)                 1
Wangaratta (RC)            1
Glenelg (S)                1
Name: ActLocation, Length: 66, dtype: int64

In [18]:
# Drop NA
acts = acts.dropna()

In [19]:
acts.shape

(112266, 16)

In [20]:
acts.head()

Unnamed: 0,TRIPID,PERSID,HHID,TRIPNO,ActLocation,ActLocationType,ActType,ActStartTime,ActEndTime,ActDuration,LGA_CODE20,LGA_NAME20,STE_CODE16,STE_NAME16,AREASQKM20,geometry
0,Y12H0000101P01T01,Y12H0000101P01,Y12H0000101,1,Whittlesea (C),Workplace,Work,545,1050.0,505.0,27070,Whittlesea (C),2,Victoria,489.6939,"POLYGON ((145.13242 -37.42004, 145.13277 -37.4..."
2,Y12H0000101P02T01,Y12H0000101P02,Y12H0000101,1,Whittlesea (C),Place of Education,PuDo,520,540.0,20.0,27070,Whittlesea (C),2,Victoria,489.6939,"POLYGON ((145.13242 -37.42004, 145.13277 -37.4..."
3,Y12H0000101P02T02,Y12H0000101P02,Y12H0000101,2,Whittlesea (C),Shops,Shop,550,600.0,50.0,27070,Whittlesea (C),2,Victoria,489.6939,"POLYGON ((145.13242 -37.42004, 145.13277 -37.4..."
4,Y12H0000101P02T03,Y12H0000101P02,Y12H0000101,3,Whittlesea (C),Accommodation,Home,610,890.0,280.0,27070,Whittlesea (C),2,Victoria,489.6939,"POLYGON ((145.13242 -37.42004, 145.13277 -37.4..."
5,Y12H0000101P02T04,Y12H0000101P02,Y12H0000101,4,Whittlesea (C),Place of Education,PuDo,910,940.0,30.0,27070,Whittlesea (C),2,Victoria,489.6939,"POLYGON ((145.13242 -37.42004, 145.13277 -37.4..."


### Generate Points

In [23]:
import numpy as np
import random
from shapely.geometry import Polygon, Point

def random_point(poly):
    min_x, min_y, max_x, max_y = poly.bounds
    while True:
        rand_point = Point([random.uniform(min_x, max_x), random.uniform(min_y, max_y)])
        if rand_point.within(poly):
            return rand_point       


In [26]:
%%time
# Option 1: Generate a random point in zone
# acts.geometry = acts.geometry.apply(random_point)
# Option 2: Generate the centroid of the zone
acts.geometry = acts.geometry.apply(lambda x: x.centroid)
acts.shape

CPU times: user 10.3 s, sys: 1.46 s, total: 11.8 s
Wall time: 11.8 s


(112266, 16)

In [27]:
acts.head()

Unnamed: 0,TRIPID,PERSID,HHID,TRIPNO,ActLocation,ActLocationType,ActType,ActStartTime,ActEndTime,ActDuration,LGA_CODE20,LGA_NAME20,STE_CODE16,STE_NAME16,AREASQKM20,geometry
0,Y12H0000101P01T01,Y12H0000101P01,Y12H0000101,1,Whittlesea (C),Workplace,Work,545,1050.0,505.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.0776926573542 -37.54721248854999)
2,Y12H0000101P02T01,Y12H0000101P02,Y12H0000101,1,Whittlesea (C),Place of Education,PuDo,520,540.0,20.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.0776926573542 -37.54721248854999)
3,Y12H0000101P02T02,Y12H0000101P02,Y12H0000101,2,Whittlesea (C),Shops,Shop,550,600.0,50.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.0776926573542 -37.54721248854999)
4,Y12H0000101P02T03,Y12H0000101P02,Y12H0000101,3,Whittlesea (C),Accommodation,Home,610,890.0,280.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.0776926573542 -37.54721248854999)
5,Y12H0000101P02T04,Y12H0000101P02,Y12H0000101,4,Whittlesea (C),Place of Education,PuDo,910,940.0,30.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.0776926573542 -37.54721248854999)


##### Get lon, lat for each point

In [28]:
%%time
acts['lon'] = acts.geometry.apply(lambda p:p.x)
acts['lat'] = acts.geometry.apply(lambda p:p.y)

CPU times: user 1.31 s, sys: 110 µs, total: 1.31 s
Wall time: 1.3 s


In [29]:
acts.head()

Unnamed: 0,TRIPID,PERSID,HHID,TRIPNO,ActLocation,ActLocationType,ActType,ActStartTime,ActEndTime,ActDuration,LGA_CODE20,LGA_NAME20,STE_CODE16,STE_NAME16,AREASQKM20,geometry,lon,lat
0,Y12H0000101P01T01,Y12H0000101P01,Y12H0000101,1,Whittlesea (C),Workplace,Work,545,1050.0,505.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.0776926573542 -37.54721248854999),145.077693,-37.547212
2,Y12H0000101P02T01,Y12H0000101P02,Y12H0000101,1,Whittlesea (C),Place of Education,PuDo,520,540.0,20.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.0776926573542 -37.54721248854999),145.077693,-37.547212
3,Y12H0000101P02T02,Y12H0000101P02,Y12H0000101,2,Whittlesea (C),Shops,Shop,550,600.0,50.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.0776926573542 -37.54721248854999),145.077693,-37.547212
4,Y12H0000101P02T03,Y12H0000101P02,Y12H0000101,3,Whittlesea (C),Accommodation,Home,610,890.0,280.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.0776926573542 -37.54721248854999),145.077693,-37.547212
5,Y12H0000101P02T04,Y12H0000101P02,Y12H0000101,4,Whittlesea (C),Place of Education,PuDo,910,940.0,30.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.0776926573542 -37.54721248854999),145.077693,-37.547212


In [30]:
acts = gpd.GeoDataFrame(acts)
acts.crs = vic_lga.crs

### Generate Datetime

In [31]:
from datetime import datetime

In [32]:
datetime.now()

datetime.datetime(2021, 4, 4, 18, 34, 39, 948117)

In [33]:
def get_datetime(stime):
    h = stime // 60
    if h > 23:
        h = h - 24
    m = stime % 60
    now = datetime.now()
    d = datetime(now.year, now.month, now.day, h, m)
    return d
    
get_datetime(520)

datetime.datetime(2021, 4, 4, 8, 40)

In [34]:
1490//60

24

In [35]:
%%time
acts['datetime'] = acts.ActStartTime.apply(get_datetime)

CPU times: user 80.9 ms, sys: 4.13 ms, total: 85 ms
Wall time: 84.1 ms


In [36]:
acts.head()

Unnamed: 0,TRIPID,PERSID,HHID,TRIPNO,ActLocation,ActLocationType,ActType,ActStartTime,ActEndTime,ActDuration,LGA_CODE20,LGA_NAME20,STE_CODE16,STE_NAME16,AREASQKM20,geometry,lon,lat,datetime
0,Y12H0000101P01T01,Y12H0000101P01,Y12H0000101,1,Whittlesea (C),Workplace,Work,545,1050.0,505.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.07769 -37.54721),145.077693,-37.547212,2021-04-04 09:05:00
2,Y12H0000101P02T01,Y12H0000101P02,Y12H0000101,1,Whittlesea (C),Place of Education,PuDo,520,540.0,20.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.07769 -37.54721),145.077693,-37.547212,2021-04-04 08:40:00
3,Y12H0000101P02T02,Y12H0000101P02,Y12H0000101,2,Whittlesea (C),Shops,Shop,550,600.0,50.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.07769 -37.54721),145.077693,-37.547212,2021-04-04 09:10:00
4,Y12H0000101P02T03,Y12H0000101P02,Y12H0000101,3,Whittlesea (C),Accommodation,Home,610,890.0,280.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.07769 -37.54721),145.077693,-37.547212,2021-04-04 10:10:00
5,Y12H0000101P02T04,Y12H0000101P02,Y12H0000101,4,Whittlesea (C),Place of Education,PuDo,910,940.0,30.0,27070,Whittlesea (C),2,Victoria,489.6939,POINT (145.07769 -37.54721),145.077693,-37.547212,2021-04-04 15:10:00


In [37]:
# Save into geojson file
acts.to_file("../data/activities.geojson", driver="GeoJSON")