In [49]:
import numpy as np
import pandas as pd
import geopandas as gpd
import requests
import zipfile

### VISTA (Victorian Integrated Survey of Travel and Activity) data set
Source: https://transport.vic.gov.au/about/data-and-research/vista/vista-data-and-publications

In [33]:
def download_data(url, save_to, timeout=60, chunk_size=1024 * 1024):
    """
    Download the file at `url` and save it to `save_to`.

    The response is streamed to a temporary ``<save_to>.part`` file and moved
    into place only after the whole body has been received, so a failed or
    interrupted download never leaves a truncated file at `save_to`.

    :param url: the url of data
    :param save_to: the location to save file; missing parent folders are created.
    :param timeout: seconds to wait for the server to connect / send data
        (passed to `requests.get`).
    :param chunk_size: number of bytes read from the response per write.
    :raises requests.HTTPError: if the server answers with a 4xx/5xx status.
    """
    import os
    from pathlib import Path

    target = Path(save_to)
    target.parent.mkdir(parents=True, exist_ok=True)
    partial = target.with_name(target.name + ".part")
    try:
        with requests.get(url, stream=True, timeout=timeout) as r:
            # Fail loudly instead of saving an HTML error page as the data file.
            r.raise_for_status()
            with open(partial, 'wb') as f:
                for chunk in r.iter_content(chunk_size=chunk_size):
                    f.write(chunk)
        # Publish the file only once it is complete.
        os.replace(partial, target)
    finally:
        # Only still present if something failed before the final rename.
        if partial.exists():
            partial.unlink()

In [34]:
# Get vista data (2012-2018) zip file, and store in data folder.
# `save_to` is read again by the cell that opens the downloaded archive.
url = "https://transport.vic.gov.au/-/media/tfv-documents/vista/vista-12-18.zip"
save_to = "../data/vista-12-18.zip"
download_data(url, save_to)

In [35]:
# Open the zip file, and check the csv files inside.
# `z` is kept open because the CSV members are read from it in the next code cell.
z = zipfile.ZipFile(save_to)
z.printdir()

File Name                                             Modified             Size
H_VISTA_1218_V1.csv                            2020-03-12 12:52:20      4367531
JTE_VISTA1218_V1.csv                           2019-12-18 13:17:44      1943852
JTW_VISTA1218_V1.csv                           2019-12-15 23:48:42      6349199
P_VISTA1218_V1.csv                             2019-12-19 12:27:12     20703620
S_VISTA1218_V1.csv                             2019-12-18 14:30:22    105208498
T_VISTA1218_V1.csv                             2019-12-18 14:35:10     70120056
VISTA - Glossary of Variables 12-18.docx       2019-12-18 14:35:28       185046


In [36]:
# Only three of the CSV members are needed: households, persons and trips.
households, persons, trips = (
    pd.read_csv(z.open(member), low_memory=False)
    for member in (
        "H_VISTA_1218_V1.csv",
        "P_VISTA1218_V1.csv",
        "T_VISTA1218_V1.csv",
    )
)

# (rows, columns) of each table.
households.shape, persons.shape, trips.shape

((25670, 32), (64500, 36), (174270, 59))

In [37]:
# Preview the first rows of the households table.
households.head()

Unnamed: 0,HHID,SurveyPeriod,TRAVDOW,TRAVMONTH,DayType,DWELLTYPE,OWNDWELL,HHSIZE,HHINC,VISITORS,...,TRUCKS,MBIKES,OTHERVEHS,TOTALVEHS,WDHHWGT,WEHHWGT,HOMELGA,HomeSubRegion,HomeRegion,HOMEPC
0,Y12H0000101,2012-13,Monday,May,Weekday,Separate House,Fully Owned,4.0,"$1,225",0.0,...,0.0,0.0,0.0,2.0,96.38,,Whittlesea (C),MELB - outer,MSD_31LGAs,3082.0
1,Y12H0000102,2012-13,Tuesday,May,Weekday,Separate House,Being Purchased,4.0,"$1,700",0.0,...,0.0,0.0,0.0,3.0,97.0,,Whittlesea (C),MELB - outer,MSD_31LGAs,3082.0
2,Y12H0000103,2012-13,Wednesday,May,Weekday,Separate House,Being Purchased,4.0,"$1,000",0.0,...,0.0,0.0,0.0,2.0,97.86,,Whittlesea (C),MELB - outer,MSD_31LGAs,3082.0
3,Y12H0000104,2012-13,Thursday,May,Weekday,Separate House,Fully Owned,3.0,"$3,625",0.0,...,0.0,0.0,0.0,3.0,98.35,,Whittlesea (C),MELB - outer,MSD_31LGAs,3082.0
4,Y12H0000107,2012-13,Sunday,May,Weekend Day,Separate House,Being Purchased,3.0,"$1,750",0.0,...,0.0,0.0,0.0,1.0,,307.89,Whittlesea (C),MELB - outer,MSD_31LGAs,3082.0


In [38]:
# Preview the first rows of the persons table.
persons.head()

Unnamed: 0,PERSID,HHID,PERSNO,NUMSTOPS,MONTHOFBIRTH,YEAROFBIRTH,AGE,SEX,RELATIONSHIP,PERSINC,...,ANZSIC2,STARTPLACE,ADDITIONALTRAVEL,CYCLEDEXERCISE,CYCLEDOTHER,CYCLEDSHOPPING,CYCLEDWORK,NOCYCLED,WDPERSWGT,WEPERSWGT
0,Y12H0000101P01,Y12H0000101,1,2,April,1962,50,M,Self,$1000-1249 p.w.,...,Administrative Services,Survey address,No,No,No,No,No,Yes,84.77,
1,Y12H0000101P02,Y12H0000101,2,5,January,1969,43,F,Spouse,$1-199 p.w.,...,Not In Work Force,Survey address,No,No,No,No,No,Yes,92.98,
2,Y12H0000101P03,Y12H0000101,3,2,July,2000,11,F,Child,Zero Income,...,Not In Work Force,Survey address,No,No,No,No,No,Yes,100.23,
3,Y12H0000101P04,Y12H0000101,4,2,August,2005,6,F,Child,Zero Income,...,Not In Work Force,Survey address,No,No,No,No,No,Yes,99.02,
4,Y12H0000102P01,Y12H0000102,1,0,January,1955,57,F,Spouse,$1-199 p.w.,...,Not In Work Force,Survey address,,No,No,No,No,Yes,82.61,


In [39]:
# Preview the first rows of the trips table.
trips.head()

Unnamed: 0,TRIPID,PERSID,HHID,STOPS,TRIPNO,STARTHOUR,STARTIME,ARRHOUR,ARRTIME,CUMDIST,...,TIME2,TIME3,TIME4,TIME5,TIME6,TIME7,TIME8,TIME9,WDTRIPWGT,WETRIPWGT
0,Y12H0000101P01T01,Y12H0000101P01,Y12H0000101,1,1,8,525,9,545,7.45,...,,,,,,,,,97.79,
1,Y12H0000101P01T02,Y12H0000101P01,Y12H0000101,1,2,17,1050,17,1075,7.45,...,,,,,,,,,97.79,
2,Y12H0000101P02T01,Y12H0000101P02,Y12H0000101,1,1,8,500,8,520,3.92,...,,,,,,,,,92.98,
3,Y12H0000101P02T02,Y12H0000101P02,Y12H0000101,1,2,9,540,9,550,2.5,...,,,,,,,,,92.98,
4,Y12H0000101P02T03,Y12H0000101P02,Y12H0000101,1,3,10,600,10,610,2.55,...,,,,,,,,,92.98,


### Shapefiles

Source: https://www.abs.gov.au/websitedbs/d3310114.nsf/home/digital+boundaries

##### Get Australian Statistical Geography Standard (ASGS) data for the state of Victoria

In [57]:
# Get meshblock info.
# Rebinds `url` and `save_to` to the meshblock CSV archive (mb_2016_vic_csv.zip) before downloading it.
url = "https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&1270055001_mb_2016_vic_csv.zip&1270.0.55.001&Data%20Cubes&F1EA82ECA7A762BCCA257FED0013A253&0&July%202016&12.07.2016&Latest"
save_to = "../data/mb_2016_vic_csv.zip"
download_data(url, save_to)

In [43]:
# Open the archive currently named by `save_to` and list its contents.
# NOTE(review): this rebinds `z`; no explicit `close()` of the archive previously bound to `z` is visible.
z = zipfile.ZipFile(save_to)
z.printdir()

File Name                                             Modified             Size
MB_2016_VIC.csv                                2016-06-17 10:27:28     13491351


In [44]:
# Load the meshblock table from the opened archive and show its (rows, columns).
meshblocks = pd.read_csv(z.open("MB_2016_VIC.csv"), low_memory=False)
meshblocks.shape

(85014, 16)

In [45]:
# Preview the first rows of the meshblock table.
meshblocks.head()

Unnamed: 0,MB_CODE_2016,MB_CATEGORY_NAME_2016,SA1_MAINCODE_2016,SA1_7DIGITCODE_2016,SA2_MAINCODE_2016,SA2_5DIGITCODE_2016,SA2_NAME_2016,SA3_CODE_2016,SA3_NAME_2016,SA4_CODE_2016,SA4_NAME_2016,GCCSA_CODE_2016,GCCSA_NAME_2016,STATE_CODE_2016,STATE_NAME_2016,AREA_ALBERS_SQKM
0,20000009499,NOUSUALRESIDENCE,29999949999,2949999,299999499,29499,No usual address (Vic.),29999,No usual address (Vic.),299,No usual address (Vic.),29499,No usual address (Vic.),2,Victoria,0.0
1,20000010000,Education,20403106914,2106914,204031069,21069,Bright - Mount Beauty,20403,Wodonga - Alpine,204,Hume,2RVIC,Rest of Vic.,2,Victoria,0.0127
2,20000021000,Commercial,20403106902,2106902,204031069,21069,Bright - Mount Beauty,20403,Wodonga - Alpine,204,Hume,2RVIC,Rest of Vic.,2,Victoria,0.0127
3,20000022000,Commercial,20403106902,2106902,204031069,21069,Bright - Mount Beauty,20403,Wodonga - Alpine,204,Hume,2RVIC,Rest of Vic.,2,Victoria,0.0105
4,20000023000,Commercial,20403106902,2106902,204031069,21069,Bright - Mount Beauty,20403,Wodonga - Alpine,204,Hume,2RVIC,Rest of Vic.,2,Victoria,0.0105


In [47]:
# Get meshblock with geometry info.
# Rebinds `url` and `save_to` to the meshblock shapefile archive (mb_2016_vic_shape.zip) before downloading it.
url = "https://www.abs.gov.au/AUSSTATS/subscriber.nsf/log?openagent&1270055001_mb_2016_vic_shape.zip&1270.0.55.001&Data%20Cubes&04F12B9E465AE765CA257FED0013B20F&0&July%202016&12.07.2016&Latest"
save_to = "../data/mb_2016_vic_shape.zip"
download_data(url, save_to)

In [53]:
# Read the meshblock shapefile directly from the downloaded zip archive.
# `low_memory` is a pandas.read_csv option, not a geopandas.read_file parameter,
# so it is not passed here (it would be forwarded to the I/O engine as an unknown option).
gdf = gpd.read_file("zip://../data/mb_2016_vic_shape.zip")

# (rows, columns) of the loaded table.
gdf.shape

(85014, 17)

In [55]:
# Preview the first rows of the meshblock geometry table.
gdf.head()

Unnamed: 0,MB_CODE16,MB_CAT16,SA1_MAIN16,SA1_7DIG16,SA2_MAIN16,SA2_5DIG16,SA2_NAME16,SA3_CODE16,SA3_NAME16,SA4_CODE16,SA4_NAME16,GCC_CODE16,GCC_NAME16,STE_CODE16,STE_NAME16,AREASQKM16,geometry
0,20000009499,NOUSUALRESIDENCE,29999949999,2949999,299999499,29499,No usual address (Vic.),29999,No usual address (Vic.),299,No usual address (Vic.),29499,No usual address (Vic.),2,Victoria,0.0,
1,20000010000,Education,20403106914,2106914,204031069,21069,Bright - Mount Beauty,20403,Wodonga - Alpine,204,Hume,2RVIC,Rest of Vic.,2,Victoria,0.0127,"POLYGON ((147.14253 -36.69221, 147.14202 -36.6..."
2,20000021000,Commercial,20403106902,2106902,204031069,21069,Bright - Mount Beauty,20403,Wodonga - Alpine,204,Hume,2RVIC,Rest of Vic.,2,Victoria,0.0127,"POLYGON ((146.95934 -36.72781, 146.95965 -36.7..."
3,20000022000,Commercial,20403106902,2106902,204031069,21069,Bright - Mount Beauty,20403,Wodonga - Alpine,204,Hume,2RVIC,Rest of Vic.,2,Victoria,0.0105,"POLYGON ((146.96136 -36.72687, 146.96271 -36.7..."
4,20000023000,Commercial,20403106902,2106902,204031069,21069,Bright - Mount Beauty,20403,Wodonga - Alpine,204,Hume,2RVIC,Rest of Vic.,2,Victoria,0.0105,"POLYGON ((146.96105 -36.72823, 146.96050 -36.7..."


### ABS census data