In [2]:
# Dependencies
import pandas as pd
import csv
import requests
from pprint import pprint
from scipy.stats import linregress
import matplotlib.pyplot as plt
from sodapy import Socrata
import os
import gmaps
from config import g_key

In [3]:
# Data collection - year 2022 - post-covid
#
# Credentials are read from the environment so that no secret is stored in the
# notebook or its version history. Set SOCRATA_APP_TOKEN, SOCRATA_USERNAME and
# SOCRATA_PASSWORD before starting the kernel; a missing variable raises
# KeyError naming the variable.
# NOTE(review): the token and password that were previously committed in this
# cell should be treated as leaked and rotated.
client = Socrata(
    "data.melbourne.vic.gov.au",
    app_token=os.environ["SOCRATA_APP_TOKEN"],
    username=os.environ["SOCRATA_USERNAME"],
    password=os.environ["SOCRATA_PASSWORD"],
)

# Request the records of dataset "b2ak-trbp" whose year field is 2022.
# The very large limit is presumably there so the response is not cut off at
# the API's default page size - TODO confirm against the Socrata docs.
results_2022 = client.get("b2ak-trbp", year=2022, limit=10000000)

# Keep a CSV snapshot of the raw pull, then preview the first rows.
results_2022_df = pd.DataFrame.from_records(results_2022)
results_2022_df.to_csv("foottrafficsdata2022.csv")
results_2022_df.head()

Unnamed: 0,id,date_time,year,month,mdate,day,time,sensor_id,sensor_name,hourly_counts
0,4231866,2022-04-05T15:00:00.000,2022,April,5,Tuesday,15,4,Town Hall (West),2132
1,4231867,2022-04-05T16:00:00.000,2022,April,5,Tuesday,16,4,Town Hall (West),2271
2,4231868,2022-04-05T17:00:00.000,2022,April,5,Tuesday,17,4,Town Hall (West),2966
3,4231869,2022-04-05T18:00:00.000,2022,April,5,Tuesday,18,4,Town Hall (West),2242
4,4231870,2022-04-05T19:00:00.000,2022,April,5,Tuesday,19,4,Town Hall (West),2008


In [4]:
# Data 2022 cleaning - data types
# Display the dtype pandas assigned to each column of results_2022_df.

results_2022_df.dtypes

id               object
date_time        object
year             object
month            object
mdate            object
day              object
time             object
sensor_id        object
sensor_name      object
hourly_counts    object
dtype: object

In [5]:
# Data 2022 cleaning - data types
# Cast the id, calendar and count columns to numeric dtypes. Columns that are
# not listed in the mapping keep their current dtype.
data_type_dict = dict(
    id=int,
    year=int,
    mdate=int,
    time=int,
    hourly_counts=float,
)
results_2022_df = results_2022_df.astype(dtype=data_type_dict)

# Show the dtypes after casting.
results_2022_df.dtypes

id                 int32
date_time         object
year               int32
month             object
mdate              int32
day               object
time               int32
sensor_id         object
sensor_name       object
hourly_counts    float64
dtype: object

In [6]:
# Data 2022 groupby
# Keep only rows whose "time" value lies in 12..15 (both ends included).
# Note that results_2022_df itself is replaced by the filtered frame.
is_midday = results_2022_df["time"].between(12, 15)
results_2022_df = results_2022_df[is_midday]

# Total the hourly counts per sensor name.
location = results_2022_df.groupby("sensor_name")
foottrafic_2022 = location["hourly_counts"].sum()

# Turn the per-sensor totals into a flat two-column frame keyed by "location".
results_2022_groupby_df = (
    foottrafic_2022
    .to_frame("Midday foottrafic 2022")
    .reset_index()
    .rename(columns={"sensor_name": "location"})
)
results_2022_groupby_df

Unnamed: 0,location,Midday foottrafic 2022
0,231 Bourke St,388350.0
1,Alfred Place,113081.0
2,Birrarung Marr,19126.0
3,Bourke St - Spencer St (North),388290.0
4,Bourke St - Spencer St (South),32085.0
...,...,...
67,Town Hall (West),1068491.0
68,Victoria Point,51343.0
69,Waterfront City,126236.0
70,Webb Bridge,155322.0


In [7]:
# Getting location coordinate
#
# Look up a latitude/longitude for every location name through the Google
# Geocoding API. Locations that cannot be resolved are reported and skipped,
# so the three result lists always stay the same length and aligned by index.

Location_list = results_2022_groupby_df["location"].tolist()

location = []
latitude = []
longitude = []

# The endpoint does not change between requests, so build it once.
base_url = "https://maps.googleapis.com/maps/api/geocode/json"

for sensor_location in Location_list:
    try:
        # Append the city name to the sensor name to form the address queried.
        target_location2 = sensor_location + ", Melbourne"
        params = {"address": target_location2, "key": g_key}
        # A timeout keeps one stalled request from hanging the whole cell.
        response = requests.get(base_url, params=params, timeout=10)
        response.raise_for_status()
        location_geo = response.json()
        # Use the first candidate returned; an empty "results" list raises
        # IndexError and is handled below as "not found".
        lat = location_geo["results"][0]["geometry"]["location"]["lat"]
        lng = location_geo["results"][0]["geometry"]["location"]["lng"]
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError) as err:
        # Only lookup failures are caught here: network/HTTP errors, a
        # malformed or empty payload, or a non-string location name.
        # KeyboardInterrupt and unrelated bugs are no longer swallowed.
        print(f"Target location not found: {sensor_location} ({type(err).__name__})")
        continue

    print(f"{target_location2}: {lat}, {lng}")
    location.append(sensor_location)
    latitude.append(lat)
    longitude.append(lng)

231 Bourke St, Melbourne: -37.8133992, 144.9668301
Alfred Place, Melbourne: -37.8143404, 144.9701942
Birrarung Marr, Melbourne: -37.8187351, 144.9742624
Bourke St - Spencer St (North), Melbourne: -37.8170606, 144.9533436
Bourke St - Spencer St (South), Melbourne: -37.8171069, 144.9531785
Bourke St Bridge, Melbourne: -37.8159602, 144.9572025
Bourke St-Russell St (West), Melbourne: -37.8159602, 144.9572025
Bourke Street Mall (North), Melbourne: -37.8137685, 144.9644379
Bourke Street Mall (South), Melbourne: -37.8137685, 144.9644379
Chinatown-Lt Bourke St (South), Melbourne: -37.8132372, 144.9627782
Chinatown-Swanston St (North), Melbourne: -37.8058235, 144.9631138
Collins Place (North), Melbourne: -37.8144674, 144.9733256
Collins Place (South), Melbourne: -37.8144674, 144.9733256
Collins St (North), Melbourne: -37.8182937, 144.956676
Elizabeth St - Flinders St (East) - New footpath, Melbourne: -37.8181479, 144.9648712
Elizabeth St-La Trobe St (East), Melbourne: -37.8102887, 144.9614233
E

In [14]:
# create new df for location coordinate
# The three lists are expected to be index-aligned: one entry per location.
location_dict = dict(location=location, Lat=latitude, Lon=longitude)
location_data_dict = pd.DataFrame(data=location_dict)

# Preview the first five rows.
location_data_dict.head()

Unnamed: 0,location,Lat,Lon
0,231 Bourke St,-37.813399,144.96683
1,Alfred Place,-37.81434,144.970194
2,Birrarung Marr,-37.818735,144.974262
3,Bourke St - Spencer St (North),-37.817061,144.953344
4,Bourke St - Spencer St (South),-37.817107,144.953179


In [15]:
# merging foot traffic data and coord data
# Left join on "location": every row of location_data_dict is kept and the
# matching midday total from results_2022_groupby_df is attached to it.
data_summary_2022 = pd.merge(
    location_data_dict,
    results_2022_groupby_df,
    on="location",
    how="left",
)

# Save the merged summary to CSV, then preview the first ten rows.
data_summary_2022.to_csv("datasummary2022.csv")
data_summary_2022.head(10)

Unnamed: 0,location,Lat,Lon,Midday foottrafic 2022
0,231 Bourke St,-37.813399,144.96683,388350.0
1,Alfred Place,-37.81434,144.970194,113081.0
2,Birrarung Marr,-37.818735,144.974262,19126.0
3,Bourke St - Spencer St (North),-37.817061,144.953344,388290.0
4,Bourke St - Spencer St (South),-37.817107,144.953179,32085.0
5,Bourke St Bridge,-37.81596,144.957202,303478.0
6,Bourke St-Russell St (West),-37.81596,144.957202,388371.0
7,Bourke Street Mall (North),-37.813769,144.964438,1094592.0
8,Bourke Street Mall (South),-37.813769,144.964438,748678.0
9,Chinatown-Lt Bourke St (South),-37.813237,144.962778,203873.0


In [18]:
# creating google heatmap

# Register the Google API key with gmaps before building any figure. No other
# visible cell does this, so without it the map would only render correctly
# when the key had been configured by an earlier, since-removed cell.
gmaps.configure(api_key=g_key)

# location & foot traffic input
lat_long = data_summary_2022[["Lat", "Lon"]]
foottrafic = data_summary_2022["Midday foottrafic 2022"]

# Plot Heatmap
# Each coordinate pair is weighted by its midday foot-traffic total.
# max_intensity is presumably the weight mapped to the hottest colour -
# TODO confirm against the gmaps heatmap_layer documentation.
fig = gmaps.figure()
heat_layer = gmaps.heatmap_layer(lat_long, weights=foottrafic, max_intensity=2000000)
fig.add_layer(heat_layer)
fig

Figure(layout=FigureLayout(height='420px'))

In [11]:
# Data for 2019, 2020, 2021
# 07:00 - 10:00
# 12:00 - 15:00
# 18:00 - 21:00
# summing hourly foot traffic

In [12]:
# get nearby restaurants
# add markers to identify nearby restaurants and advise on competition

In [13]:
# correlation
# foot traffic and eateries