# CYCLING IN SYDNEY

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os
import math
from datetime import datetime

In [None]:
# Render matplotlib figures inline, directly below the cell that produces them.
%matplotlib inline

In [None]:
# Default figure size as (width, height) for every plot drawn after this cell.
plt.rcParams['figure.figsize'] = (10, 5)

In [None]:
# Snapshot of the current rcParams. It is taken after the figure.figsize
# override in the previous cell, so the copy includes that setting.
saved_style_state = matplotlib.rcParams.copy()

## 1. importing data

In [None]:
# Prefer a local copy of the workbook; fall back to the open-data portal URL
# when the file is not in the working directory.
filepath = "Cycleway Counts_01Jan18_11Sept20.xlsx"
if os.path.isfile(filepath):
    print("loading from file")
else:
    filepath = "https://opendata.transport.nsw.gov.au/system/files/resources/Cycleway%20Counts_01Jan18_11Sept20.xlsx"
    print("loading from the internet")

# Only the "daily counts" sheet is read.
cycleway_usage = pd.read_excel(filepath, sheet_name="daily counts")
print("done")


In [None]:
# Preview the first rows of the raw sheet as loaded.
cycleway_usage.head()

### 1.a Ben's assistance

In [None]:
# Direction labels, indexed by (column offset % 5) in the reshaping loop.
# Columns that land on "All Directions" are skipped there.
dir_lookup = ["North", "East", "South", "West", "All Directions"]
def get_mode(index):
    """Return the transport mode for a column offset within a location block.

    Offsets below 5 map to "Cycling"; every other offset maps to "Pedestrian".
    """
    return "Cycling" if index < 5 else "Pedestrian"

# Keep only the column labels that do not contain "Unnamed".
locations_sparse = [
    column for column in cycleway_usage.columns if "Unnamed" not in column
]

In [None]:
def get_loc(i, response_type="all", locations=None):
    """Look up the location label that owns column position ``i``.

    The label is taken from ``locations[math.ceil(i / 15)]``.

    Args:
        i: Column position in the wide sheet.
        response_type: ``"all"`` returns the full label, ``"code"`` the text
            before the first ``-``, and ``"name"`` the text between the first
            and second ``-`` (both stripped of surrounding whitespace).
        locations: Optional sequence of labels to index into. Defaults to the
            module-level ``locations_sparse``.

    Returns:
        The requested string, or ``None`` when the lookup fails or
        ``response_type`` is not recognised. A message is printed in both of
        those cases.
    """
    if locations is None:
        locations = locations_sparse
    try:
        loc = locations[math.ceil(i / 15)]
        if response_type == "all":
            return loc
        if response_type == "code":
            return loc.split("-")[0].strip()
        if response_type == "name":
            return loc.split("-")[1].strip()
        # You can extend this to pull out the notes, like (Cycleway and Pedestrian counter), but they're pretty inconsistent
        print(f"`{response_type}` is not implemented yet")
    except (IndexError, TypeError, AttributeError) as err:
        # Stay best-effort (report and return None), but only for lookup
        # failures. A bare except would also swallow KeyboardInterrupt.
        print(f"er: no location for column index {i!r} ({err!r})")
    return None


In [None]:
# Reshape the wide sheet into one record per (date, location, mode, direction)
# for every cell holding a non-zero int count.
new_rows = []
for i, row in cycleway_usage.iterrows():
    if i < 3:
        # Rows labelled 0-2 are skipped; presumably extra header rows -- TODO confirm
        continue  # This feels nasty

    # Explicit positional access: plain row[0] on a label-indexed Series is
    # deprecated in recent pandas.
    date = row.iloc[0]
    # Walk the row in strides of 15 columns, starting after the date column.
    # NOTE(review): with a stop of `len(row) - 15`, a group starting at or
    # after that position is never visited -- confirm the last location is not
    # being dropped.
    for loc_index in range(1, len(row) - 15, 15):
        # 10 because we don't care about the Sum (All Transport Modes) section
        section = row.iloc[loc_index : loc_index + 10]
        for col_index, col in enumerate(section):
            # Named `direction` rather than `dir` so the builtin dir() is not
            # shadowed for the rest of the session.
            direction = dir_lookup[col_index % 5]
            if type(col) is int and col != 0 and direction != "All Directions":
                new_rows.append(
                    {
                        "date": date,
                        "location_name": get_loc(loc_index, "name"),
                        "station": get_loc(loc_index, "code"),
                        "mode": get_mode(col_index),
                        "direction": direction,
                        "count": int(col),
                    }
                )

### 1.b converting datetime

In [None]:
# Build the long-format frame from the collected records.
time_df = pd.DataFrame(new_rows)
# Parse the frame's own 'date' column. The previous right-hand side read from
# `t_df`, a name that is never defined. `infer_datetime_format` is dropped
# because it is deprecated and format inference is the default in current pandas.
time_df['date'] = pd.to_datetime(time_df['date'])
# Rebind instead of sorting in place, so re-running the cell is idempotent.
time_df = time_df.sort_values(by='date')

In [None]:
# Print the (rows, columns) shape, then display the first rows of the sorted frame.
print(time_df.shape)
time_df.head() 

### Questions
* Who is travelling into the city, out of the city?
* Weekdays vs weekends? Does the comparison point to poor connections? What are the limitations of counting only on pathways, or with a single counter?

## 2. data visualisation over time
* per day/ per month bar chart
* section off weekends?
* weekdays?

## 3. data visualisation linked to location
* plot points on a map
* scale point size by counter value (int)
* map colour/arrow to counter/direction
* where is there a lot of cycling? 

In [4]:
# Use the local CSV of counter locations when it is present; otherwise
# download it from the open-data portal.
counterfilepath = "cyclecounters-Oct-2019.csv"
if os.path.isfile(counterfilepath):
    print("loading from file")
else:
    counterfilepath = "https://opendata.transport.nsw.gov.au/node/6771/download"
    print("loading from the internet")

cycleway_counterlocation = pd.read_csv(counterfilepath)
print("done")

loading from file


NameError: name 'pd' is not defined

## 4. next steps?
* import datasets from Strava, Google Maps, layer over data
* understand why cycling is used: recreation/commuting?
* understand why cycling is *NOT* used
* start playing with cycle pathways?
* feed into research: making Parramatta road a cycleway