In [88]:
import pandas as pd
from pathlib import Path
import plotly.express as px

In [89]:
def read_single_bitre_data_file(filepath):
    """Read one BITRE CSV file and return it as a cleaned pandas DataFrame.

    The vehicle-count column ``no_vehicles`` is renamed to ``"<year>-01-31"``
    (BITRE figures are as of 31 January each year), where ``<year>`` is the
    second hyphen-separated token of the file name.

    Parameters
    ----------
    filepath : str or os.PathLike
        Location of the CSV file. The file is expected to contain the columns
        ``vehicle_type``, ``state_abb``, ``registered_postcode``,
        ``motive_power`` and ``no_vehicles``.

    Returns
    -------
    pandas.DataFrame
        Rows for passenger and light commercial vehicles only, with columns
        renamed to ``Vehicle Type``, ``State``, ``Postcode``, ``Fuel Type`` and
        the ``"<year>-01-31"`` count column. The original row labels are kept.

    Raises
    ------
    IndexError
        If the file name contains no hyphen, so no year token can be extracted.
    KeyError
        If the ``vehicle_type`` or ``no_vehicles`` column is missing.
    """
    # Path handles str and PathLike inputs as well as platform-specific separators.
    filepath = Path(filepath)

    df = pd.read_csv(
        filepath,
        engine="python",
        # Postcodes are identifiers, not numbers: read them as text so leading
        # zeros survive and the column dtype is the same for every file.
        dtype={"registered_postcode": str},
    )
    year_str = filepath.name.split("-")[1] + "-01-31"  # since BITRE is as of 31st Jan each year

    # Drop aggregate rows (any column equal to "Total") and rows with an
    # unknown value (any column equal to "UNKN", e.g. an unknown postcode).
    is_total = df.eq("Total").any(axis=1)
    is_unknown = df.eq("UNKN").any(axis=1)

    # Keep only passenger vehicles and light commercial vehicles.
    is_wanted_type = df["vehicle_type"].isin(
        ["Passenger vehicles", "Light commercial vehicles"]
    )

    # .copy() makes the result an independent frame, so the column assignments
    # below do not raise SettingWithCopyWarning.
    df = df.loc[~is_total & ~is_unknown & is_wanted_type].copy()

    # Rename the raw BITRE column names to the display names used downstream.
    df = df.rename(
        columns={
            "motive_power": "Fuel Type",
            "registered_postcode": "Postcode",
            "vehicle_type": "Vehicle Type",
            "state_abb": "State",
            "no_vehicles": year_str,
        }
    )

    # A missing count is treated as zero vehicles.
    df[year_str] = df[year_str].fillna(0)

    # Make all State values uppercase.
    df["State"] = df["State"].str.upper()

    # Shorten the category labels.
    df["Vehicle Type"] = df["Vehicle Type"].replace(
        {
            "Passenger vehicles": "Passenger",
            "Light commercial vehicles": "Light Commercial",
        }
    )
    df["Fuel Type"] = df["Fuel Type"].replace("Battery/Fuel-cell electric", "Electric")

    return df


In [90]:
def get_bitre_data(raw_dir="../data/raw/BITRE", output_path="../data/processed/bitre_df.csv"):
    """Combine every BITRE CSV in ``raw_dir`` into one wide DataFrame and save it.

    Each file is loaded with ``read_single_bitre_data_file`` and contributes one
    ``"<year>-01-31"`` count column. Files are outer-merged on the columns they
    share, so a row that is absent from a given year gets a count of 0.

    Parameters
    ----------
    raw_dir : str or os.PathLike, optional
        Directory searched (non-recursively) for ``*.csv`` files.
    output_path : str or os.PathLike, optional
        Where the combined table is written as CSV (without the index). Missing
        parent directories are created.

    Returns
    -------
    pandas.DataFrame
        Descriptive columns first (in their original order), followed by the
        year columns in ascending order. Empty if ``raw_dir`` has no CSV files.
    """
    combined_df = None

    # Sort the paths so the merge order (and thus row order) is reproducible;
    # glob order is otherwise platform-dependent.
    for f in sorted(Path(raw_dir).glob("*.csv")):
        df = read_single_bitre_data_file(str(f))

        # Use None (not DataFrame.empty) as the "first file" sentinel: a file
        # that happens to have no rows after filtering must still contribute
        # its year column instead of being silently discarded.
        if combined_df is None:
            combined_df = df
        else:
            combined_df = combined_df.merge(df, how="outer")

    if combined_df is None:
        combined_df = pd.DataFrame()

    # Year columns are the ones whose name starts with a digit ("2021-01-31").
    year_cols = sorted(c for c in combined_df.columns if str(c)[:1].isdigit())
    other_cols = [c for c in combined_df.columns if c not in year_cols]

    # Fill NaN with 0 only in the year columns; a missing Postcode, State,
    # Vehicle Type or Fuel Type must not be turned into the number 0.
    if year_cols:
        combined_df = combined_df.fillna({c: 0 for c in year_cols})

    # Descriptive columns first, then the years in ascending order. Resetting
    # the index gives a clean 0..n-1 index even when only one file was read.
    combined_df = combined_df[other_cols + year_cols].reset_index(drop=True)

    # Save combined_df to csv.
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    combined_df.to_csv(output_path, index=False)

    return combined_df


# Build the combined BITRE table (this also rewrites ../data/processed/bitre_df.csv)
# and show it as the cell output.
df = get_bitre_data()
df

Unnamed: 0,Vehicle Type,State,Postcode,Fuel Type,2021-01-31,2022-01-31,2023-01-31
0,Passenger,NSW,0810,Petrol,3.0,4.0,3.0
1,Passenger,NSW,0810,Diesel,0.0,0.0,0.0
2,Passenger,NSW,0812,Petrol,3.0,0.0,6.0
3,Passenger,NSW,0812,Diesel,0.0,0.0,0.0
4,Passenger,NSW,0820,Petrol,4.0,8.0,4.0
...,...,...,...,...,...,...,...
42432,Light Commercial,ACT,5018,Diesel,3.0,0.0,0.0
42433,Light Commercial,ACT,6100,Diesel,3.0,0.0,0.0
42434,Light Commercial,ACT,6149,Petrol,0.0,0.0,0.0
42435,Light Commercial,ACT,6726,Diesel,3.0,0.0,0.0


In [86]:
# Print the Python type of the first value in each column of df.
# .iloc[0] selects by position, so this also works when the index has no
# label 0 (e.g. rows were filtered out and the index was never reset).
for col in df.columns:
    print(col, type(df[col].iloc[0]))

Vehicle Type <class 'str'>
State <class 'str'>
Postcode <class 'str'>
Fuel Type <class 'str'>
2021-01 <class 'numpy.float64'>
2022-01 <class 'numpy.float64'>
2023-01 <class 'numpy.float64'>


<a style='text-decoration:none;line-height:16px;display:flex;color:#5B5B62;padding:10px;justify-content:end;' href='https://deepnote.com?utm_source=created-in-deepnote-cell&projectId=75de7644-8bd4-4ecc-bdb2-2c9ef0ed94e0' target="_blank">
Created in <span style='font-weight:600;margin-left:4px;'>Deepnote</span></a>