# mpg

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

Constants

In [None]:
# Rolling-window length, in rows, used for the mpg calculation.
win_size = 15
# Litres-per-gallon conversion factor.
# NOTE(review): 4.54 matches the imperial gallon (~4.546 l) to two decimals - confirm.
l_per_gal = 4.54

## Read in CSV of mpg

In [None]:
# Load the log from mpg.csv: the file's own header row is replaced by the
# names below, and the first column is parsed as a day-first date index.
mpg_csv = pd.read_csv(
    'mpg.csv',
    header=0,
    names=['Mileage', 'Fuel', 'Cost', 'Unit price'],
    index_col=0,
    parse_dates=True,
    dayfirst=True,
)
print('Input data\n----------')
print(mpg_csv.head())

# Validate input: each row's Cost should equal Fuel * Unit price once the
# product is rounded to two decimals.
mpg_csv['check_sum'] = (mpg_csv['Fuel'] * mpg_csv['Unit price']).round(decimals=2)
# Compare with a half-unit tolerance at two decimals rather than exact float
# equality: the parsed CSV value and the rounded product may differ in the last
# binary digit even when they represent the same two-decimal amount.
# Rows with a missing Cost or check_sum are still flagged (isclose is False for NaN).
mpg_csv['invalid'] = ~np.isclose(
    mpg_csv['Cost'], mpg_csv['check_sum'], rtol=0, atol=0.005
)
invalid_mpg = mpg_csv[mpg_csv['invalid']]
print('\nInvalid entries\n---------------')
print(invalid_mpg)

In [None]:
# Not needed: read_csv (index_col=0, parse_dates=True) already parses the dates into the index.
# mpg_csv['Date'] =  pd.to_datetime(mpg_csv['Date'], format='%d/%m/%y')

In [None]:
# Rolling mpg: change in 'Mileage' across the window divided by the 'Fuel'
# summed over the window, scaled by l_per_gal.
# NOTE(review): the distance spans win_size-1 row intervals while the fuel sum
# covers win_size rows - confirm this matches the intended (Excel) calculation.
distance = mpg_csv['Mileage'].diff(periods=win_size - 1)
fuel_used = mpg_csv['Fuel'].rolling(window=win_size).sum()
mpg_csv['mpg'] = (l_per_gal * distance / fuel_used).round(decimals=2)
print(mpg_csv.head(2 * win_size))
# Uncomment to export the result for comparison with the original Excel sheet.
#mpg_csv.to_csv('mpg_new.csv')

In [None]:
# Plot the rolling mpg, skipping the first win_size-1 rows where the rolling
# window is not yet full.
sns.set()
ax = mpg_csv['mpg'].iloc[win_size - 1:].plot(figsize=(12, 8))
ax.set_xlabel('date')
ax.set_ylabel('mpg')
ax.set_ylim(30, 65)
#_ = plt.axis('tight')
# TODO more uniform sampling - does this go back to (upsampling) resampling and interpolation of earlier data
# One x tick for every win_size-th row, labelled with its year and month.
dates = mpg_csv.index.to_numpy()[::win_size]
labels = np.datetime_as_string(dates, unit='M')
plt.xticks(dates, labels, rotation=60)
#plt.grid(True)
plt.show()

Annual mileage

In [None]:
# Interpolate the 'Mileage' readings onto a daily grid so that a fixed 365-row
# difference gives the change in mileage over the preceding 365 days.
annual_mileage = pd.DataFrame(mpg_csv['Mileage'].resample('D').interpolate('time'))
# Diff the 'Mileage' column itself so a Series (not a one-column DataFrame)
# is assigned to the new column.
annual_mileage['Annual mileage'] = annual_mileage['Mileage'].diff(periods=365)
# Keep the lowest daily value in each calendar month.
# NOTE(review): pandas 2.2+ deprecates the 'M' alias in favour of 'ME'.
annual_mileage = annual_mileage.resample('M').min()
annual_mileage = annual_mileage.drop('Mileage', axis=1)
# Remove the leading months with no value. dropna() replaces a fixed [12:]
# slice because the number of all-NaN months depends on the start date
# (it can be 11 rather than 12), in which case the slice dropped a valid row.
annual_mileage = annual_mileage.dropna()
print(annual_mileage.head())
sns.set()
annual_mileage.plot(figsize=(12,8))
_ = plt.xlabel('date')
_ = plt.ylabel('mileage')

Monthly fuel spend

In [None]:
# Sum the 'Cost' column per calendar month and plot it.
monthly_spend = mpg_csv[['Cost']].resample('M').sum()
print(monthly_spend.head())
sns.set()
spend_ax = monthly_spend.plot(figsize=(18, 6))
_ = spend_ax.set_xlabel('date')
_ = spend_ax.set_ylabel('fuel spend')