In [10]:
### Regression
### Input: Fuel consumption. Raw data: https://github.com/gsoh/VED
### Tool: numpy.polynomial

### Instructions
### Download `regression_emission_data.zip` from https://tuwienacat-my.sharepoint.com/:u:/g/personal/bingyu_zhao_tuwien_ac_at/EZ22m3LbIM9Lp7QJlWiOKYYB_JVX-V3t3v84sYOZ7SPNFw
### Do not unzip the file yourself; Step 1 below extracts it.
### Upload `regression_emission_data.zip` to the left panel.
### Your left panel should look like the image shown here: https://github.com/bz247/bz247_course_materials/blob/main/methods_and_models_2022W/figs/regression_data_upload_instruction.png

### Step 0. Import necessary packages

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import numpy.polynomial.polynomial as poly

### Step 1. Import data

In [None]:
!unzip regression_emission_data.zip

In [None]:
# Read the regression table from the extracted files; the CSV sits in a
# nested folder with the same name as its parent.
regression_csv = 'regression_emission_data/regression_emission_data/regression_data.csv'
emission_data = pd.read_csv(regression_csv)
# Show the first rows as the cell output.
emission_data.head()

In [None]:
# Count how many rows fall under each distinct `maxspeed_range` value.
rows_per_maxspeed = emission_data.groupby(by='maxspeed_range').size()
rows_per_maxspeed

In [None]:
# Scatter of every observation: link speed on x, link fuel consumption on y.
speed_kmph = emission_data['link_average_speed_kmph']
fuel_lpm = emission_data['link_average_fuel_lpm']

fig, ax = plt.subplots(figsize=(10, 8))
# s=1 / alpha=0.1 give small, faint markers (presumably to limit overplotting).
ax.scatter(speed_kmph, fuel_lpm, s=1, alpha=0.1)
ax.set_xlabel('Link average speed (km/h)')
ax.set_ylabel('Link average fuel consumption (liter/mile)')
# Restrict the visible y-range to [0, 1].
ax.set_ylim([0, 1])

### Step 2. Basic regression

In [None]:
# Fit fuel consumption directly as a degree-2 polynomial of link speed
# (both in their raw units).
emission_data['x'] = emission_data['link_average_speed_kmph']
emission_data['y'] = emission_data['link_average_fuel_lpm']
# poly.polyfit returns the coefficients in ascending order: [c0, c1, c2].
coefs = poly.polyfit(emission_data['x'], emission_data['y'], 2)
ffit = poly.Polynomial(coefs)
print(coefs)

# Overlay the fitted curve on the scatterplot of the observations.
fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(emission_data['link_average_speed_kmph'],
           emission_data['link_average_fuel_lpm'], s=1, alpha=0.1)
# Evaluate the fit on 50 evenly spaced speeds between 1 and 140 (x-axis units).
polyline = np.linspace(1, 140, 50)
# Draw on `ax` explicitly instead of relying on pyplot's implicit current axes.
ax.plot(polyline, ffit(polyline), c='r')

ax.set_xlabel('Link average speed (km/h)')
ax.set_ylabel('Link average fuel consumption (liter/mile)')
ax.set_ylim([0, 1])

In [None]:
# Log-transformed fit: model log(fuel) as a degree-2 polynomial of s = speed/100,
# i.e. fuel = exp(c0 + c1*s + c2*s**2).
# Dividing by 100 rescales the speed before fitting (presumably to keep the
# polynomial terms at a comparable magnitude).
emission_data['x'] = emission_data['link_average_speed_kmph']/100
# log() is undefined for fuel <= 0: mask those rows to NaN so they can be
# dropped, instead of feeding -inf/NaN into the least-squares fit.
# This changes nothing when every fuel value is positive.
positive_fuel = emission_data['link_average_fuel_lpm'] > 0
emission_data['y'] = np.log(emission_data['link_average_fuel_lpm'].where(positive_fuel))
fit_rows = emission_data.dropna(subset=['x', 'y'])
# poly.polyfit returns the coefficients in ascending order: [c0, c1, c2].
coefs = poly.polyfit(fit_rows['x'], fit_rows['y'], 2)
ffit = poly.Polynomial(coefs)
print(coefs)

# Overlay the fitted curve on the scatterplot of the observations.
fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(emission_data['link_average_speed_kmph'],
           emission_data['link_average_fuel_lpm'], s=1, alpha=0.1)
polyline = np.linspace(1, 140, 50)
# Undo both transforms for plotting: rescale x by 1/100 going in, exp() coming out.
# Draw on `ax` explicitly instead of relying on pyplot's implicit current axes.
ax.plot(polyline, np.exp(ffit(polyline/100)), c='r')

ax.set_xlabel('Link average speed (km/h)')
ax.set_ylabel('Link average fuel consumption (liter/mile)')
ax.set_ylim([0, 1])

### Step 3. Regression by speed range

In [None]:
# Per-group fit: model log(fuel) as a degree-2 polynomial of speed/100,
# estimated separately for each listed `maxspeed_range` value.
emission_data['x'] = emission_data['link_average_speed_kmph']/100
# log() is undefined for fuel <= 0: mask those rows to NaN so they can be
# dropped before fitting. This changes nothing when every fuel value is positive.
positive_fuel = emission_data['link_average_fuel_lpm'] > 0
emission_data['y'] = np.log(emission_data['link_average_fuel_lpm'].where(positive_fuel))

# All observations in light gray as a backdrop for the fitted curves.
fig, ax = plt.subplots(figsize=(10, 8))
ax.scatter(emission_data['link_average_speed_kmph'],
           emission_data['link_average_fuel_lpm'], s=1, alpha=0.1, c='lightgray')

for max_speed in [25, 30, 35, 40, 45, 70]:
    subdata = emission_data[emission_data['maxspeed_range']==max_speed].dropna(subset=['x', 'y'])
    # A degree-2 polynomial has 3 coefficients, so fewer than 3 usable rows
    # cannot determine it; report and skip rather than fail or fit garbage.
    if len(subdata) < 3:
        print('Skipping {} mph: only {} usable rows'.format(max_speed, len(subdata)))
        continue
    coefs = poly.polyfit(subdata['x'], subdata['y'], 2)
    ffit = poly.Polynomial(coefs)
    print(coefs)
    # NOTE(review): max_speed is labelled in mph while the x-axis is km/h; the
    # 1.5 factor looks like a rough mph -> km/h conversion (exact: ~1.609), but
    # could also mean "150% of the limit" — confirm intent.
    polyline = np.linspace(1, max_speed*1.5, 50)
    # Draw on `ax` explicitly instead of relying on pyplot's implicit current axes.
    ax.plot(polyline, np.exp(ffit(polyline/100)), label='{} mph'.format(max_speed))

ax.set_xlabel('Link average speed (km/h)')
ax.set_ylabel('Link average fuel consumption (liter/mile)')
ax.set_ylim([0, 1])
ax.legend()