## Imports

In [None]:
import math

import pandas as pd
import scipy as sp
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn import metrics

## Load data

In [None]:
# Read the first sheet of the workbook. The context manager guarantees the
# underlying file handle is released even if parsing the sheet raises.
with pd.ExcelFile('factors.xlsx') as data_xlsx:
    data_all_df = pd.read_excel(data_xlsx, sheet_name=0)

# Replace whatever index was loaded with a plain 0..n-1 positional index.
data_all_df.reset_index(drop=True, inplace=True)
data_all_df

In [None]:
# Keep only the year and the e-bike sales series (renamed to 'y'), then
# restrict to rows 6..19 of the sheet.
# NOTE(review): presumably these are the rows where sales data exists - confirm.
ebike_sales_df = (
    data_all_df[['year', 'ebike_sales']]
    .rename(columns={'ebike_sales': 'y'})
    .iloc[6:20]
)
ebike_sales_df

## Graphing & Pre-Analysis

In [None]:
# Line chart of the observed series; the returned Axes is kept in `ebike_gr`.
ebike_gr = ebike_sales_df.plot(
    x='year',
    y='y',
    title='e-bike sales in the UK',
    ylabel='thousands of units',
    legend=None,
)

In [None]:
# _ = ebike_gr.figure
# _.figure.set_figwidth(8)
# _.figure.set_figheight(6)
# _.savefig('ebike.png', bbox_inches='tight', dpi=330)

## Simple Bivariate Analysis

In [None]:
def bivariate_stats(x: pd.Series, y: pd.Series):
    """Print the correlation, its square and the covariance of two series.

    Both statistics come from ``x.corr(y)`` / ``x.cov(y)``, so the two series
    are paired by index label, not by position.

    :param x: first series
    :param y: second series
    :return: None; the report is written to stdout
    """
    pearson_r = x.corr(y)
    covariance = x.cov(y)

    report = (
        f'PMCC: {pearson_r} \n'
        f'r^2:  {pearson_r ** 2} \n'
        f'Cov:  {covariance}'
    )
    print(report)


def univariate_graphs(x: pd.Series):
    """Draw the autocorrelation plot of a single series and show it.

    :param x: series to plot
    :return: None
    """
    pd.plotting.autocorrelation_plot(series=x)
    plt.show()


def linear_reg_func(m: "LinearRegression"):
    """Print the fitted line of a single-feature linear model as ``y = ax +/- b``.

    Works both for models fit on a 2-D target (``coef_`` of shape (1, 1),
    ``intercept_`` of shape (1,)) and on a 1-D target (``coef_`` of shape (1,),
    scalar ``intercept_``). Only the first coefficient is reported.

    :param m: fitted model exposing ``coef_`` and ``intercept_``
    :return: None; the equation is written to stdout
    """
    # ravel() flattens any of the accepted shapes (including a bare scalar)
    # so the first element can be taken uniformly.
    _coef = np.ravel(m.coef_)[0]
    _itcp = np.ravel(m.intercept_)[0]
    _sign = "+" if _itcp >= 0 else "-"
    print(f'Linear Regressed: y = {_coef}x {_sign} {abs(_itcp)}')

In [None]:
bivariate_stats(ebike_sales_df['year'], ebike_sales_df['y'])

# Create the 8x6 figure BEFORE plotting: requesting a figsize for a figure
# that already exists has no effect, so the size must be set at creation.
# autocorrelation_plot draws on the current axes, i.e. on this figure.
_ = plt.figure(figsize=(8, 6))
acf_ax = pd.plotting.autocorrelation_plot(ebike_sales_df['y'])
acf_ax.set_title('ebike ACF')

plt.show()

# _.savefig('co2_acf.png', bbox_inches='tight', dpi=330)

## Forecasting

In [None]:
# Years to forecast over: 2006 through 2028 inclusive, as a one-column frame.
PRED_YEARS = pd.DataFrame({'year': range(2006, 2029)})

In [None]:
# Copy of the forecast years with the column renamed to 'ds' and the integer
# years parsed into datetimes.
PRED_YEARS_PROPHET = PRED_YEARS.rename(columns={'year': 'ds'})
PRED_YEARS_PROPHET['ds'] = pd.to_datetime(PRED_YEARS_PROPHET['ds'], format='%Y')

In [None]:
def error_stats(x: pd.Series, y: pd.Series):
    """Print MSE, RMSE, R^2, MAE and MAPE for a pair of series.

    The arguments are forwarded to the sklearn metrics in the order
    ``(x, y)``, i.e. ``x`` is treated as the true values and ``y`` as the
    predictions. R^2 and MAPE are not symmetric, so the order matters.

    :param x: observed values
    :param y: predicted values
    :return: None; the report is written to stdout
    """
    _mse = metrics.mean_squared_error(x, y)
    _mae = metrics.mean_absolute_error(x, y)
    _mape = metrics.mean_absolute_percentage_error(x, y)
    _r2 = metrics.r2_score(x, y)

    # sklearn returns MAPE as a fraction (1.0 == 100%), so scale it before
    # printing it with a percent sign.
    print(
            f'MSE:  {_mse} \n'
            f'RMSE: {math.sqrt(_mse)} \n'
            f'R^2:  {_r2} \n'
            f'MAE:  {_mae} \n'
            f'MAPE: {_mape * 100}%'
    )

### Linear

In [None]:
def line_func(x, a, b):
    """Evaluate the straight line ``a * x + b``.

    :param x: input value(s)
    :param a: gradient
    :param b: intercept
    :return: ``a * x + b``
    """
    scaled = a * x
    return scaled + b

In [None]:
# Fit sales against year; both are passed as single-column frames.
m_ebike_lr = LinearRegression().fit(ebike_sales_df[['year']], ebike_sales_df[['y']])
pmt_ebike_lr = [m_ebike_lr.coef_, m_ebike_lr.intercept_]

# Forecast over every year in PRED_YEARS and label the output column 'y'.
ebike_pred_lr = pd.DataFrame(m_ebike_lr.predict(PRED_YEARS), columns=['y'])

In [None]:
# Pair every observation with the forecast for the SAME year.
# Series.corr/cov align on index labels while the sklearn metrics pair values
# by position; the observed frame keeps the sheet's row labels and the
# forecast frame is labelled 0..n-1, so without a shared index the two reports
# would be computed on different (observation, forecast) pairs.
_actual = ebike_sales_df.set_index('year')['y']
_fitted = pd.Series(
    ebike_pred_lr['y'].to_numpy(), index=PRED_YEARS['year']
).reindex(_actual.index)

bivariate_stats(_actual, _fitted)
error_stats(_actual, _fitted)

In [None]:
# Create the 8x6 figure BEFORE plotting: requesting a figsize for a figure
# that already exists has no effect, so the size must be set at creation.
_ = plt.figure(figsize=(8, 6))
plt.plot(PRED_YEARS, ebike_pred_lr, label='linear')
plt.plot(ebike_sales_df['year'], ebike_sales_df['y'], label='actual')
plt.title('e-bike sales in the UK')
plt.ylabel('thousands of units')
plt.xlabel('year')
plt.legend()
plt.show()

# _.savefig('co2_line.png', bbox_inches='tight', dpi=330)

### Exponential

In [None]:
from scipy.optimize import curve_fit


In [None]:
def exp_func(x, a, b, c):
    """Evaluate the base-2 exponential ``2 ** ((x + a) * b) + c``.

    :param x: input value(s)
    :param a: horizontal shift added to ``x``
    :param b: scale applied to the shifted input
    :param c: vertical offset
    :return: the curve evaluated at ``x``
    """
    exponent = (x + a) * b
    return 2 ** exponent + c

In [None]:
# Least-squares fit of exp_func to the observed series, starting from a
# hand-picked initial guess for (a, b, c).
pmt_ebike_exp, cov = curve_fit(
    exp_func,
    xdata=ebike_sales_df['year'],
    ydata=ebike_sales_df['y'],
    p0=[-2000, 0.3, 10],
)
print(pmt_ebike_exp)

In [None]:
# Evaluate the fitted exponential over every forecast year; the result keeps
# the input's column, which is relabelled 'y'.
ebike_pred_exp = exp_func(PRED_YEARS, *pmt_ebike_exp).rename(columns={'year': 'y'})

In [None]:
# Pair every observation with the forecast for the SAME year.
# Series.corr/cov align on index labels while the sklearn metrics pair values
# by position; the observed frame keeps the sheet's row labels and the
# forecast frame is labelled 0..n-1, so without a shared index the two reports
# would be computed on different (observation, forecast) pairs.
_actual = ebike_sales_df.set_index('year')['y']
_fitted = pd.Series(
    ebike_pred_exp['y'].to_numpy(), index=PRED_YEARS['year']
).reindex(_actual.index)

bivariate_stats(_actual, _fitted)
error_stats(_actual, _fitted)

# exp_func is 2 ** ((x + a) * b) + c: report base 2 (not e) and derive the
# sign of the shift from the fitted value instead of assuming it is negative.
_a, _b, _c = pmt_ebike_exp
_shift_sign = '+' if _a >= 0 else '-'
print(f'Exponential Regressed: y = 2^((x {_shift_sign} {abs(_a):.5}) * {_b:.5}) + {_c:.5}')

In [None]:
# Create the 8x6 figure BEFORE plotting: requesting a figsize for a figure
# that already exists has no effect, so the size must be set at creation.
_ = plt.figure(figsize=(8, 6))
plt.plot(PRED_YEARS, ebike_pred_exp, label='exponential')
plt.plot(PRED_YEARS, ebike_pred_lr, label='linear', alpha=0.6)
plt.plot(ebike_sales_df['year'], ebike_sales_df['y'], label='actual')
plt.title('e-bike sales in the UK')
plt.ylabel('thousands of units')
plt.xlabel('year')
# Cap the y-axis so the exponential tail does not flatten the other curves.
plt.ylim(top=10_000)
plt.legend()
plt.show()

# _.savefig('co2_exp.png', bbox_inches='tight', dpi=330)

### Logistic

In [None]:
def logi_func(x, x0, k, d):
    """Evaluate a logistic curve with a fixed numerator of 9000.

    Computes ``9000 / (1 + exp(-k * (x - x0))) + d``.

    :param x: input value(s)
    :param x0: midpoint of the curve along x
    :param k: growth rate
    :param d: vertical offset
    :return: the curve evaluated at ``x``
    """
    decay = np.exp(-k * (x - x0))
    return 9000 / (1 + decay) + d

In [None]:
# Least-squares fit of logi_func to the observed series, starting from a
# hand-picked initial guess for (x0, k, d) and a raised evaluation budget.
pmt_ebike_logi, cov = curve_fit(
    logi_func,
    xdata=ebike_sales_df['year'],
    ydata=ebike_sales_df['y'],
    p0=[2021, 0.3, 140],
    maxfev=100_000,
)
print(pmt_ebike_logi)

In [None]:
# Evaluate the fitted logistic over every forecast year; the result keeps
# the input's column, which is relabelled 'y'.
ebike_pred_logi = logi_func(PRED_YEARS, *pmt_ebike_logi).rename(columns={'year': 'y'})

In [None]:
# Pair every observation with the forecast for the SAME year.
# Series.corr/cov align on index labels while the sklearn metrics pair values
# by position; the observed frame keeps the sheet's row labels and the
# forecast frame is labelled 0..n-1, so without a shared index the two reports
# would be computed on different (observation, forecast) pairs.
_actual = ebike_sales_df.set_index('year')['y']
_fitted = pd.Series(
    ebike_pred_logi['y'].to_numpy(), index=PRED_YEARS['year']
).reindex(_actual.index)

bivariate_stats(_actual, _fitted)
error_stats(_actual, _fitted)
# Print the fitted curve; 9000 is the fixed numerator used by logi_func.
print(
        f'Logistic Regressed: '
        f'{9000} / '
        f'(1 + '
        f'e^(-{pmt_ebike_logi[1]:.5} * (x - {pmt_ebike_logi[0]:.5}))'
        f') + {pmt_ebike_logi[2]}'
)

In [None]:
# Overlay every forecast so far (in this drawing order) on the observed data.
_forecasts = (
    ('exponential', ebike_pred_exp),
    ('linear', ebike_pred_lr),
    ('logistic', ebike_pred_logi),
)
for _label, _forecast in _forecasts:
    plt.plot(PRED_YEARS, _forecast, label=_label)
plt.plot(ebike_sales_df['year'], ebike_sales_df['y'], label='actual')

plt.title('e-bike sales in the UK')
plt.xlabel('year')
plt.ylabel('thousands of units')
plt.ylim(top=10_000)
plt.legend()
plt.show()

### Sigmoid

In [None]:
def sigm_func(x, slope, exp, x_ctr, y_min, y_max):
    """Evaluate an algebraic S-shaped curve centred on ``x_ctr``.

    At ``x == x_ctr`` the result is the midpoint ``(y_max - y_min) / 2 + y_min``.

    :param x: input value(s)
    :param slope: enters the denominator as ``slope ** -exp``
    :param exp: exponent used inside the denominator and for its root
    :param x_ctr: x position of the curve's centre
    :param y_min: lower level of the curve
    :param y_max: upper level of the curve
    :return: the curve evaluated at ``x``
    """
    offset = x - x_ctr
    span = y_max - y_min
    denominator = (slope**-exp + (2 * abs(offset) / span)**exp) ** (1 / exp)
    return (offset / denominator) + span / 2 + y_min


# Starting parameters in sigm_func's argument order (after x).
# NOTE(review): presumably tuned by hand - confirm.
optim_pmt_man = [
    1111,    # slope
    1.3,     # exp
    2021,    # x_ctr
    -630,    # y_min
    10_000,  # y_max
]


def sigm_func_lim(y_max=10_000):
    """Build a version of ``sigm_func`` whose ``y_max`` is pinned.

    :param y_max: upper level to pin
    :return: a function ``(x, slope, exp, x_ctr, y_min)`` that forwards to
        ``sigm_func`` with the pinned ``y_max``
    """
    def pinned(x, slope, exp, x_ctr, y_min):
        return sigm_func(x, slope, exp, x_ctr, y_min, y_max)

    return pinned

In [None]:
# Fit the sigmoid with y_max pinned at sigm_func_lim's default; the initial
# guess is optim_pmt_man without its last entry (y_max).
pmt_co2_sigm, cov = curve_fit(
    sigm_func_lim(),
    xdata=ebike_sales_df['year'],
    ydata=ebike_sales_df['y'],
    p0=optim_pmt_man[:-1],
    maxfev=69420,
)
print(pmt_co2_sigm)

In [None]:
# Evaluate the fitted sigmoid (y_max = 10_000) over every forecast year; the
# result keeps the input's column, which is relabelled 'y'.
ebike_pred_sigm = sigm_func(PRED_YEARS, *pmt_co2_sigm, 10_000).rename(columns={'year': 'y'})

In [None]:
# Pair every observation with the forecast for the SAME year.
# Series.corr/cov align on index labels while the sklearn metrics pair values
# by position; the observed frame keeps the sheet's row labels and the
# forecast frame is labelled 0..n-1, so without a shared index the two reports
# would be computed on different (observation, forecast) pairs.
_actual = ebike_sales_df.set_index('year')['y']
_fitted = pd.Series(
    ebike_pred_sigm['y'].to_numpy(), index=PRED_YEARS['year']
).reindex(_actual.index)

bivariate_stats(_actual, _fitted)
error_stats(_actual, _fitted)

In [None]:
# Create the 8x6 figure BEFORE plotting: requesting a figsize for a figure
# that already exists has no effect, so the size must be set at creation.
_ = plt.figure(figsize=(8, 6))
plt.plot(PRED_YEARS, ebike_pred_sigm, label='sigmoid, L=10k')
plt.plot(ebike_sales_df['year'], ebike_sales_df['y'], label='actual')
plt.title('e-bike sales in the UK')
plt.ylabel('thousands of units')
plt.xlabel('year')
plt.legend()
plt.show()

# _.savefig('co2_sigm_565.png', bbox_inches='tight', dpi=330)

### Tilted Sigmoid

In [None]:
def tilted_sig_func(x, slope=690.991, exp=2.01, x_ctr=2019.26,
                    y_min=2030.02, y_max=7000., t=83.):
    """Evaluate an algebraic S-shaped curve with an added linear tilt.

    The result is the S-curve centred on ``x_ctr`` plus ``t * x - t * y_min``.
    Every shape parameter defaults to the value this notebook uses, so
    ``tilted_sig_func(x)`` gives the same result as before the parameters
    were exposed.

    :param x: input value(s)
    :param slope: enters the denominator as ``slope ** -exp``
    :param exp: exponent used inside the denominator and for its root
    :param x_ctr: x position of the S-curve's centre
    :param y_min: lower level of the S-curve; also scales the constant part
        of the tilt (``- t * y_min``)
    :param y_max: upper level of the S-curve
    :param t: gradient of the linear tilt
    :return: the tilted curve evaluated at ``x``
    """
    # NOTE(review): the tilt subtracts t * y_min, i.e. a y-level multiplied by
    # a per-x gradient - confirm this offset is intended.
    return ((x - x_ctr) / (
            slope**-exp + (2 * abs(x - x_ctr) / (y_max - y_min))**exp
    ) ** (1 / exp)) + (y_max - y_min) / 2 + y_min + t * x - t * y_min

In [None]:
# Evaluate the tilted sigmoid over every forecast year; the result keeps the
# input's column, which is relabelled 'y'.
ebike_pred_ts = tilted_sig_func(PRED_YEARS).rename(columns={'year': 'y'})
ebike_pred_ts

In [None]:
# Pair every observation with the forecast for the SAME year.
# Series.corr/cov align on index labels while the sklearn metrics pair values
# by position; the observed frame keeps the sheet's row labels and the
# forecast frame is labelled 0..n-1, so without a shared index the two reports
# would be computed on different (observation, forecast) pairs.
_actual = ebike_sales_df.set_index('year')['y']
_fitted = pd.Series(
    ebike_pred_ts['y'].to_numpy(), index=PRED_YEARS['year']
).reindex(_actual.index)

bivariate_stats(_actual, _fitted)
error_stats(_actual, _fitted)

In [None]:
# Overlay all forecasts (in this drawing order); observations are drawn as
# points rather than a line.
_forecasts = (
    ('exponential', ebike_pred_exp),
    ('linear', ebike_pred_lr),
    ('logistic', ebike_pred_logi),
    ('tilted_sig', ebike_pred_ts),
)
for _label, _forecast in _forecasts:
    plt.plot(PRED_YEARS, _forecast, label=_label)
plt.scatter(ebike_sales_df['year'], ebike_sales_df['y'], label='actual')

plt.title('e-bike sales in the UK')
plt.xlabel('year')
plt.ylabel('thousands of units')
plt.ylim(bottom=0, top=10_000)
plt.legend()
plt.show()


In [None]:
# Tilted-sigmoid values for 2020 through 2028 inclusive, in a column named 'y'.
e = tilted_sig_func(pd.DataFrame({'y': range(2020, 2029)}))
e

In [None]:
# Print the running total after each row of `e`. cumsum() includes the
# current row, so the first line is the first value (not an empty-sum 0)
# and the last line is the grand total including the final row.
for running_total in e['y'].cumsum():
    print(running_total)
