In [None]:
# Preamble
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.optimize as so
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Load the running-speed records from the CSV that sits next to the notebook
# and preview the first five rows.
run_data = pd.read_csv(filepath_or_buffer="runningSpeeds.csv")
run_data.head(n=5)

# Data Exploration

## One-dimensional - Distribution Plots

In [None]:
# Histogram of the pace column (counts only, density curve switched off).
# NOTE(review): distplot is deprecated in recent seaborn releases -- confirm
# the installed version before upgrading.
sns.distplot(run_data["pace"], kde=False)

In [None]:
# Same histogram, with the number of bins fixed at 15
sns.distplot(run_data["pace"], kde=False, bins=15)

In [None]:
# Density estimate only (histogram bars switched off)
sns.distplot(run_data["pace"], hist=False)

In [None]:
# Histogram and density estimate drawn together
sns.distplot(run_data["pace"])

## Two-dimensional - Joint Plots

In [None]:
# Joint distribution of age (x-axis) and pace (y-axis)
sns.jointplot(data=run_data, x="age", y="pace")

In [None]:
# Joint distribution of age and pace, drawn as an estimated density
sns.jointplot(data=run_data, x="age", y="pace", kind="kde")

In [None]:
# Joint distribution of age and pace with a fitted regression line
sns.jointplot(data=run_data, x="age", y="pace", kind="reg")

In [None]:
# Joint distribution of age and pace using hexagonal bins (gridsize 15)
sns.jointplot(data=run_data, x="age", y="pace", kind="hex", gridsize=15)

# Maximum Likelihood Example - Laplace Distribution

In [None]:
# Generate data

# Seed the global NumPy generator so the simulated samples -- and every plot
# and estimate derived from them below -- are identical on each
# Restart & Run All.
np.random.seed(0)

# An odd sample size, so the sample median is one of the observations.
n = 10001
normalData = pd.DataFrame({"data": np.random.normal(0, 1, n)})
# A Laplace(0, b) variable has variance 2*b**2, so b = 1/sqrt(2) gives unit
# variance and makes the sample comparable with the standard normal above.
laplaceData = pd.DataFrame({"data": np.random.laplace(0, 1/np.sqrt(2), n)})

# Plot both samples on the same axes
sns.distplot(normalData)
sns.distplot(laplaceData)

In [None]:
# Zoom in: redraw both samples, then restrict the view to x in [2, 6] and
# y in [0, 0.1].

sns.distplot(normalData)
sns.distplot(laplaceData)
plt.xlim((2, 6))
plt.ylim((0, 0.1))

In [None]:
## Laplace Negative Log Likelihood
def laplaceNegLogLikelihood(mu, b, y):
    """Negative log-likelihood of observations ``y`` under Laplace(mu, b).

    The Laplace density is ``f(y) = exp(-|y - mu| / b) / (2 * b)``, so the
    negative log-likelihood of a sample is

        sum_i [ log(2 * b) + |y_i - mu| / b ]

    Parameters
    ----------
    mu : float or array-like
        Location parameter; must broadcast against ``y``.
    b : float or array-like
        Scale parameter (positive); must broadcast against ``y``.
    y : array-like
        Observed values.

    Returns
    -------
    float
        The summed negative log-likelihood over all elements of ``y``.
    """
    # Both terms carry a plus sign: log(2b) is the negated log of the
    # normalising constant 1/(2b), and |y - mu|/b the negated exponent.
    neg_log_lik = np.sum(np.log(2 * b) + np.abs(y - mu) / b)
    return neg_log_lik

In [None]:
## Laplace Maximum Likelihood Estimate for mu
def maximumLikelihood(y):
    """Estimate the Laplace location ``mu`` for ``y`` by numerical minimisation.

    The scale is held at b = 1 for this demonstration and the search starts
    from mu = 1. The optimiser result is printed before the estimate is
    returned.

    Parameters
    ----------
    y : array-like
        Observations handed on to ``laplaceNegLogLikelihood``.

    Returns
    -------
    The ``x`` attribute of the ``scipy.optimize.minimize`` result.
    """
    fixed_scale = 1
    starting_mu = 1
    fit = so.minimize(
        laplaceNegLogLikelihood,
        starting_mu,
        args=(fixed_scale, y),
        method="Powell",
        tol=1e-8,
    )
    print(fit)
    return fit.x

maximumLikelihood(laplaceData.values)

In [None]:
# Sanity check: the sample median should match the estimate found above.
laplaceData.median(axis=0)

In [None]:
# Laplace Negative Log Likelihood for regression
def laplaceRegNegLogLikelihood(beta, X, y):
    """Laplace negative log-likelihood of ``y`` for the linear predictor ``X @ beta``.

    The scale is held at 1. The fitted values are reshaped into a single
    column before being compared with ``y`` (assumes ``y`` is an (n, 1)
    column -- verify against the caller).

    Parameters
    ----------
    beta : array-like
        Regression coefficients.
    X : array-like
        Design matrix.
    y : array-like
        Observed responses.
    """
    fitted = X @ beta
    mu_column = fitted.reshape(-1, 1)
    return laplaceNegLogLikelihood(mu_column, 1, y)

In [None]:
# Function to maximize regression log likelihood
def maximumRegLikelihood(X, y, negloglik=laplaceRegNegLogLikelihood):
    """Fit regression coefficients by minimising a negative log-likelihood.

    Parameters
    ----------
    X : ndarray of shape (nrows, ncols)
        Design matrix.
    y : array-like
        Observed responses, passed through to ``negloglik``.
    negloglik : callable, optional
        Objective called as ``negloglik(beta, X, y)``; defaults to the
        Laplace regression negative log-likelihood.

    Returns
    -------
    ndarray of shape (ncols,)
        The coefficient vector found by the optimiser (``RES.x``). The full
        optimiser result is printed before returning.
    """
    ### BEGIN SOLUTION
    _, ncols = X.shape
    # scipy.optimize.minimize expects a one-dimensional starting point of
    # shape (ncols,); a (ncols, 1) column is rejected by recent SciPy releases.
    betas = np.zeros(ncols)
    RES = so.minimize(negloglik, betas, args=(X, y), method="Powell", tol=1e-8)
    print(RES)
    return RES.x
    ### END SOLUTION

# Design matrix: a column of ones (intercept) next to the runner's age.
age = run_data.age.values
X = np.c_[np.ones(age.size), age]
# Response as an (n, 1) column.
y = run_data.pace.values.reshape(-1,1)
# maximumRegLikelihood returns only the coefficient vector, so keep it whole
# in `b` and evaluate the objective at the optimum separately to get `loss`.
b = maximumRegLikelihood(X, y)
loss = laplaceRegNegLogLikelihood(b, X, y)
   

In [None]:
# Cross-check with statsmodels: quantile regression at q = 0.5 (the median)
sad_fit = smf.quantreg(formula="pace ~ age", data=run_data).fit(q=0.5)
sad_fit.summary()