# <font color = blue>Lesson \#4: Basic OLS: The Corvette Problem</font>

# Data Dictionary

| Variable | Values   | Source | Mnemonic |
|----------|----------|--------|---------|
| Age | Years | Kelley Blue Book, various issues | age |
| Price | Nominal dollars | Ibid. | price |


# Basic Data Loading and Examination

In [None]:
##
## import packages
##
import numpy as np
import pandas as pd
##
## for modeling
## notice the new import command for the formula API and the summary option
##
import statsmodels.api as sm
import statsmodels.formula.api as smf 
from statsmodels.iolib.summary2 import summary_col
##
## for plotting
##
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
##
## read the corvette data from a csv file into a DataFrame
## the folder is held in a separate path variable and the file name
## is appended to it
##
path = r'C:/Users/Owner/Documents/Teaching/Rutgers/ECMT322/labs/Data/'
df = pd.read_csv( filepath_or_buffer = path + 'corvettes.csv' )
##
## display the first rows as a quick check of the import
##
df.head()

In [None]:
##
## basic descriptive statistics for the data
## the table is transposed so each variable is a row
##
df.describe().transpose()

In [None]:
##
## scatter plot of price against age
##
## the plot type is selected with the option "kind = 'scatter'"
##
ax = df.plot( kind = 'scatter', x = 'age', y = 'price', legend = False )
##
## label both axes and add a title
##
ax.set( xlabel = 'Age', ylabel = 'Price', title = 'Price vs. Age' )

## Basic OLS

In [None]:
##
## ols: regress price on age
##
## estimating a model takes four steps:
##   1. define a formula (i.e., the specific model to estimate)
##   2. instantiate the model (i.e., specify it)
##   3. fit the model
##   4. summarize the fitted model
##
## Step 1: the formula -- price is the dependent variable, age the regressor
##
formula = 'price ~ age'
##
## Step 2: instantiate the ols model from the formula and the data
##
mod = smf.ols( formula = formula, data = df )
##
## Step 3: fit the instantiated model
##
reg01 = mod.fit()
##
## Step 4: display the summary of the fitted model
##
reg01.summary()

## Retrieving/Printing Key OLS Results

In [None]:
##
## retrieve the estimated parameters of the fitted model reg01
##
reg01.params

In [None]:
##
## retrieve the sum of squared residuals, or SSE
## NOTE: the attribute is named 'ssr' ('sum of squared residuals') in
## statsmodels, but it is stored here as sse
##
sse = reg01.ssr
sse

In [None]:
##
## alternatively, compute the sse directly from the residuals:
## square each residual, then add them up
## notice the built-in round( ) function
##
residuals_squared = reg01.resid * reg01.resid
round( residuals_squared.sum(), 2 )

In [None]:
##
## calculate the standard error of the regression:
##   sqrt( SSE / residual degrees of freedom )
## the residual degrees of freedom are read from the fitted model
## ( n - 2 for this model: one intercept and one slope ) instead of being
## hard-coded, so the calculation stays correct if the model changes
##
se_reg = np.sqrt( sse/reg01.df_resid )
round( se_reg, 2 )

In [None]:
##
## calculate SXX: the sum of squared deviations of age from its mean
##
sxx = ( ( df.age - df.age.mean() ) **2 ).sum()
print( sxx )
##
## alternatively, recover SXX from the sample variance:
##   SXX = variance * ( n - 1 )
## n is counted from the data rather than hard-coded, so the result
## stays correct for any sample size
##
sxx_alt = df.age.var() * ( df.age.count() - 1 )
round( sxx_alt, 2 )

In [None]:
##
## calculate the standard error of the beta 1 (slope) estimate:
## the regression standard error divided by the square root of SXX
##
se_beta_1 = se_reg/np.sqrt( sxx )
se_beta_1
##
## this should agree with the standard error shown in the regression
## summary table output
##

In [None]:
##
## check that the sum of the residuals is zero
## the sum is rounded to 4 decimal places before it is displayed
##
round( reg01.resid.sum(), 4 )

# Stat 101 Digression

In [None]:
##
## constant-only model: the formula has no regressor, just the "1" term
##
formula = 'price ~ 1'
##
## instantiate the ols model from the formula and the data
##
mod02 = smf.ols( formula = formula, data = df )
##
## fit the instantiated model
##
reg02 = mod02.fit()
##
## display the summary of the fitted model
##
reg02.summary()

In [None]:
##
## show the descriptive statistics again as a double check
## the table is transposed so each variable is a row
##
df.describe().transpose()

## Calculating an Elasticity

In [None]:
##
## retrieve the estimated slope parameter by its label
## ( looking it up by integer position, params[ 1 ], on a Series indexed
## by names is deprecated in recent pandas versions )
##
dYdX = reg01.params[ 'age' ]
##
## multiply the parameter estimate by the ratio of the means
## to get the elasticity evaluated at the means
##
eta = dYdX * ( df.age.mean()/df.price.mean() )
print( 'eta = ', round( eta, 4 ) )

### Elasticity Summary Table for Linear Model

| Variable | Estimate | Mean   | Elasticity | Interpretation |
|----------|----------|--------|------------|----------------|
| Age      | -27.9029 | 4.10   | -0.4448    | Inelastic      |

## Goodness-of-Fit

In [None]:
##
## ANOVA table
##
## import the anova function from statsmodels
##
from statsmodels.stats.anova import anova_lm
##
## run anova for the first fitted model (reg01)
##
anova_lm( reg01 )

In [None]:
##
## anova for the constant-only model (reg02)
##
anova_lm(reg02)

In [None]:
##
## F-test: regression model with age (reg01) against the model without age
## (reg02 -- the constant-only, naive, or restricted model)
## the third value returned by the test is not needed, so it goes to "_"
##
f_val, p_val, _ = reg01.compare_f_test( restricted = reg02 )
##
## print the F statistic, its p-value, and whether the p-value is below 0.01
##
print(
    'F = ', round( f_val, 2 ),
    '\np-Value = ', round( p_val, 4 ),
    '\nTest: ', p_val < 0.01,
)

In [None]:
##
## check the R**2
##
round( reg01.rsquared, 3 )

## Model Portfolio

In [None]:
##
## model portfolio: both fitted models side by side in one table
## stars flag the significance of the estimates
## info_dict adds rows for the number of observations (as an integer)
## and the R-squared (to 3 decimals) -- notice the formatting statements
##
summary_col(
    [ reg01, reg02 ],
    stars = True,
    info_dict = {
        'n': lambda res: "{0:d}".format( int( res.nobs ) ),
        'R2': lambda res: "{:0.3f}".format( res.rsquared ),
    },
)