# install all packages we need

In [23]:
# Names of every package this notebook depends on, one per line for easy diffing.
pack = [
    "CSV",
    "DataFrames",
    "Statistics",
    "Plots",
    "Distributions",
    "StatsPlots",
    "GLM",
    "MLBase",
    "StatsBase",
    "StatsModels",
    "RegressionTables",
]

# Bring the package manager into scope, then add all listed packages in one call.
using Pkg
Pkg.add(pack)

[32m[1m    Updating[22m[39m registry at `~/.julia/registries/General`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m    Updating[22m[39m `~/.julia/environments/v1.6/Project.toml`
 [90m [10745b16] [39m[92m+ Statistics[39m
[32m[1m  No Changes[22m[39m to `~/.julia/environments/v1.6/Manifest.toml`


# load all Julia regression packages

In [24]:
using Pkg, CSV, DataFrames, Statistics, Plots, Distributions, StatsPlots, GLM, MLBase, StatsBase, StatsModels, RegressionTables

# now we load the data into Julia

In [25]:
# Read hprice1.csv (path is relative to the current working directory) into a DataFrame.
# The trailing `;` suppresses display of the resulting table in the notebook.
df = CSV.read("hprice1.csv", DataFrame);

# to make sure the data is loaded correctly, check all the variable names in your file

In [26]:
# Return the column names of df as strings so they can be compared with the expected variables.
names(df)

10-element Vector{String}:
 "price"
 "assess"
 "bdrms"
 "lotsize"
 "sqrft"
 "colonial"
 "lprice"
 "lassess"
 "llotsize"
 "lsqrft"

# to make sure the data is loaded correctly, check the summary statistics of each variable

In [27]:
# Per-column summary of df: mean, min, median, max, missing count, and element type.
describe(df)

Unnamed: 0_level_0,variable,mean,min,median,max,nunique,nmissing,eltype
Unnamed: 0_level_1,Symbol,Float64,Real,Float64,Real,Nothing,Nothing,DataType
1,price,293.546,111.0,265.5,725.0,,,Float64
2,assess,315.736,198.7,290.2,708.6,,,Float64
3,bdrms,3.56818,2.0,3.0,7.0,,,Int64
4,lotsize,9019.86,1000.0,6430.0,92681.0,,,Int64
5,sqrft,2013.69,1171.0,1845.0,3880.0,,,Int64
6,colonial,0.693182,0.0,1.0,1.0,,,Int64
7,lprice,5.63318,4.70953,5.58161,6.58617,,,Float64
8,lassess,5.71799,5.2918,5.67057,6.56329,,,Float64
9,llotsize,8.9051,6.90776,8.76872,11.4369,,,Float64
10,lsqrft,7.57261,7.06561,7.52023,8.26359,,,Float64


# all good, now we can compute regression models

---

# Q1.a

# $$ \mathrm{price} = \beta_0 + \beta_1 \cdot \mathrm{bdrms} + \beta_2 \cdot \mathrm{lotsize} + \beta_3 \cdot \mathrm{sqrft} + u $$

In [28]:
# Q1.a specification: `price` in levels on an intercept, bdrms, lotsize, and sqrft.
fm1a = @formula(price ~ 1 + bdrms + lotsize + sqrft)
# Least-squares fit, written as the `fit(LinearModel, ...)` call that `lm` forwards to.
M1a = fit(LinearModel, fm1a, df)

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

price ~ 1 + bdrms + lotsize + sqrft

Coefficients:
────────────────────────────────────────────────────────────────────────────────────
                    Coef.    Std. Error      t  Pr(>|t|)      Lower 95%    Upper 95%
────────────────────────────────────────────────────────────────────────────────────
(Intercept)  -21.7703      29.475        -0.74    0.4622  -80.3847       36.844
bdrms         13.8525       9.01015       1.54    0.1279   -4.06514      31.7702
lotsize        0.00206771   0.000642126   3.22    0.0018    0.000790769   0.00334464
sqrft          0.122778     0.0132374     9.28    <1e-13    0.0964541     0.149102
────────────────────────────────────────────────────────────────────────────────────

---

# Q1.b

# $$ \ln \left( \mathrm{price} \right) = \beta_0 + \beta_1 \cdot \mathrm{bdrms} + \beta_2 \cdot \ln \left( \mathrm{lotsize} \right) + \beta_3 \cdot \ln \left( \mathrm{sqrft} \right) + u $$

In [29]:
# Q1.b specification: `lprice` on an intercept, bdrms, and the logged size variables
# llotsize and lsqrft.
fm1b = @formula(lprice ~ 1 + bdrms + llotsize + lsqrft)
# Least-squares fit, written as the `fit(LinearModel, ...)` call that `lm` forwards to.
M1b = fit(LinearModel, fm1b, df)

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

lprice ~ 1 + bdrms + llotsize + lsqrft

Coefficients:
─────────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error      t  Pr(>|t|)   Lower 95%    Upper 95%
─────────────────────────────────────────────────────────────────────────────
(Intercept)  -1.29704     0.651284   -1.99    0.0497  -2.59219    -0.00189296
bdrms         0.0369584   0.0275313   1.34    0.1831  -0.0177906   0.0917074
llotsize      0.167967    0.0382811   4.39    <1e-04   0.0918404   0.244093
lsqrft        0.700232    0.0928652   7.54    <1e-10   0.51556     0.884905
─────────────────────────────────────────────────────────────────────────────

---

# Q1.c

# $$ \ln \left( \mathrm{price} \right) = \beta_0 + \beta_1 \cdot \mathrm{bdrms} + \beta_2 \cdot \ln \left( \mathrm{lotsize} \right) + \beta_3 \cdot \ln \left( \mathrm{sqrft} \right) + \beta_4 \cdot \ln \left( \mathrm{assess} \right) + u $$

In [30]:
# Q1.c specification: the Q1.b regressors plus lassess.
fm1c = @formula(lprice ~ 1 + bdrms + llotsize + lsqrft + lassess)
# Least-squares fit, written as the `fit(LinearModel, ...)` call that `lm` forwards to.
M1c = fit(LinearModel, fm1c, df)

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

lprice ~ 1 + bdrms + llotsize + lsqrft + lassess

Coefficients:
────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)   0.263743     0.569665    0.46    0.6446  -0.869297   1.39678
bdrms         0.0338392    0.0220983   1.53    0.1295  -0.0101135  0.0777919
llotsize      0.00743793   0.0385615   0.19    0.8475  -0.0692593  0.0841352
lsqrft       -0.103238     0.13843    -0.75    0.4579  -0.378571   0.172094
lassess       1.04307      0.151446    6.89    <1e-08   0.741845   1.34429
────────────────────────────────────────────────────────────────────────────

---

# compare two models in the same regression table

In [31]:
# Print M1b as column (1) and M1c as column (2) of one plain-text regression table.
# NOTE(review): `renderSettings = asciiOutput()` appears to be the older RegressionTables
# keyword; newer releases may expect a different rendering argument — confirm against the
# installed version before upgrading.
regtable(M1b, M1c; renderSettings = asciiOutput())


---------------------------------
                     lprice      
              -------------------
                   (1)        (2)
---------------------------------
(Intercept)    -1.297*      0.264
               (0.651)    (0.570)
bdrms            0.037      0.034
               (0.028)    (0.022)
llotsize      0.168***      0.007
               (0.038)    (0.039)
lsqrft        0.700***     -0.103
               (0.093)    (0.138)
lassess                  1.043***
                          (0.151)
---------------------------------
Estimator          OLS        OLS
---------------------------------
N                   88         88
R2               0.643      0.773
---------------------------------




---

# the changes after including $\mathrm{lassess}$ are due to the sample correlation between $\mathrm{lassess}$ and the other X variables
# the larger the absolute value of the correlation, the larger the change
# you can check the sample correlations as follows

In [32]:
# Sample correlation between lassess and bdrms, using explicit column indexing.
cor(df[!, :lassess], df[!, :bdrms])

0.4587438924682411

In [33]:
# Sample correlation between lassess and llotsize, using explicit column indexing.
cor(df[!, :lassess], df[!, :llotsize])

0.5577345686038575

In [34]:
# Sample correlation between lassess and lsqrft, using explicit column indexing.
cor(df[!, :lassess], df[!, :lsqrft])

0.8646643451572996

# export the notebook to an HTML file

In [35]:
# Remove any stale export first. Base `rm` replaces the external `rm -rf` command so the
# cell does not depend on an `rm` executable being available; `force=true` ignores a
# missing file and `recursive=true` mirrors the `-r` flag.
rm("T7_Julia.html"; force=true, recursive=true)
# Convert the notebook to HTML with the external `jupyter nbconvert` tool. `run` throws
# if the command exits with a non-zero status; the returned Process is the cell's value.
run(`jupyter nbconvert --to html T7_Julia.ipynb`)

[NbConvertApp] Converting notebook T7_Julia.ipynb to html
[NbConvertApp] Writing 599878 bytes to T7_Julia.html


Process(`jupyter nbconvert --to html T7_Julia.ipynb`, ProcessExited(0))