# check the name of your current working directory

In [1]:
pwd

/Users/ning/Documents/testfield


# list all the files in your working directory and check whether the "hprice1.dta" file is there

In [2]:
ls


total 1248
-rw-r--r--  1 ning  staff  605910 Oct  5 15:35 T7_Stata.html
-rw-r--r--  1 ning  staff   18898 Oct  5 15:35 T7_Stata.ipynb
-rw-r--r--@ 1 ning  staff    5893 Aug 21 21:07 hprice1.csv
-rw-r--r--@ 1 ning  staff    4070 Aug 21 21:07 hprice1.dta


# clearly, you see the file listed; now we can load the data into Stata

In [3]:
* load hprice1.dta from the working directory, replacing any data already in memory
use "hprice1.dta", clear

# to make sure the data is loaded correctly, check all the variable names in your file

In [4]:
ds

price     bdrms     sqrft     lprice    llotsize
assess    lotsize   colonial  lassess   lsqrft


# to make sure the data is loaded correctly, check the summary statistics of each variable

In [5]:
* no varlist given, so summary statistics are reported for every variable
summarize


    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
       price |         88     293.546    102.7134        111        725
      assess |         88    315.7364    95.31444      198.7      708.6
       bdrms |         88    3.568182    .8413926          2          7
     lotsize |         88    9019.864    10174.15       1000      92681
       sqrft |         88    2013.693    577.1916       1171       3880
-------------+---------------------------------------------------------
    colonial |         88    .6931818    .4638161          0          1
      lprice |         88     5.63318    .3035727    4.70953   6.586172
     lassess |         88    5.717994    .2621131   5.291796   6.563291
    llotsize |         88    8.905104    .5440601   6.907755   11.43692
      lsqrft |         88     7.57261    .2586883   7.065613   8.263591


# all good, now we can estimate the regression models

---

# Q1.a

# $$ \mathrm{price} = \beta_0 + \beta_1 \cdot \mathrm{bdrms} + \beta_2 \cdot \mathrm{lotsize} + \beta_3 \cdot \mathrm{sqrft} + u $$

In [6]:
* Q1.a: OLS of price (in levels) on bdrms, lotsize and sqrft
regress price bdrms lotsize sqrft


      Source |       SS           df       MS      Number of obs   =        88
-------------+----------------------------------   F(3, 84)        =     57.46
       Model |  617130.701         3  205710.234   Prob > F        =    0.0000
    Residual |  300723.805        84   3580.0453   R-squared       =    0.6724
-------------+----------------------------------   Adj R-squared   =    0.6607
       Total |  917854.506        87  10550.0518   Root MSE        =    59.833

------------------------------------------------------------------------------
       price |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
       bdrms |   13.85252   9.010145     1.54   0.128    -4.065141    31.77018
     lotsize |   .0020677   .0006421     3.22   0.002     .0007908    .0033446
       sqrft |   .1227782   .0132374     9.28   0.000     .0964541    .1491022
       _cons |  -21.77031   29.47504    -0.74   0.

---

# Q1.b

# $$ \ln \left( \mathrm{price} \right) = \beta_0 + \beta_1 \cdot \mathrm{bdrms} + \beta_2 \cdot \ln \left( \mathrm{lotsize} \right) + \beta_3 \cdot \ln \left( \mathrm{sqrft} \right) + u $$

In [7]:
* Q1.b: OLS of lprice on bdrms and the logged size measures llotsize and lsqrft
regress lprice bdrms llotsize lsqrft


      Source |       SS           df       MS      Number of obs   =        88
-------------+----------------------------------   F(3, 84)        =     50.42
       Model |  5.15504028         3  1.71834676   Prob > F        =    0.0000
    Residual |  2.86256324        84  .034078134   R-squared       =    0.6430
-------------+----------------------------------   Adj R-squared   =    0.6302
       Total |  8.01760352        87  .092156362   Root MSE        =     .1846

------------------------------------------------------------------------------
      lprice |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
       bdrms |   .0369584   .0275313     1.34   0.183    -.0177906    .0917074
    llotsize |   .1679667   .0382812     4.39   0.000     .0918404     .244093
      lsqrft |   .7002324   .0928652     7.54   0.000     .5155597    .8849051
       _cons |  -1.297042   .6512836    -1.99   0.

# save the slopes for $\mathrm{bdrms}$, $\ln \left( \mathrm{lotsize} \right)$ and $\ln \left( \mathrm{sqrft} \right)$ for later comparison

In [8]:
* copy the three slope estimates of the most recent regression into named
* scalars (suffix M1b) so they are still available after the next regress
scalar define b_bdrms_M1b    = _b[bdrms]
scalar define b_llotsize_M1b = _b[llotsize]
scalar define b_lsqrft_M1b   = _b[lsqrft]

# check the slopes you just saved

In [9]:
scalar list

b_lsqrft_M1b =  .70023241
b_llotsize_M1b =   .1679667
b_bdrms_M1b =  .03695841


# save the regression result as "M1_b" for later comparison

In [10]:
* keep the current estimation results under the name M1_b
estimates store M1_b

---

# Q1.c

# $$ \ln \left( \mathrm{price} \right) = \beta_0 + \beta_1 \cdot \mathrm{bdrms} + \beta_2 \cdot \ln \left( \mathrm{lotsize} \right) + \beta_3 \cdot \ln \left( \mathrm{sqrft} \right) + \beta_4 \cdot \ln \left( \mathrm{assess} \right) + u $$

In [11]:
* Q1.c: same specification as Q1.b with lassess added as a fourth regressor
regress lprice bdrms llotsize lsqrft lassess


      Source |       SS           df       MS      Number of obs   =        88
-------------+----------------------------------   F(4, 83)        =     70.58
       Model |  6.19607473         4  1.54901868   Prob > F        =    0.0000
    Residual |  1.82152879        83   .02194613   R-squared       =    0.7728
-------------+----------------------------------   Adj R-squared   =    0.7619
       Total |  8.01760352        87  .092156362   Root MSE        =    .14814

------------------------------------------------------------------------------
      lprice |      Coef.   Std. Err.      t    P>|t|     [95% Conf. Interval]
-------------+----------------------------------------------------------------
       bdrms |   .0338392   .0220983     1.53   0.129    -.0101135    .0777918
    llotsize |   .0074379   .0385615     0.19   0.848    -.0692593    .0841352
      lsqrft |  -.1032384   .1384305    -0.75   0.458     -.378571    .1720942
     lassess |   1.043065    .151446     6.89   0.

# save the slopes for $\mathrm{bdrms}$, $\ln \left( \mathrm{lotsize} \right)$ and $\ln \left( \mathrm{sqrft} \right)$ for later comparison

In [12]:
* copy the three slope estimates of the most recent regression into named
* scalars (suffix M1c), alongside the M1b ones saved earlier
scalar define b_bdrms_M1c    = _b[bdrms]
scalar define b_llotsize_M1c = _b[llotsize]
scalar define b_lsqrft_M1c   = _b[lsqrft]

# check the slopes you just saved

In [13]:
scalar list

b_lsqrft_M1c = -.10323837
b_llotsize_M1c =  .00743793
b_bdrms_M1c =  .03383919
b_lsqrft_M1b =  .70023241
b_llotsize_M1b =   .1679667
b_bdrms_M1b =  .03695841


# save the regression result as "M1_c" for later comparison

In [14]:
* keep the current estimation results under the name M1_c
estimates store M1_c

---

# compare two models in the same regression table

In [15]:
* side-by-side table of the stored results M1_b and M1_c:
*   cells(b(star) se(par)) -> coefficient with significance stars, standard error in parentheses beneath
*   stats(N r2 r2_a vce)   -> footer rows for N, r2, r2_a and vce
* NOTE(review): estout is presumably the user-written SSC package rather than a built-in command; confirm it is installed
estout M1_b M1_c, cells(b(star) se(par)) stats(N r2 r2_a vce)


--------------------------------------------
                     M1_b            M1_c   
                     b/se            b/se   
--------------------------------------------
bdrms            .0369584        .0338392   
               (.0275313)      (.0220983)   
llotsize         .1679667***     .0074379   
               (.0382812)      (.0385615)   
lsqrft           .7002324***    -.1032384   
               (.0928652)      (.1384305)   
lassess                          1.043065***
                                (.151446)   
_cons           -1.297042*        .263743   
               (.6512836)      (.5696647)   
--------------------------------------------
N                      88              88   
r2               .6429652        .7728088   
r2_a              .630214        .7618598   
vce                   ols             ols   
--------------------------------------------


# the change of slope for $\mathrm{bdrms}$

In [16]:
* absolute gap between the two saved bdrms slopes; scalar() makes the names
* resolve to the saved scalars and never to a variable
display "between M1_b and M1_c, the slope difference for bdrms is ",  abs(scalar(b_bdrms_M1b) - scalar(b_bdrms_M1c))

between M1_b and M1_c, the slope difference for bdrms is  .00311922


# the change of slope for $\mathrm{llotsize}$

In [17]:
* absolute gap between the two saved llotsize slopes; scalar() makes the names
* resolve to the saved scalars and never to a variable
display "between M1_b and M1_c, the slope difference for llotsize is ",  abs(scalar(b_llotsize_M1b) - scalar(b_llotsize_M1c))

between M1_b and M1_c, the slope difference for llotsize is  .16052878


# the change of slope for $\mathrm{lsqrft}$

In [18]:
* absolute gap between the two saved lsqrft slopes; scalar() makes the names
* resolve to the saved scalars and never to a variable
display "between M1_b and M1_c, the slope difference for lsqrft is ",  abs(scalar(b_lsqrft_M1b) - scalar(b_lsqrft_M1c))

between M1_b and M1_c, the slope difference for lsqrft is  .80347078


---

# the changes after including $\mathrm{lassess}$ are due to the sample correlation between $\mathrm{lassess}$ and the other X variables
# the larger the absolute value of the correlation, the larger the change
# you can check the sample correlations as follows

In [19]:
* pairwise sample correlations between lassess and the three original regressors
correlate lassess bdrms llotsize lsqrft

(obs=88)

             |  lassess    bdrms llotsize   lsqrft
-------------+------------------------------------
     lassess |   1.0000
       bdrms |   0.4587   1.0000
    llotsize |   0.5577   0.1695   1.0000
      lsqrft |   0.8647   0.5196   0.3113   1.0000



# export the notebook to an HTML file

In [21]:
* remove any stale export first; the target is a single regular file, so -f is
* enough and the recursive flag is dropped to avoid an accidental directory wipe
!rm -f T7_Stata.html
* NOTE(review): nbconvert converts the .ipynb as saved on disk, so save the notebook before running this cell; confirm
!jupyter nbconvert --to html T7_Stata.ipynb





[NbConvertApp] Converting notebook T7_Stata.ipynb to html
[NbConvertApp] Writing 606359 bytes to T7_Stata.html
