In [17]:
library(TSA)
library(mgcv)

In [18]:
# Load the housing data set (first row holds the column names) and
# preview its first ten rows.
data <- read.csv(file = "HouseData.csv", header = TRUE)
head(data, n = 10)

Sno,Timeline,Houses_for_Sale,Employment_Ratio,GDP_Growth,Bank_Prime_Rate,SP500
1,Mar-76,314,56.7,3.1,6.75,102.77
2,Jun-76,336,56.8,2.1,7.2,104.28
3,Sep-76,341,56.9,3.0,7.0,105.24
4,Dec-76,358,57.0,4.7,6.35,107.46
5,Mar-77,353,57.4,8.1,6.25,98.42
6,Jun-77,369,57.9,7.3,6.75,100.48
7,Sep-77,392,58.1,0.0,7.13,96.53
8,Dec-77,408,58.7,1.4,7.75,95.1
9,Mar-78,398,58.8,16.5,8.0,89.21
10,Jun-78,422,59.5,4.0,8.63,95.53


In [19]:
# Build the quarterly "houses for sale" series from the third column of
# `data` and prepare a 2 x 2 plotting grid for the (disabled) diagnostics.
par(mfrow = c(2, 2))
house <- as.vector(data[, 3])
# With frequency = 4 the second element of `start` is a QUARTER index, not a
# month number. The first observation is labelled Mar-76, i.e. the first
# quarter of 1976, so the series starts at c(1976, 1).
house <- ts(house, start = c(1976, 1), frequency = 4)
#ts.plot(house, ylab = "Raw data")

#acf(house)


In [20]:
# Work on the log scale; the first difference of the logged series is the
# quarter-over-quarter log growth.
log.house <- log(house)
growth <- diff(log.house, lag = 1, differences = 1)

In [21]:
# 2 x 2 plotting grid for the growth diagnostics (plots currently disabled).
par(mfrow = c(2, 2))
#plot(growth)
#acf(growth)

In [6]:
# Lag-4 difference of the growth series.
dif.season.growth <- diff(growth, lag = 4)

In [22]:
# 2 x 2 plotting grid for the lag-4 differenced growth diagnostics
# (plots currently disabled).
par(mfrow = c(2, 2))
#plot(dif.season.growth)
#acf(dif.season.growth, lag = 4 * 5)


In [24]:
## Estimate seasonality with a harmonic (cos/sin) regression on top of a
## linear trend
# Rescaled time index: (i - 1) / n for i = 1..n.
time.pts <- (seq_along(house) - 1) / length(house)

# Trend-only baseline.
model.wo.season <- lm(log(house) ~ time.pts)
#summary(model.wo.season)


# Trend plus the harmonic regressors returned by harmonic() with m = 1.
har <- harmonic(log(house), m = 1)
model.w.season <- lm(log(house) ~ time.pts + har)
#summary(model.w.season)

#anova(model.wo.season, model.w.season)

In [25]:
# Spline trend: smooth term in time plus the harmonic seasonal regressors.
gam.fit <- gam(log.house ~ s(time.pts) + har)
#summary(gam.fit)

par(mfrow = c(2, 2))
#plot(log.house)
#lines(ts(fitted(gam.fit), start = start(log.house), frequency = frequency(log.house)), col = "red")


In [26]:
 ##Pre-process data. Split into train and test 
# The first `n.train` observations form the training set; the remainder is
# held out for testing.
n.train <- 160
stopifnot(length(house) > n.train)
train.idx <- seq_len(n.train)
house.train <- house[train.idx]
house.test <- house[-train.idx]
# Rescaled time index: (i - 1) / n for i = 1..n.
time.pts <- (seq_along(house) - 1) / length(house)
train.time.pts <- time.pts[train.idx]
test.time.pts <- time.pts[-train.idx]


## Fit models (gam() is provided by mgcv)
# Inside a formula `^` is the crossing operator, so a bare `x^2` collapses to
# `x` and no quadratic term is fitted. I() makes it arithmetic squaring.
param <- lm(house.train ~ train.time.pts + I(train.time.pts^2))
# surface = "direct" is kept from the original; presumably chosen so the fit
# can be evaluated at test-set times outside the training range.
loc <- loess(
  house.train ~ train.time.pts,
  control = loess.control(surface = "direct")
)
splines <- gam(house.train ~ s(train.time.pts))
 


In [27]:
# Out-of-sample predictions at the held-out time points. Each model was fit
# with `train.time.pts` as the regressor, so the new data must use that name.
param.pred <- predict(
  param,
  newdata = data.frame(train.time.pts = test.time.pts)
)
loess.pred <- predict(
  loc,
  newdata = data.frame(train.time.pts = test.time.pts)
)
splines.pred <- predict(
  splines,
  newdata = data.frame(train.time.pts = test.time.pts)
)


In [28]:
# Two stacked panels for the predicted-vs-actual comparison
# (plotting code currently disabled).
par(mfrow = c(2, 1))

#plot(house.test, type = 'l', main='Predicted vs Actual', ylim =c(min(c(house, param.pred,loess.pred,splines.pred)),max(c(house, param.pred,loess.pred,splines.pred)))) 
#lines( param.pred, col='red') 
#lines(loess.pred, col='green') 
#lines( splines.pred, col='purple')
#legend(x="topleft", legend=c("Param", "Loess", "Splines"), col=c("red", "green", "purple"), lty=1:2, cex=0.8)
 

In [14]:

# Fit an ARIMA(p, d, q) model to the global `log.house` series by maximum
# likelihood and report its AIC.
#
# Arguments:
#   p, d, q  non-seasonal AR order, differencing order and MA order.
# Side effect: prints "p d q AIC" as a single space-separated string.
# Returns: a one-row data frame with columns p, d, q and AIC.
test_modelA <- function(p, d, q) {
  fit <- arima(log.house, order = c(p, d, q), method = "ML")
  aic.value <- AIC(fit)
  print(paste(p, d, q, aic.value))
  data.frame(p = p, d = d, q = q, AIC = aic.value)
}

# Grid search over ARIMA(p, d, q) orders with p, q in 0..3 and d in 0..1.
# Each candidate is fitted with test_modelA(); a candidate whose fit raises an
# error is reported and skipped rather than silently dropped.
# expand.grid() varies its first column fastest, so listing q, d, p in this
# order visits the candidates in the same sequence as nested p/d/q loops.
order.grid <- expand.grid(q = 0:3, d = 0:1, p = 0:3)

fits <- Map(
  function(p, d, q) {
    tryCatch(
      test_modelA(p, d, q),
      error = function(e) {
        message(
          "ARIMA(", p, ",", d, ",", q, ") failed: ", conditionMessage(e)
        )
        NULL
      }
    )
  },
  order.grid$p, order.grid$d, order.grid$q
)
fits <- Filter(Negate(is.null), fits)
if (length(fits) == 0) {
  stop("no ARIMA model could be fitted", call. = FALSE)
}

# Bind all rows in one call instead of growing the data frame per iteration;
# no placeholder row is needed, so the table holds fitted models only.
orders <- do.call(rbind, fits)

# Sort by decreasing AIC so that tail() shows the best (lowest-AIC) orders.
orders <- orders[order(-orders$AIC), ]
tail(orders)


[1] "0 0 0 58.8495983620467"
[1] "0 0 1 -133.631219559126"
[1] "0 0 2 -313.41196397034"
[1] "0 0 3 -386.012118474023"
[1] "0 1 0 -553.874267663474"
[1] "0 1 1 -585.143022611934"
[1] "0 1 2 -606.414318372284"
[1] "0 1 3 -604.468345968299"
[1] "1 0 0 -551.767258236421"
[1] "1 0 1 -584.670676430887"
[1] "1 0 2 -605.599851404294"
[1] "1 0 3 -598.9266521364"
[1] "1 1 0 -611.136337702468"
[1] "1 1 1 -626.597312931752"
[1] "1 1 2 -625.108804274741"
[1] "1 1 3 -624.241649477028"
[1] "2 0 0 -611.643106360899"
[1] "2 0 1 -550.03722592429"
[1] "2 0 2 -611.783159823755"


"possible convergence problem: optim gave code = 1"

[1] "2 0 3 -616.490556536162"
[1] "2 1 0 -623.344016606811"
[1] "2 1 1 -633.19045476797"
[1] "2 1 2 -636.735524623087"
[1] "2 1 3 -638.738503831295"
[1] "3 0 0 -627.12026947298"
[1] "3 0 1 -634.836923475638"
[1] "3 0 2 -646.497848306649"
[1] "3 0 3 -598.226257440134"
[1] "3 1 0 -622.452193119966"
[1] "3 1 1 -632.514109588648"
[1] "3 1 2 -636.065783035183"
[1] "3 1 3 -639.211629417959"


Unnamed: 0,p,d,q,AIC
27,3,0,1,-634.8369
32,3,1,2,-636.0658
24,2,1,2,-636.7355
25,2,1,3,-638.7385
33,3,1,3,-639.2116
28,3,0,2,-646.4978


In [29]:
# Seasonal ARIMA on the log series: non-seasonal order (3, 1, 2) plus one
# seasonal difference at period 4, estimated by maximum likelihood.
arima.fit <- arima(
  log.house,
  order = c(3, 1, 2),
  seasonal = list(order = c(0, 1, 0), period = 4),
  method = "ML"
)
#coef(arima.fit)
#arima.fit



In [30]:
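# Moduli of the roots of the AR and MA characteristic polynomials.
# R's arima() writes the AR polynomial as 1 - phi_1 z - ... - phi_p z^p, so
# the AR coefficients are negated; the MA polynomial is 1 + theta_1 z + ...
#abs(polyroot(c(1, -coef(arima.fit)[1:3])))
#abs(polyroot(c(1, coef(arima.fit)[4:5])))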