# Stock Returns

## Set the Directory

In [1]:
# Use the notebook root as the working directory so that the relative paths
# used later ('csv-file/...', 'pdf-file/...') resolve against it.
setwd(dir = "/jupyter")

## Clear Memory

In [2]:
# Drop every non-hidden object from the current workspace. This does not
# detach loaded packages or reset options.
rm(list = ls())

## Import the Data

In [3]:
# Read the return data; the first row of the CSV holds the column names.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
data <- read.csv('csv-file/goldreturns.csv', header = TRUE)
head(data)
tail(data)

# Bind each column to a variable of the same name explicitly rather than
# calling attach(), so nothing is added to the search path and later cells
# can still refer to `gold`, `nyse` and `nasdaq` directly.
Date <- data$Date
gold <- data$gold
nyse <- data$nyse
nasdaq <- data$nasdaq

Date,gold,nyse,nasdaq
<fct>,<dbl>,<dbl>,<dbl>
1973-01-31,2.01,-2.6721,-3.7341
1973-02-28,29.24,-4.4088,-5.8657
1973-03-31,5.8,-0.6709,-2.3987
1973-04-30,0.5,-4.7597,-8.0667
1973-05-31,26.24,-2.3386,-4.6526
1973-06-30,7.86,-1.0214,-1.7628


Unnamed: 0_level_0,Date,gold,nyse,nasdaq
Unnamed: 0_level_1,<fct>,<dbl>,<dbl>,<dbl>
523,2016-07-31,1.18,2.6872,6.6157
524,2016-08-31,-1.35,-0.2532,0.8787
525,2016-09-30,1.02,-0.5878,1.823
526,2016-10-31,-4.04,-2.2944,-2.3002
527,2016-11-30,-6.81,4.5701,2.6509
528,2016-12-31,-2.38,1.9488,1.1751


## Convert Data into Time Series

In [4]:
# Wrap each return column in a ts object with 12 observations per year,
# the first observation being period 1 of 1973.
gold_ts <- ts(data = gold, start = c(1973, 1), frequency = 12)
nyse_ts <- ts(data = nyse, start = c(1973, 1), frequency = 12)
nasdaq_ts <- ts(data = nasdaq, start = c(1973, 1), frequency = 12)

## Plotting the Data

In [5]:
# Draw the three series into one PDF file, stacked in a single column.
pdf(file = 'pdf-file/goldreturns.pdf')
par(mfrow = c(3, 1))  # 3 rows x 1 column of panels
# Each series is plotted with its own call so that ts.plot() labels the
# y-axis with the series name.
ts.plot(gold_ts)
ts.plot(nyse_ts)
ts.plot(nasdaq_ts)
dev.off()  # close the PDF device so the file is written out

## Data Summary

In [6]:
# psych provides describe(), which tabulates n, mean, sd, median, skew,
# kurtosis and related statistics for every column.
library(psych)
# Exclude the first column (Date) and summarise the return columns.
describe(data[, -1])

Unnamed: 0_level_0,vars,n,mean,sd,median,trimmed,mad,min,max,range,skew,kurtosis,se
Unnamed: 0_level_1,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
gold,1,528,0.7238636,6.034589,0.015,0.411816,4.59606,-21.37,29.24,50.61,0.9231638,4.265637,0.2626218
nyse,2,528,0.636496,4.353424,0.9839,0.7602703,3.980188,-21.8447,16.4814,38.3261,-0.4956494,2.378947,0.1894585
nasdaq,3,528,0.8781858,6.151811,1.3013,1.0599858,5.078943,-27.1763,21.8964,49.0727,-0.4768352,1.836064,0.2677232


In [7]:
# Welch two-sample t-tests of the null hypothesis that the difference in
# mean returns between each pair of series is zero (mu = 0).
# The sidedness of the test is set through `alternative`; t.test() has no
# `type` argument, so a `type=` value is silently absorbed by `...` and
# has no effect.
t.test(gold, nyse, mu = 0, alternative = 'two.sided')
t.test(gold, nasdaq, mu = 0, alternative = 'two.sided')
t.test(nyse, nasdaq, mu = 0, alternative = 'two.sided')


	Welch Two Sample t-test

data:  gold and nyse
t = 0.2698, df = 958.63, p-value = 0.7874
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.5481263  0.7228616
sample estimates:
mean of x mean of y 
0.7238636 0.6364960 



	Welch Two Sample t-test

data:  gold and nasdaq
t = -0.4115, df = 1053.6, p-value = 0.6808
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.8902086  0.5815643
sample estimates:
mean of x mean of y 
0.7238636 0.8781858 



	Welch Two Sample t-test

data:  nyse and nasdaq
t = -0.73691, df = 949, p-value = 0.4614
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.8853377  0.4019581
sample estimates:
mean of x mean of y 
0.6364960 0.8781858 


## Convert Returns into Squared Returns

In [8]:
# Element-wise square of each return series.
gold_sq <- gold^2
nyse_sq <- nyse^2
nasdaq_sq <- nasdaq^2
# The same transformation applied to every column except the first (Date),
# kept together as one data frame.
data_sq <- data[, -1]^2

## Testing for Correlation

In [9]:
# Correlation matrix of the squared returns.
cor(data_sq)
# Test each pairwise correlation against the null of zero correlation.
cor.test(gold_sq, nyse_sq)
cor.test(gold_sq, nasdaq_sq)
cor.test(nyse_sq, nasdaq_sq)

Unnamed: 0,gold,nyse,nasdaq
gold,1.0,0.1030494,0.01901549
nyse,0.10304939,1.0,0.65162251
nasdaq,0.01901549,0.6516225,1.0



	Pearson's product-moment correlation

data:  gold_sq and nyse_sq
t = 2.3761, df = 526, p-value = 0.01786
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.01787475 0.18673913
sample estimates:
      cor 
0.1030494 



	Pearson's product-moment correlation

data:  gold_sq and nasdaq_sq
t = 0.43619, df = 526, p-value = 0.6629
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 -0.06642411  0.10417827
sample estimates:
       cor 
0.01901549 



	Pearson's product-moment correlation

data:  nyse_sq and nasdaq_sq
t = 19.702, df = 526, p-value < 2.2e-16
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
 0.5996328 0.6981351
sample estimates:
      cor 
0.6516225 


## Testing for Normality

In [10]:
# Shapiro-Wilk tests on the raw returns ...
shapiro.test(gold)
shapiro.test(nyse)
shapiro.test(nasdaq)
# ... and on the squared returns.
shapiro.test(gold_sq)
shapiro.test(nyse_sq)
shapiro.test(nasdaq_sq)
# Conclusion: the null hypothesis of normality is rejected for all six
# series, since every p-value is below 0.05.


	Shapiro-Wilk normality test

data:  gold
W = 0.92674, p-value = 2.245e-15



	Shapiro-Wilk normality test

data:  nyse
W = 0.97365, p-value = 3.799e-08



	Shapiro-Wilk normality test

data:  nasdaq
W = 0.97614, p-value = 1.397e-07



	Shapiro-Wilk normality test

data:  gold_sq
W = 0.37707, p-value < 2.2e-16



	Shapiro-Wilk normality test

data:  nyse_sq
W = 0.47117, p-value < 2.2e-16



	Shapiro-Wilk normality test

data:  nasdaq_sq
W = 0.52387, p-value < 2.2e-16


## Testing for Autocorrelation

In [11]:
# Ljung-Box tests of the null hypothesis of no autocorrelation up to lag 4.

# Raw returns.
Box.test(gold, lag = 4, type = 'Ljung-Box')
# Conclusion: the null is not rejected; no significant autocorrelation.
Box.test(nyse, lag = 4, type = 'Ljung-Box')
# Conclusion: the null is not rejected; no significant autocorrelation.
Box.test(nasdaq, lag = 4, type = 'Ljung-Box')
# Conclusion: the null is not rejected; no significant autocorrelation.

# Squared returns.
Box.test(gold_sq, lag = 4, type = 'Ljung-Box')
# Conclusion: the null is rejected; significant autocorrelation.
Box.test(nyse_sq, lag = 4, type = 'Ljung-Box')
# Conclusion: the null is rejected; significant autocorrelation.
Box.test(nasdaq_sq, lag = 4, type = 'Ljung-Box')
# Conclusion: the null is rejected; significant autocorrelation.


	Box-Ljung test

data:  gold
X-squared = 2.4018, df = 4, p-value = 0.6623



	Box-Ljung test

data:  nyse
X-squared = 3.5692, df = 4, p-value = 0.4674



	Box-Ljung test

data:  nasdaq
X-squared = 7.3077, df = 4, p-value = 0.1205



	Box-Ljung test

data:  gold_sq
X-squared = 84.89, df = 4, p-value < 2.2e-16



	Box-Ljung test

data:  nyse_sq
X-squared = 24.344, df = 4, p-value = 6.815e-05



	Box-Ljung test

data:  nasdaq_sq
X-squared = 85.699, df = 4, p-value < 2.2e-16


## Restrict the Sample to Start from 1974

In [12]:
# Keep only the observations from period 1 of 1974 onwards.
gold_w <- window(x = gold_ts, start = c(1974, 1))
nyse_w <- window(x = nyse_ts, start = c(1974, 1))
nasdaq_w <- window(x = nasdaq_ts, start = c(1974, 1))

In [13]:
# dynlm supplies dynlm(); the package must be installed for this cell to run.
library(dynlm)

# Shift the windowed gold series by one period (k = -1) to form the
# lagged regressor.
gold_lag1 <- lag(gold_w, k = -1)

# Regress gold returns on their own lag and on the two equity return series.
lm1 <- dynlm(
  gold_w ~ gold_lag1 + nyse_w + nasdaq_w,
  start = c(1974, 1)
)
summary(lm1)

ERROR: Error in library(dynlm): there is no package called ‘dynlm’
