In [None]:
library(xts); library(quantmod); library(plyr)
library("vars")
library(TTR)



read in data

In [None]:
# Load the environment of market series from disk.
data.env <- readRDS("WorldMarkts99_20.RDS")

# List the objects it contains and preview the first rows of the IBEX series.
print(ls(data.env))
print(head(data.env[["IBEX"]]))

In [None]:
# Names of every object stored in data.env; used below as the list of markets.
markets <- ls(data.env)
# Display the market names.
markets

Export each market's series to CSV and extract the closing prices for each market

In [None]:
# Export every market's series to its own CSV file under data/, with the
# time index written as the first column.

# Make sure the output directory exists; write.zoo() cannot create it.
dir.create("data", showWarnings = FALSE)

# Iterate over the names directly: `1:length(markets)` would run with
# i = 1 and i = 0 if `markets` were empty.
for (market in markets) {
    write.zoo(data.env[[market]], file = paste0("data/", market, ".csv"), sep = ",")
}

In [None]:
## Build a date-indexed series of closing prices for one market.
##
## `market` is the name of an object stored in `data.env`. The "Date" and
## "Close" columns are selected, "Close" is renamed to "Price", rows containing
## NA are dropped, and the prices are returned as an xts object indexed by date.
## NOTE(review): elsewhere in this file the objects in `data.env` are passed to
## Ad() and write.zoo(), so they may be xts series without "Date"/"Close"
## columns -- confirm the column names before relying on this function.
get_prices <- function(market) {
  px <- data.env[[market]][, c("Date", "Close")]
  colnames(px) <- c("Date", "Price")
  px$Date <- as.Date(px$Date)
  px <- na.omit(px)
  xts(px$Price, px$Date)
}

# Apply get_prices() to every market and combine the results with plyr::ldply.
# NOTE(review): get_prices() returns an xts object, while the following lines
# expect `Market`, `Date` and `Price` columns; plyr documents `.id` as applying
# to named list input, and `markets` is an unnamed character vector -- confirm
# these columns actually exist in the combined result.
prices <- ldply(markets, get_prices, .id = "Market")
# Sort rows by market, then by date.
prices <- prices[order(prices$Market, prices$Date), ]
# Rebuild a single-column xts series indexed by date.
prices <- xts(prices$Price, order.by = prices$Date)
# Keep only the first row for each index timestamp.
# NOTE(review): all markets share this one column, so this presumably discards
# every market but one on dates where several markets traded -- confirm intent.
prices <- prices[!duplicated(index(prices)), ]

prices

Loop through each market and calculate its weekly log returns

In [None]:
# Weekly log returns of each market's adjusted close, one column per market.
returns <- xts()  # start from an empty xts; each market is merged in as a new column
per <- "weekly"
for (sym in markets) {
  print(sym)
  adjusted_close <- Ad(data.env[[sym]])
  # log returns of the adjusted closing prices at the chosen periodicity
  sym_returns <- periodReturn(adjusted_close, period = per, type = "log", dropNa = FALSE)
  returns <- merge(returns, sym_returns)
  # label the column that was just appended with the market name
  colnames(returns)[ncol(returns)] <- sym
}

In [None]:
# Save the weekly returns to CSV, writing the time index as a "Date" column.
write.zoo(returns, file = "data/weekly_returns.csv", index.name = "Date", sep = ",")

Impute missing values (next observation carried backward)

In [None]:

# Fill each missing return with the next available observation
# (fromLast = TRUE carries values backward in time).
returns_imputed <- na.locf(returns, fromLast = TRUE)

# Number of NA values still present after the fill.
sum(is.na(returns_imputed))

In [None]:
# Drop the last row, which is assumed to hold the NA values that remain after
# the backward fill, then re-count NAs to confirm none are left.
returns_imputed <- returns_imputed[-nrow(returns_imputed),]
sum(is.na(returns_imputed))

treat outliers

In [None]:
# find outliers and impute them with locf

# find outliers
# Replace outliers in a numeric vector with NA.
#
# A value is an outlier when it lies more than `k` interquartile ranges below
# the first quartile or above the third quartile. Existing NAs are ignored when
# the quartiles are computed and are returned unchanged.
#
# Arguments:
#   x  numeric vector.
#   k  non-negative fence multiplier applied to the IQR (default 1.75, the
#      value this script has always used).
# Returns:
#   a vector like `x` (same length and names) with outliers set to NA.
outliers <- function(x, k = 1.75) {
  stopifnot(is.numeric(k), length(k) == 1L, !is.na(k), k >= 0)
  quartiles <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE, names = FALSE)
  fence <- k * IQR(x, na.rm = TRUE)
  # `!is.na(x) &` keeps the mask free of NA so the assignment below is always valid
  is_outlier <- !is.na(x) &
    (x < quartiles[1] - fence | x > quartiles[2] + fence)
  x[is_outlier] <- NA
  x
}

# Replace outliers with NA column by column. apply() coerces the xts object to
# a plain matrix, so the result is wrapped back into an xts with the original
# index; otherwise the later date-range subsetting ("2001-01-01/2003-01-01")
# would fail on a matrix.
returns_imputed <- xts(
  apply(returns_imputed, 2, outliers),
  order.by = index(returns_imputed)
)


In [None]:
# Fill the NAs introduced by outlier removal with the next available
# observation (fromLast = TRUE carries values backward), then display the result.
# NOTE(review): an outlier with no later observation in its column has nothing
# to fill from -- check whether trailing NAs remain.

returns_imputed <- na.locf(returns_imputed, fromLast=TRUE)
returns_imputed

Select the analysis window (2001-01-01 to 2003-01-01) from the imputed returns

In [None]:
# Boundaries of the analysis window, as ISO dates.
dI <- "2001-01-01"
dF <- "2003-01-01"
# Keep the rows of the imputed returns that fall inside the "dI/dF" range.
Retp <- returns_imputed[paste0(dI, "/", dF), ]

### Question 1

In [None]:
library(ggplot2)

In [None]:
# Decay factor of the EWMA: each new squared return gets weight (1 - lambda)
# and the previous variance estimate gets weight lambda.
lambda <- 0.94

# seq_len() keeps the loop from running when Retp has no columns
# (1:ncol(Retp) would iterate over 1 and 0).
for (i in seq_len(ncol(Retp))) {
    market_name <- colnames(Retp)[i]
    # Loop-local name so the global `returns` object is not overwritten.
    market_returns <- Retp[, i]
    squared_returns <- market_returns^2

    # EWMA variance: sigma2_t = lambda * sigma2_{t-1} + (1 - lambda) * r_t^2.
    # The smoothing ratio is passed explicitly; with only `n` supplied, EMA()
    # uses ratio = 2 / (n + 1), which is not 1 - lambda for n = 1 / (1 - lambda).
    # n = 1 seeds the recursion with the first squared return.
    ewma_variance <- EMA(squared_returns, n = 1, ratio = 1 - lambda)
    ewma_volatility <- sqrt(ewma_variance)

    # Benchmark: root mean squared return over a trailing 21-observation window.
    historical_volatility <- sqrt(
        rollapply(squared_returns, width = 21, FUN = sum, align = "right", fill = NA) / 21
    )

    # ggplot() needs a data frame: one row per date and estimator.
    plot_data <- rbind(
        data.frame(
            Date = index(ewma_volatility),
            Volatility = as.numeric(ewma_volatility),
            Estimator = "EWMA"
        ),
        data.frame(
            Date = index(historical_volatility),
            Volatility = as.numeric(historical_volatility),
            Estimator = "Rolling (21 obs)"
        )
    )

    volatility_plot <- ggplot(
        na.omit(plot_data),
        aes(x = Date, y = Volatility, colour = Estimator)
    ) +
        geom_line() +
        labs(
            x = "Date",
            y = "Volatility",
            title = paste("Volatility estimates:", market_name)
        )

    # Plots are not auto-printed inside a for loop; print explicitly.
    print(volatility_plot)
}