In [2]:
library(data.table)
library(dplyr)
library(stats)
library(reshape2)
library(ggplot2)
#library(zoo)
library(ggcorrplot)


Sys.setlocale("LC_TIME", "en_US.UTF-8")

In [3]:
# Build a rank-correlation matrix together with its matrix of p-values.
#
# graphs: data frame or matrix with one numeric column per series.
# Returns a list with two elements:
#   corr  - Spearman correlation matrix computed by cor()
#   p.mat - p-value matrix computed by ggcorrplot's cor_pmat()
correlationTable <- function(graphs) {
  cross <- cor(x = graphs, method = "spearman", use = "na.or.complete")

  # cor_pmat() forwards extra arguments to stats::cor.test(), which defaults
  # to a Pearson test. Ask for the Spearman test so the p-values belong to
  # the same statistic as `cross`. exact = FALSE requests the asymptotic
  # p-value, since exact ones cannot be computed when the series have ties.
  pmat <- cor_pmat(graphs, method = "spearman", exact = FALSE)

  list(corr = cross, p.mat = pmat)
}
# Find the indices that are paired with `orig` in a table of index pairs.
#
# orig:     the index whose partners are wanted.
# highCorr: two-column matrix of index pairs, as produced by
#           which(..., arr.ind = TRUE).
# Returns the distinct indices that share a row with `orig`, excluding
# `orig` itself (an empty vector when there are none).
findCorrelated <- function(orig, highCorr) {
  touchesOrig <- highCorr[, 1] == orig | highCorr[, 2] == orig

  # drop = FALSE keeps a single matching row as a matrix; as.vector() then
  # flattens both columns into one plain vector of indices.
  partners <- as.vector(highCorr[touchesOrig, , drop = FALSE])

  # When the pairs come from a symmetric matrix each partner shows up twice,
  # once as (partner, orig) and once as (orig, partner); report it once.
  unique(partners[partners != orig])
}
# For every row of a correlation matrix, list the other rows/columns whose
# correlation with it exceeds a threshold.
#
# correlationTable: square numeric correlation matrix.
# threshold:        correlations strictly greater than this count as a match
#                   (defaults to 0.90).
# Returns a list with one element per row of `correlationTable`; element i
# holds the distinct indices correlated with row i (possibly empty).
extractClusters <- function(correlationTable, threshold = 0.90) {
  highCorr <- which(correlationTable > threshold, arr.ind = TRUE)

  # seq_len() yields an empty sequence for a zero-row matrix, where
  # 1:nrow(...) would iterate over c(1, 0).
  lapply(seq_len(nrow(correlationTable)), function(origin) {
    unique(findCorrelated(origin, highCorr))
  })
}

In [4]:
df <- read.csv(file = "data/crypto-markets.csv", header = TRUE)

# convert the column to Date
df$date <- as.Date(df$date, "%Y-%m-%d")

# Extract the top 20 currencies and take a sane date range...
filtereddf <- df %>%
  select(name, date, ranknow, close) %>%
  filter(ranknow <= 20, date >= as.Date("2016-01-01"))

# ... and plot them,
print(ggplot(filtereddf) + geom_line(aes(x = date, y = close, colour = name)))

# Pivot the dataframe in order to process it more easily: one row per date,
# one column per currency, holding the closing price. Naming value.var
# explicitly avoids relying on dcast() guessing the value column.
pivotdf <- dcast(filtereddf, date ~ name, value.var = "close")

# Scale the values so as to be able to calculate the correlation
#pivotdf[,-1] <- scale(pivotdf[,-1])

# Generate the pairwise correlation table between the 20 crypto-currencies
cortbl <- correlationTable(data.frame(scale(pivotdf[, -1])))

# Position of Bitcoin among the currency columns. The correlation matrix is
# built from pivotdf without its first (date) column, so currency i of the
# matrix is column i + 1 of pivotdf. Looking the position up by name keeps
# the lookup and the plot below pointing at the same currency.
bitcoinIdx <- match("Bitcoin", colnames(pivotdf)[-1])
stopifnot(!is.na(bitcoinIdx))

# Find the currencies that are correlated to within 95% of Bitcoin
highCorr <- which(cortbl$corr > 0.95, arr.ind = TRUE)
# The matrix is symmetric, so every pair is reported twice; keep each once.
bitcoinMatches <- unique(findCorrelated(bitcoinIdx, highCorr))
print(colnames(pivotdf)[bitcoinMatches + 1]) # Print the names of the matching currencies

# Melt the resulting currencies so they can be plotted by ggplot
tempdf <- melt(
  pivotdf[, c(1, bitcoinIdx + 1, bitcoinMatches + 1), drop = FALSE],
  id.vars = "date"
)
# Plot them
print(ggplot(tempdf) + geom_line(aes(x = date, y = value, colour = variable)))

# Plot the correlation matrix
#corrplot::corrplot(cortbl, type="lower")
print(ggcorrplot(cortbl$corr, p.mat = cortbl$p.mat))

# Cluster the currencies with correlation > 0.90 (extractClusters' threshold)
m <- extractClusters(cortbl$corr)

# Plot the series belonging to one cluster as a line chart.
#
# clusterNumber: index of the cluster to plot; also used to pick the title
#                currency from the non-date column names.
# clusters:      list of index vectors, one per currency; indices refer to
#                the currency columns (i.e. excluding the first column).
# currencyTable: data frame whose first column is `date`, followed by one
#                column per currency.
# Prints the plot and returns it invisibly; returns NULL invisibly when the
# cluster has no members.
plotCluster <- function(clusterNumber, clusters, currencyTable) {
  currencyNames <- colnames(currencyTable)[-1]
  members <- clusters[[clusterNumber]]

  # With no members only the date column would be selected, leaving nothing
  # to draw; say so instead of failing inside melt()/ggplot().
  if (length(members) == 0) {
    message(
      "No currencies are correlated to ", currencyNames[clusterNumber],
      "; nothing to plot."
    )
    return(invisible(NULL))
  }

  # +1 skips the leading date column; drop = FALSE keeps a data frame.
  clusterdf <- melt(
    currencyTable[, c(1, members + 1), drop = FALSE],
    id.vars = "date"
  )
  clusterPlot <- ggplot(clusterdf) +
    geom_line(aes(x = date, y = value, colour = variable)) +
    ggtitle(paste("Currencies correlated to", currencyNames[clusterNumber]))
  print(clusterPlot)
  invisible(clusterPlot)
}

# Plot the clusters of the first four currencies. The scaled table (date
# column followed by the standardised currency columns) is the same for
# every plot, so it is built once inside a local scope.
local({
  scaledCurrencies <- cbind(
    data.frame(date = pivotdf$date),
    data.frame(scale(pivotdf[, -1]))
  )
  for (exampleCluster in 1:4) {
    plotCluster(exampleCluster, m, scaledCurrencies)
  }
})

##### Load conventional currencies ########################

# Read one exchange-rate history file and keep its date and price columns.
#
# path:      CSV file with (at least) the columns `Date` and `Price`, where
#            `Date` is formatted like "Jan 31, 2018".
# priceName: name to give the price column in the result.
# Returns a data frame with the columns `Date` (class Date) and `priceName`
# (numeric).
loadFxHistory <- function(path, priceName) {
  # stringsAsFactors = FALSE: if the price column is read as text,
  # as.numeric() must parse the text itself; on a factor it would return
  # the level codes instead of the prices.
  fx <- read.csv(file = path, header = TRUE, stringsAsFactors = FALSE)
  fx <- fx[, c("Date", "Price")]
  fx$Date <- as.Date(fx$Date, "%b %d, %Y")
  fx$Price <- as.numeric(fx$Price)
  colnames(fx) <- c("Date", priceName)
  fx
}

df.eur <- loadFxHistory("data/EUR_USD Historical Data.csv", "Price.eur")
df.gbp <- loadFxHistory("data/GBP_USD Historical Data.csv", "Price.gbp")
df.jpy <- loadFxHistory("data/JPY_USD Historical Data.csv", "Price.jpy")

# Take bitcoin
conv.currencies <- pivotdf[, c("date", "Bitcoin")]
colnames(conv.currencies)[1] <- "Date"
conv.currencies$Bitcoin <- as.numeric(conv.currencies$Bitcoin)

# Add our conventional currencies: successive full outer joins on Date, so a
# date present in any of the tables is kept (missing prices become NA).
conv.currencies <- Reduce(
  function(left, right) merge(left, right, by = "Date", all = TRUE),
  list(conv.currencies, df.eur, df.gbp, df.jpy)
)

# Impute NA's with the previous value in the dataframe. nafill() comes from
# data.table, which this script loads; zoo (the home of na.locf) is not
# loaded here. Values before a column's first observation stay NA.
conv.currencies[-1] <- lapply(
  conv.currencies[-1],
  data.table::nafill,
  type = "locf"
)

conv.currencies.scaled <- conv.currencies
conv.currencies.scaled[-1] <- scale(conv.currencies[-1], scale = TRUE)

# Melt the resulting currencies so they can be plotted by ggplot
tempdf <- melt(conv.currencies.scaled, id.vars = "Date")

# Plot them
print(ggplot(tempdf) + geom_line(aes(x = Date, y = value, colour = variable)))

conv.corr <- correlationTable(conv.currencies.scaled[-1])
#corrplot(conv.corr$corr, type="lower", addCoef.col = "blue")
print(ggcorrplot(conv.corr$corr, p.mat = conv.corr$p.mat))

#plot(x=conv.currencies.scaled[,"Bitcoin"], y=conv.currencies.scaled[,"Price.jpy"])


“cannot open file 'data/crypto-markets.csv': No such file or directory”

ERROR: Error in file(file, "rt"): cannot open the connection
