# Fundamental Analysis

https://stocksearning.com/q.aspx
url <- 'https://www.cnbc.com/quotes/?symbol=aapl&tab=earnings'


In [1]:
library(XML)
library(rlist)
library(dplyr)
library(quantmod)
library(reshape2)
library(XLConnect)
# NOTE(review): hard-coded, machine-specific working directory. Relative paths
# used later in this script ('temp.xlsx', 'Stock_Price.csv',
# 'Fundamental_Metrics.csv') resolve against it, and setwd() fails if the
# directory does not exist -- adjust before running on another machine.
setwd('/Users/jtan/Documents/Investment/Fundamental-Analysis')

# avoid scientific number format
options(scipen = 999)

# Symbols to process and the date window requested for price history.
STOCK_LIST <- c('V')
START_DATE <- '2016-01-01'
END_DATE <- '2017-01-01'

# Empty template for the long-format fundamentals table. Column names use the
# same capitalisation as the frames returned by reshape_stockrow_data()
# (Symbol/Period/Metric/Value), so rbind() and the later Metric filter work on
# matching names instead of relying on zero-row frames being dropped.
fund_output_table <- data.frame(
    Symbol = character(),
    Period = character(),
    Metric = character(),
    Value = numeric(),
    stringsAsFactors = FALSE
)

# Empty template for the per-day price table, extended with the EPS and
# Dividend columns that the main loop derives.
price_output_table <- data.frame(
    Symbol = character(),
    Date = character(),
    Open = numeric(),
    High = numeric(),
    Low = numeric(),
    Close = numeric(),
    Volume = numeric(),
    Adjusted = numeric(),
    EPS = numeric(),
    Dividend = numeric(),
    stringsAsFactors = FALSE
)

# Download one financial statement workbook from stockrow.com and read it.
#   symbol:      ticker symbol (e.g. 'NVDA'); also used as the worksheet name
#   report_type: 'Income Statement', 'Cash Flow' or 'Balance Sheet'
#   report:      appended to 'MR' in the 'dimension' query parameter;
#                'Y' = annual reporting, 'Q' = quarterly reporting
# Returns the worksheet named after `symbol` as a data frame (header = TRUE).
# Side effect: the workbook is written to 'temp.xlsx' in the working directory.
get_stockrow_data <- function(symbol, report_type, report) {
    # e.g. http://stockrow.com/api/companies/NVDA/financials.xlsx?dimension=MRQ&section=Income%20Statement
    # report_type contains spaces, so it is percent-encoded to keep the URL valid.
    url <- paste0(
        'http://stockrow.com/api/companies/', symbol,
        '/financials.xlsx?dimension=MR', report,
        '&section=', URLencode(report_type, reserved = TRUE)
    )
    download.file(url, 'temp.xlsx', mode = "wb")
    readWorksheet(loadWorkbook('temp.xlsx'), sheet = symbol, header = TRUE)
}

# Convert a wide stockrow worksheet into long format.
#   data:   data frame whose first column holds the metric names and whose
#           remaining columns each hold one reporting period
#   symbol: ticker symbol stored in the Symbol column of every output row
# Returns a data frame with columns Symbol, Period, Metric, Value.
reshape_stockrow_data <- function(data, symbol) {
    # the first column identifies the metric
    colnames(data)[1] <- 'Metric'
    # Period headers: keep characters 2-8 and turn dots into dashes.
    # NOTE(review): presumably headers look like 'X2017.12.31', giving
    # '2017-12' -- confirm against a real worksheet.
    # Negative indexing (instead of 2:length(...)) stays correct when the
    # frame has only the Metric column.
    name_array <- colnames(data)
    name_array[-1] <- substr(name_array[-1], 2, 8)
    name_array <- gsub(".", "-", name_array, fixed = TRUE)
    colnames(data) <- name_array
    # reshape: one row per (Metric, Period) pair
    data <- melt(data, id = c("Metric"))
    colnames(data)[2] <- 'Period'
    data$Symbol <- symbol
    # Values with magnitude above 1000 are divided by 1,000,000 and truncated
    # to an integer; smaller values are kept unchanged.
    # NOTE(review): magnitudes between 1000 and 1,000,000 therefore become 0.
    data$Value <- ifelse(abs(data$value) > 1000, as.integer(data$value / 1000000), data$value)
    # return visibly rather than through a trailing assignment
    data[c('Symbol', 'Period', 'Metric', 'Value')]
}

# Scrape the earnings-surprise page on nasdaq.com for a symbol.
#   symbol: ticker symbol inserted into the page URL
# Returns the parsed HTML table with the most columns.
# Stops with an explicit error when the page yields no tables at all.
get_earnings_date <- function(symbol) {
    url <- paste0('http://www.nasdaq.com/symbol/', symbol, '/earnings-surprise')
    tables <- readHTMLTable(url)
    # drop NULL entries left by the parser
    tables <- list.clean(tables, fun = is.null, recursive = FALSE)
    if (length(tables) == 0) {
        stop('No HTML tables found at ', url, call. = FALSE)
    }
    # pick the widest table
    n_cols <- vapply(tables, function(tbl) dim(tbl)[2], integer(1))
    tables[[which.max(n_cols)]]
}

# Tidy the scraped earnings table into (Period, DateReported).
#   data: table whose first row is discarded and whose first two columns hold
#         the period label (month abbreviation in characters 1-3, year in
#         characters 4-7) and the report date in month/day/year form
# Returns (invisibly) a data frame with Period as 'YYYY-MM' text and
# DateReported as Date.
reshape_earnings_date <- function(data) {
    # discard the first row and everything past the second column,
    # converting what remains to plain character columns
    raw <- data.frame(lapply(data[-1, 1:2], as.character), stringsAsFactors = FALSE)
    period_label <- raw[[1]]
    reported_text <- raw[[2]]

    # 'Dec2017' -> month number '12' and year '2017'
    month_number <- sprintf("%02d", match(substr(period_label, 1, 3), month.abb))
    year_text <- substr(period_label, 4, 7)

    tidy <- data.frame(
        Period = paste(year_text, month_number, sep = '-'),
        DateReported = as.Date(reported_text, format = '%m/%d/%Y'),
        stringsAsFactors = FALSE
    )
    invisible(tidy)
}

# Move a date back by a number of calendar years.
#   start_date: anything as.Date() accepts (e.g. '2016-01-01')
#   num_year:   number of years to subtract
# Returns (invisibly) the shifted date as a Date.
start_date_lag <- function(start_date, num_year) {
    # broken-down time exposes the year field for direct arithmetic
    shifted <- as.POSIXlt(as.Date(start_date))
    shifted$year <- shifted$year - num_year
    invisible(as.Date(shifted))
}

# initialize accumulators
stockrow_table <- fund_output_table # fundamentals: all report types, all symbols
price_table <- price_output_table # daily prices with trailing EPS/dividend, all symbols

# seq_along() keeps the loop from running (backwards) when STOCK_LIST is empty
for (i in seq_along(STOCK_LIST)) {

    ticker <- STOCK_LIST[i]
    cat('Processing', ticker, '\n')

    # Income Statement
    data.is <- get_stockrow_data(ticker, 'Income Statement', 'Q')
    result.is <- reshape_stockrow_data(data.is, ticker)
    stockrow_table <- rbind(result.is, stockrow_table)

    # Cash Flow
    data.cf <- get_stockrow_data(ticker, 'Cash Flow', 'Q')
    result.cf <- reshape_stockrow_data(data.cf, ticker)
    stockrow_table <- rbind(result.cf, stockrow_table)

    # Balance Sheet
    data.bs <- get_stockrow_data(ticker, 'Balance Sheet', 'Q')
    result.bs <- reshape_stockrow_data(data.bs, ticker)
    stockrow_table <- rbind(result.bs, stockrow_table)

    # get pricing data, newest first
    data.price <- getSymbols(ticker, from=START_DATE, to=END_DATE, auto.assign=FALSE)
    data.price <- data.frame(Date = index(data.price), data.price, row.names=NULL)
    colnames(data.price) <- c('Date','Open','High','Low','Close','Volume','Adjusted')
    data.price <- data.price[order(data.price$Date, decreasing=TRUE),]
    data.price$Symbol <- ticker

    # get earnings data
    # stockrow_table accumulates every symbol processed so far, so restrict the
    # EPS rows to the current symbol; otherwise earlier symbols leak in.
    data.eps <- stockrow_table %>% filter(Symbol == ticker, Metric == 'EPS')
    data.date <- get_earnings_date(ticker)
    data.date <- reshape_earnings_date (data.date)
    unique <- data.eps %>% distinct(Period) %>% mutate_if(is.factor, as.character)
    colnames(unique)[1] <- 'Period'

    # check whether reporting periods are the same in both sources
    print(data.date[,1])
    print(unique[1:4,1])
    # when fewer than 4 periods match, overwrite the scraped period labels
    # with the first four stockrow periods so the merge below can line up
    if (nrow(merge(unique, data.date, by='Period')) < 4) {
        data.date[,1] <- unique[1:4,1]
    }

    new <- merge(unique, data.date, by='Period', all=TRUE)
    new <- new[order(new$Period, decreasing=TRUE),]
    # Fill report dates from the first missing one onwards: same month and day
    # as the row four positions earlier (newer), one year before.
    missing_rows <- which(is.na(new$DateReported))
    if (length(missing_rows) > 0) {
        if (min(missing_rows) <= 4) {
            stop('Cannot extrapolate report dates for ', ticker,
                 ': fewer than 4 known dates precede the first missing one',
                 call. = FALSE)
        }
        for(j in min(missing_rows):nrow(new)) {
            new$DateReported[j] <- as.yearmon(new$DateReported[j-4]) - 1
            new$DateReported[j] <- as.Date(new$DateReported[j]) + as.numeric(format(new$DateReported[j-4],'%d')) - 1
        }
    }
    data.eps.quarter <- merge(data.eps, new, by=c('Period'), all=TRUE)
    data.eps.quarter <- data.eps.quarter %>% select(DateReported, Value) %>% arrange(desc(DateReported))
    # trailing sum of 4 consecutive quarters, keyed by the newest report date
    # NOTE(review): needs at least 4 quarterly rows, otherwise nrow is negative
    data.eps.annual <- data.frame(matrix(NA, nrow=nrow(data.eps.quarter)-3, ncol=2))
    colnames(data.eps.annual) <- c('Date','EPS')
    data.eps.annual$Date <- head(data.eps.quarter$DateReported, -3)
    data.eps.annual$EPS <- rollapply(data.eps.quarter$Value, width = 4, by = 1, FUN = sum, align = "left")
    data.eps.annual$Symbol <- ticker

    # merge/combine data
    data.combine <- merge(data.price, data.eps.annual, by=c('Symbol','Date'), all=TRUE)
    # sort increasing so the last observation can be carried forward in time
    data.combine <- data.combine[order(data.combine$Date),]
    # seed the first row with the oldest trailing EPS so na.locf has a start value
    if (is.na(data.combine$EPS[1])) {
        data.combine$EPS[1] <- tail(data.eps.annual$EPS, n=1)
    }
    data.combine$EPS <- na.locf(data.combine$EPS)
    data.combine <- na.omit(data.combine)

    # get dividend data, starting one year earlier than the price window
    new <- getDividends(ticker, from=start_date_lag(START_DATE, 1), to=END_DATE, auto.assign=FALSE)
    if (NROW(new) == 0) {
        cat('Skipping dividends for', ticker, '... no dividend income\n')
        # just insert zeroes as dividends; the rows are still appended to
        # price_table below (previously `next` dropped the stock entirely)
        data.combine$Dividend <- 0
    } else {
        data.div.quarter <- data.frame(Date = index(new), new, row.names=NULL)
        colnames(data.div.quarter) <- c('Date','Dividend')
        # reverse sort and do moving sum over 4 consecutive payments
        # NOTE(review): needs at least 4 dividend records, otherwise nrow is negative
        data.div.quarter <- data.div.quarter[order(data.div.quarter$Date, decreasing=TRUE),]
        data.div.annual <- data.frame(matrix(NA, nrow=nrow(data.div.quarter)-3, ncol=2))
        colnames(data.div.annual) <- c('Date','Dividend')
        data.div.annual$Date <- head(data.div.quarter$Date, -3)
        data.div.annual$Dividend <- rollapply(data.div.quarter$Dividend, width = 4, by = 1, FUN = sum, align = "left")
        data.div.annual$Symbol <- ticker

        # merge/combine data
        data.combine <- merge(data.combine, data.div.annual, by=c('Symbol','Date'), all=TRUE)
        # sort increasing to allow correct derivation of dividend
        data.combine <- data.combine[order(data.combine$Date),]
        if (is.na(data.combine$Dividend[1])) {
            data.combine$Dividend[1] <- tail(data.div.annual$Dividend, n=1)
        }
        data.combine$Dividend <- na.locf(data.combine$Dividend)
        data.combine <- na.omit(data.combine)
    }

    price_table <- rbind(data.combine, price_table)
}

# clean up the workbook left behind by get_stockrow_data()
if (file.exists('temp.xlsx')) file.remove('temp.xlsx')

write.table(price_table, file = 'Stock_Price.csv', sep = ",", col.names = TRUE, row.names = FALSE, quote = FALSE)
write.table(stockrow_table, file = 'Fundamental_Metrics.csv', sep = ",", col.names = TRUE, row.names = FALSE, quote = FALSE)



Attaching package: ‘dplyr’

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



ERROR: Error in library(XLConnect): there is no package called ‘XLConnect’
