# Imputing Financial Data  

Based on the [`imputeFin` package](https://github.com/dppalomar/imputeFin) (GitHub repository).  
For how it works, see the [package vignette](https://cran.r-project.org/web/packages/imputeFin/vignettes/ImputeFinancialTimeSeries.html).

In [None]:
# install stable version from CRAN
install.packages("imputeFin")
install.packages('data.table')

In [None]:
library(imputeFin)
library(data.table)

In [None]:
# Import the data and look at the first six rows
#' Load the challenge data set, indexed by date.
#'
#' @param path Path of the CSV file to read. The file must contain a `Date`
#'   column. Defaults to the challenge file used throughout this notebook.
#' @return A data.frame whose row names are the values of the `Date` column
#'   and whose columns are the remaining columns of the file.
get_data <- function(path = "../data/data_set_challenge.csv") {
  dataset <- read.csv(file = path)
  # Without this check a missing `Date` column would go unnoticed: assigning
  # NULL row names just leaves the rows numbered 1..n.
  if (!"Date" %in% names(dataset)) {
    stop("`", path, "` has no `Date` column", call. = FALSE)
  }
  rownames(dataset) <- dataset$Date
  dataset$Date <- NULL
  dataset
}

# Load the data into the global `dataset` that the following cells read,
# then preview its first rows.
dataset <- get_data()
head(dataset)

In [None]:
# Pull out column X1 as a single series for the demo cells that follow.
x1 <- dataset$X1

In [None]:
# Plot the X1 series as loaded, before any imputation.
# NOTE(review): the title mentions "one outlier"; it looks carried over from
# the package vignette -- confirm it actually describes X1.
plot_imputed(x1, title = "Original time series with missing values and one outlier")

In [None]:
# Impute X1 with impute_AR1_t(), with outlier removal switched off
# (remove_outliers = FALSE), then plot the result.
y_imputed <- impute_AR1_t(x1, remove_outliers = FALSE)
# NOTE(review): the sample output previously pasted here ("var c: 60 missing
# values imputed and 1 outliers detected and corrected.") refers to a variable
# `c` and to outlier correction, neither of which applies to this call. It
# looks copied from the package vignette -- replace with real output if needed.
plot_imputed(y_imputed)

In [None]:
#' Impute a single series, echoing its first values for inspection.
#'
#' @param ts The series to impute; passed as-is to `impute_AR1_t()`.
#' @return Whatever `impute_AR1_t()` returns for `ts` when called with
#'   `remove_outliers = FALSE` and `verbose = FALSE`.
impute_column <- function(ts) {
  # Echo the leading values so each processed column is visible in the output.
  preview <- head(ts)
  print(preview)
  impute_AR1_t(ts, remove_outliers = FALSE, verbose = FALSE)
}

#' Drop the first character of each column name.
#'
#' The data columns are read with `read.csv()` and show up with a leading "X"
#' (e.g. `X1`); this strips that one-character prefix.
#'
#' @param cn Character vector of column names.
#' @return Character vector of the same length, each name without its first
#'   character.
standardize_colname <- function(cn) {
  # `substring()` runs to the end of each name, so long names are no longer
  # cut off after a fixed 100 characters.
  substring(cn, 2L)
}


# TODO

- run on each time series individually
- create a submission file
- clean up the code, since it is going to be code-reviewed

In [None]:
#' Impute every column of a data set and format the result for export.
#'
#' @param data data.frame with one series per column and the dates as row
#'   names. Defaults to the first five columns of the global `dataset`
#'   (fewer if `dataset` has fewer than five columns).
#' @return data.frame with a leading `Date` column holding the row names of
#'   `data`, followed by one column per series as returned by
#'   `impute_column()`, named via `standardize_colname()`.
impute_data <- function(
    data = dataset[, seq_len(min(5L, ncol(dataset))), drop = FALSE]) {
  # `apply()` walks the columns (margin 2) and hands each one to
  # `impute_column()`.
  imputed <- data.frame(apply(data, 2, impute_column))
  df <- cbind(row.names(data), imputed)
  colnames(df) <- c("Date", standardize_colname(colnames(data)))
  df
}

In [None]:
# Impute the first five columns of `dataset` and keep the result in `df`.
df <- impute_data()

In [None]:
# Display the imputed data frame.
df

In [None]:
# write.csv(df, "../data/r_submission.csv")

# RUN HERE

In [None]:
install.packages(c("imputeFin", 'data.table', "pbapply", "dplyr"))

In [None]:
library(imputeFin)
library(data.table)
library(pbapply)
library(dplyr)

In [None]:
#' Load the challenge data set and log-transform the BOND/STOCK entries.
#'
#' @param data_path Path of the CSV file with the series. It must contain a
#'   `Date` column, which becomes the row names.
#' @param mapping_path Path of the CSV file with the columns `Type` and
#'   `mapping_id`.
#' @return The data set as a data.frame indexed by date, with the rows
#'   selected by the BOND/STOCK `mapping_id` values replaced by their natural
#'   logarithm. The distinct types and the selected ids are printed along
#'   the way.
get_data <- function(data_path = "../data/data_set_challenge.csv",
                     mapping_path = "../data/final_mapping_candidat.csv") {
  dataset <- read.csv(file = data_path)
  rownames(dataset) <- dataset$Date
  dataset$Date <- NULL

  types <- read.csv(file = mapping_path)
  missing_cols <- setdiff(c("Type", "mapping_id"), names(types))
  if (length(missing_cols) > 0) {
    stop(
      "`", mapping_path, "` lacks column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }
  print(unique(types$Type))

  # Ids of the instruments whose values are moved to log scale.
  log_ids <- types$mapping_id[types$Type %in% c("BOND", "STOCK")]
  # NOTE(review): this indexes ROWS, but the rows are dates and the series
  # are columns (impute_data() iterates with margin 2). Presumably
  # `dataset[, log_ids]` was intended -- confirm what `mapping_id` refers to
  # before changing it. Also note that impute_column() applies exp() to every
  # column, not only to the log-transformed entries.
  dataset[log_ids, ] <- log(dataset[log_ids, ])
  print(log_ids)
  dataset
}

#' Impute one series and map the result back through `exp()`.
#'
#' If `impute_AR1_t()` signals an error or a warning, the imputation is
#' abandoned and `ts` is returned unchanged. The reason is now reported with
#' `message()` instead of being dropped silently.
#'
#' NOTE(review): the success path returns values on the exp scale, while the
#' fallback returns the raw input (no `exp()`), so columns may end up on
#' different scales. get_data() also log-transforms only part of the data,
#' whereas `exp()` is applied here to every column -- confirm the intended
#' scale handling.
#'
#' @param ts The series to impute; passed as-is to `impute_AR1_t()`.
#' @return `exp()` of the imputed series, or `ts` itself when the imputation
#'   raised an error or a warning.
impute_column <- function(ts) {
  # Shared fallback for both condition types: say why, then hand back the
  # input untouched.
  keep_original <- function(cond) {
    kind <- if (inherits(cond, "error")) "error" else "warning"
    message(
      "impute_column: series left unimputed (", kind, "): ",
      conditionMessage(cond)
    )
    ts
  }

  # tryCatch() unwinds on the first warning, so a warning discards the
  # imputation result just like an error does.
  tryCatch(
    exp(impute_AR1_t(ts, remove_outliers = FALSE, verbose = FALSE)),
    error = keep_original,
    warning = keep_original
  )
}

#' Drop the first character of each column name.
#'
#' The data columns are read with `read.csv()` and show up with a leading "X"
#' (e.g. `X1`); this strips that one-character prefix.
#'
#' @param cn Character vector of column names.
#' @return Character vector of the same length, each name without its first
#'   character.
standardize_colname <- function(cn) {
  # `substring()` runs to the end of each name, so long names are no longer
  # cut off after a fixed 100 characters.
  substring(cn, 2L)
}

#' Impute every column of a data set and format the result for export.
#'
#' @param data data.frame with one series per column and the dates as row
#'   names. Defaults to the global `dataset`.
#' @return data.frame with a leading `Date` column holding the row names of
#'   `data`, followed by one column per series as returned by
#'   `impute_column()`, named via `standardize_colname()`.
impute_data <- function(data = dataset) {
  # `pbapply()` walks the columns (margin 2) and hands each one to
  # `impute_column()`.
  imputed <- data.frame(pbapply(data, 2, impute_column))
  df <- cbind(row.names(data), imputed)
  colnames(df) <- c("Date", standardize_colname(colnames(data)))
  df
}

In [None]:
# Load the data with the pipeline's get_data() into the global `dataset`,
# report its dimensions and preview the first rows.
dataset <- get_data()
print(dim(dataset))
head(dataset)

In [None]:
# Impute every column of the global `dataset` and keep the result in `df`.
df <- impute_data()

In [None]:
# Write the submission file. `df` already carries the dates in its `Date`
# column, so the row names are left out to avoid a redundant unnamed first
# column in the CSV.
write.csv(df, "../data/r_submission.csv", row.names = FALSE)