In [None]:
# Directory holding the input CSV files. The trailing slash matters: file
# names are appended to this value by plain string concatenation.
input.dir <- './../input/'

In [None]:
library(MASS)

In [None]:
# Names of the columns of `data` that remain after leaving out `columIds`.
#
# Args:
#   data:     object with column names (e.g. a data frame).
#   columIds: character vector of column names to leave out.
#
# Returns:
#   Character vector of the remaining column names, in their original order.
removedColumnsIndex <- function(data, columIds) {
    all.columns <- colnames(data)
    kept.columns <- setdiff(all.columns, columIds)
    kept.columns
}

In [None]:
# Convert the given columns of `data` to numeric.
#
# Factor columns become their integer level codes and numeric columns are
# kept as numbers. Character columns are parsed as numbers when every
# non-blank value is numeric; otherwise they are treated as categorical
# labels and encoded through factor level codes. Without that second step a
# label column such as "male"/"female" would turn into all NA, which is what
# happens when the CSV reader returns strings instead of factors.
#
# NOTE: level codes are derived from the values present in `data`, so two
# data sets with different label sets get different codes for the same label.
#
# Args:
#   data:     data frame whose columns should be converted.
#   columIds: names (or indices) of the columns to convert.
#
# Returns:
#   `data` with the selected columns replaced by numeric vectors.
columnsToNumeric <- function(data, columIds) {
    for (i in columIds) {
        column <- data[, i]
        if (is.character(column)) {
            # Try a plain numeric parse first so numeric strings keep their value.
            parsed <- suppressWarnings(as.numeric(column))
            has.text <- !is.na(column) & nzchar(trimws(column))
            if (any(has.text & is.na(parsed))) {
                # At least one real label failed to parse: categorical column.
                parsed <- as.numeric(as.factor(column))
            }
            data[, i] <- parsed
        } else {
            data[, i] <- as.numeric(column)
        }
    }
    return(data)
}

In [None]:
# Prepare a raw passenger table for the classifiers.
#
# Steps, in order:
#   1. drop the 'Name', 'Cabin', 'Ticket' and 'Age' columns;
#   2. convert 'Sex' and 'Embarked' to numeric via columnsToNumeric();
#   3. centre and scale every column except 'PassengerId' and 'Survived';
#   4. replace every remaining NA in the table with 0.
#
# Args:
#   data: data frame containing at least the columns named above.
#
# Returns:
#   The cleaned data frame.
preprocessData <- function(data) {
    dropped <- c('Name', 'Cabin', 'Ticket', 'Age')
    cleaned <- data[, removedColumnsIndex(data, dropped)]

    cleaned <- columnsToNumeric(cleaned, c('Sex', 'Embarked'))

    # The identifier and the label keep their original values.
    features <- setdiff(colnames(cleaned), c('PassengerId', 'Survived'))
    cleaned[, features] <- scale(cleaned[, features])

    # After centring, 0 is the column mean, so this fills gaps with the mean
    # for the scaled columns.
    cleaned[is.na(cleaned)] <- 0

    cleaned
}

In [None]:
# Predict class labels with a discriminant-analysis model.
#
# Args:
#   data:  data frame with a 'PassengerId' column plus the model's predictors.
#   model: fitted model whose predict() result has a `$class` factor.
#
# Returns:
#   Two-column matrix with columns 'PassengerId' and 'Survived'; 'Survived'
#   holds the predicted factor's level code minus one.
predictDataDiscriminant <- function(data, model) {
    passenger.ids <- data[, 'PassengerId']
    predicted.class <- predict(model, data)$class

    # Level codes start at 1; shifting by one gives 0-based labels.
    predicted.label <- as.numeric(predicted.class) - 1

    data.prediction <- cbind(passenger.ids, predicted.label)
    colnames(data.prediction) <- c('PassengerId', 'Survived')
    data.prediction
}

In [None]:
# Predict labels with a regression model by rounding its response.
#
# Args:
#   data:  data frame with a 'PassengerId' column plus the model's predictors.
#   model: fitted model whose predict() accepts type = 'response'.
#
# Returns:
#   Two-column matrix with columns 'PassengerId' and 'Survived'; 'Survived'
#   is the rounded predicted response.
predictDataRegression <- function(data, model) {
    passenger.ids <- data[, 'PassengerId']
    response <- predict(model, data, type='response')

    data.prediction <- cbind(passenger.ids, round(response))
    colnames(data.prediction) <- c('PassengerId', 'Survived')
    data.prediction
}

In [None]:
# Scatter-plot the data on its first two principal components, coloured by
# the 'Survived' label.
#
# Both 'Survived' and 'PassengerId' are left out of the PCA: the label must
# not influence the projection, and the identifier carries no information
# but would otherwise take part in the components with its raw variance.
#
# Args:
#   data: data frame with a 'Survived' column and at least two numeric
#         feature columns ('PassengerId' is optional).
#
# Returns:
#   The princomp fit, invisibly.
plotWithPCA <- function(data) {
    feature.columns <- setdiff(colnames(data), c('Survived', 'PassengerId'))
    data.pca <- princomp(data[, feature.columns, drop = FALSE])

    # Colour codes 2 and 3 for labels 0 and 1.
    plot(data.pca$scores[, 1:2], col = as.numeric(data[, 'Survived']) + 2)

    invisible(data.pca)
}

In [None]:
# Load the raw training and test tables from the input directory.
data.train <- read.csv(paste0(input.dir, 'train.csv'))
data.test <- read.csv(paste0(input.dir, 'test.csv'))

In [None]:
# Preview the first rows of the raw training table.
head(data.train)

In [None]:
# Preprocess the training table, plot it on its first two principal
# components, then preview the preprocessed rows.
data.train.use <- preprocessData(data.train)
plotWithPCA(data.train.use)
head(data.train.use)

In [None]:
# Apply the same preprocessing to the test table and preview the result.
data.test.use <- preprocessData(data.test)
head(data.test.use)

In [None]:
# Model formula shared by all classifiers: predict 'Survived' from every
# other column except 'PassengerId', which is only a row identifier and must
# not be used as a predictor.
data.classifier.formula <- as.formula('Survived ~ . - PassengerId')


In [None]:
# Fit a linear discriminant analysis model on the preprocessed training data
# and predict labels for the preprocessed test data.
data.classifier.lda <- lda(data.classifier.formula, data.train.use)
data.test.prediction.lda <- predictDataDiscriminant(data.test.use, data.classifier.lda)

# Preview the predictions.
head(data.test.prediction.lda)

# Save the PassengerId/Survived pairs to a CSV file without row names.
write.csv(data.test.prediction.lda, file = "prediction-lda.csv", row.names = FALSE)

In [None]:
# Fit a quadratic discriminant analysis model on the preprocessed training
# data.
data.classifier.qda <- qda(data.classifier.formula, data.train.use)

# Predict labels for the preprocessed test data and preview them.
data.test.prediction.qda <- predictDataDiscriminant(data.test.use, data.classifier.qda)
head(data.test.prediction.qda)

# Save the PassengerId/Survived pairs to a CSV file without row names.
write.csv(data.test.prediction.qda, file = "prediction-qda.csv", row.names = FALSE)

In [None]:
# Fit a logistic regression (binomial GLM) on the preprocessed training data.
data.classifier.logistic <- glm(data.classifier.formula, data.train.use, family = binomial)

# Predict labels for the preprocessed test data, keeping the 'PassengerId'
# and 'Survived' columns, and preview them.
data.test.prediction.logistic <- predictDataRegression(data.test.use, data.classifier.logistic)[,c('PassengerId', 'Survived')]
head(data.test.prediction.logistic)

# Save the PassengerId/Survived pairs to a CSV file without row names.
write.csv(data.test.prediction.logistic, file = "prediction-logistic.csv", row.names = FALSE)