In [None]:
# Load the health care quality data set from the working directory
# and display the structure of the resulting data frame.
quality <- read.csv(file = "quality.csv")
str(quality)


In [None]:
# Baseline accuracy: the accuracy obtained by always predicting the
# larger of the two shares computed from PoorCare.
# NOTE(review): assumes PoorCare is coded 0/1 - confirm against str(quality).
n_obs <- nrow(quality)
share_poor <- sum(quality$PoorCare) / n_obs
share_not_poor <- sum(1 - quality$PoorCare) / n_obs
BaseAccuracy <- max(share_poor, share_not_poor)
BaseAccuracy

In [None]:
# Load caTools (used for random train/test sampling).
# Install it from the configured CRAN mirror only when it is not already
# available, so re-running this cell does not trigger a download every time.
if (!requireNamespace("caTools", quietly = TRUE)) {
  install.packages("caTools", repos = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/")
}
library(caTools)

In [None]:
# Randomly assign each observation to the training or testing side,
# using PoorCare as the label vector and a split ratio of 0.75.
set.seed(88)  # fixed seed so the same split is produced on every run

split <- sample.split(Y = quality$PoorCare, SplitRatio = 0.75)

# Show the per-row flags, then how many rows fall on each side.
split

table(split)

In [None]:
# Materialise the two partitions from the split flags:
# rows flagged TRUE form the training set, rows flagged FALSE the testing set.
# which() keeps only the positions that are TRUE, so NA flags (if any)
# select no rows.
qualityTrain <- quality[which(split), , drop = FALSE]
qualityTest <- quality[which(!split), , drop = FALSE]

In [None]:
# Fit a logistic regression (binomial family) on the training set,
# modelling PoorCare from OfficeVisits and Narcotics, then print the
# model summary.
QualityLog <- glm(
  formula = PoorCare ~ OfficeVisits + Narcotics,
  family = binomial,
  data = qualityTrain
)
summary(QualityLog)

In [None]:
# Predicted probabilities on the training set (response scale).
# NOTE(review): predict() without newdata returns one value per row used in
# fitting; this assumes no training rows were dropped for missing values -
# confirm.
predictTrain <- predict(QualityLog, type = "response")

# Distribution of the predicted probabilities
summary(predictTrain)

# Build a labelled 2-column confusion matrix for a probability threshold.
#
# actual:         observed outcomes (rows of the table)
# predicted_prob: predicted probabilities, same length as `actual`
# threshold:      observations with probability > threshold are predicted TRUE
#
# The predicted side is a factor with fixed FALSE/TRUE levels so that both
# columns are always present, even when no prediction falls on one side of
# the threshold (a bare logical vector would silently drop that column).
confusion_matrix <- function(actual, predicted_prob, threshold) {
  stopifnot(length(actual) == length(predicted_prob))
  table(
    Actual = actual,
    Predicted = factor(predicted_prob > threshold, levels = c(FALSE, TRUE))
  )
}

# Confusion matrix for threshold of 0.5
confusion_matrix(qualityTrain$PoorCare, predictTrain, threshold = 0.5)

# Confusion matrix for threshold of 0.7
confusion_matrix(qualityTrain$PoorCare, predictTrain, threshold = 0.7)

# Confusion matrix for threshold of 0.2
confusion_matrix(qualityTrain$PoorCare, predictTrain, threshold = 0.2)


In [None]:
# Load ROCR (used for the ROC curve).
# Install it from the configured CRAN mirror only when it is not already
# available, so re-running this cell does not trigger a download every time.
if (!requireNamespace("ROCR", quietly = TRUE)) {
  install.packages("ROCR", repos = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/")
}
library(ROCR)

In [None]:
# Pair the training-set predicted probabilities with the observed labels
ROCRpred <- prediction(
  predictions = predictTrain,
  labels = qualityTrain$PoorCare
)

# True positive rate (y axis) against false positive rate (x axis)
ROCRperf <- performance(ROCRpred, measure = "tpr", x.measure = "fpr")

# Plain ROC curve
plot(ROCRperf)

# Same curve, colored
plot(ROCRperf, colorize = TRUE)

# Colored curve with cutoff labels printed at 0, 0.1, ..., 1
plot(
  ROCRperf,
  colorize = TRUE,
  print.cutoffs.at = seq(from = 0, to = 1, by = 0.1),
  text.adj = c(-0.2, 1.7)
)

In [None]:
# Load the AUC package (used to compute the area under the ROC curve).
# Install it from the configured CRAN mirror only when it is not already
# available, so re-running this cell does not trigger a download every time.
if (!requireNamespace("AUC", quietly = TRUE)) {
  install.packages("AUC", repos = "https://mirrors.tuna.tsinghua.edu.cn/CRAN/")
}
library("AUC")

In [None]:
# Inputs for roc(): the predicted probabilities as a plain numeric vector
# and the observed outcome as a factor.
predictTrain.n <- as.numeric(predictTrain)
PoorCare_train.f <- as.factor(qualityTrain$PoorCare)

# Build the ROC object for the training set
ROC_train <- roc(predictions = predictTrain.n, labels = PoorCare_train.f)

# Area under the training-set ROC curve
AUC_train <- auc(ROC_train)
AUC_train