In [None]:
install.packages('ISLR')

In [None]:
# Attach the ISLR package and load its Carseats data set into the workspace.
library('ISLR')
data(Carseats)

In [None]:
# Preview the first rows of Carseats.
head(Carseats)

In [None]:
library('rpart')

In [None]:
# Open the help page for rpart().
?rpart

In [None]:
# Fix the RNG seed so the random split below is reproducible.
set.seed(128321093)
# Draw half of the row indices (rounded down) for the training set.
ss_train <- sample(seq_len(nrow(Carseats)), nrow(Carseats) %/% 2L)
# Sanity check: full data dimensions and number of training rows.
dim(Carseats)
length(ss_train)

In [None]:
# Rows drawn into ss_train form the training set; all remaining rows are
# held out for validation.
train <- Carseats[ss_train, ]
validation <- Carseats[-ss_train, ]

In [None]:
# Check the size of the two halves of the split.
dim(train)
dim(validation)

In [None]:
# Regression tree (method = "anova") for Sales on every other column of
# train. xval = 10 requests 10 cross-validations, which is where the xerror
# column of the cp table used below comes from.
model <- rpart(
    Sales ~ .,
    data = train,
    method = "anova",
    control = rpart.control(maxdepth = 30, minsplit = 2, xval = 10)
)

In [None]:
# Draw the fitted tree, then add the split/leaf labels on top of it.
plot(model)
text(model)

In [None]:
# Complexity-parameter table of the fitted tree (one row per candidate CP).
ct <- model[["cptable"]]
ct

In [None]:
# Cross-validated error (xerror) against log(CP), one point per cp-table row.
plot(log(ct[,"CP"]),ct[,"xerror"])

In [None]:
# CP value of the cp-table row with the smallest cross-validated error.
opt_alpha <- ct[which.min(ct[, "xerror"]), "CP"]
opt_alpha

In [None]:
# Same xerror-vs-log(CP) plot, with a vertical line at the selected CP.
plot(log(ct[,"CP"]),ct[,"xerror"])
abline(v=log(opt_alpha))

In [None]:
# Prune the fitted tree back at the selected complexity parameter.
pruned <- prune(model, cp = opt_alpha)

In [None]:
# Print the pruned tree.
pruned

In [None]:
# Draw the pruned tree, then add the split/leaf labels on top of it.
plot(pruned)
text(pruned)

In [None]:
# Residual sum of squares on the training rows: full tree vs. pruned tree.
RSS_train_full <- sum((train$Sales - predict(model, newdata = train))^2)
RSS_train_pruned <- sum((train$Sales - predict(pruned, newdata = train))^2)
RSS_train_full
RSS_train_pruned

In [None]:
# Residual sum of squares on the held-out rows: full tree vs. pruned tree.
# Predictions made on `validation` must be compared with the held-out
# responses (validation$Sales). Comparing them with train$Sales pairs each
# prediction with the response of an unrelated row and yields a meaningless
# error figure.
RSS_val_full <- sum((predict(model, validation) - validation$Sales)^2)
RSS_val_pruned <- sum((predict(pruned, validation) - validation$Sales)^2)
RSS_val_full
RSS_val_pruned

### Classification

In [None]:
install.packages('palmerpenguins')

In [None]:
library('MASS')

In [None]:
library('palmerpenguins')

In [None]:
# Keep only the rows of penguins that have no missing values.
penguins <- penguins[complete.cases(penguins), ]

In [None]:
# Classification tree for species using every other column as a predictor.
classmod <- rpart(species ~ ., data = penguins, method = "class")

In [None]:
# Draw the classification tree, then add the split/leaf labels on top of it.
plot(classmod)
text(classmod)

In [None]:
library('ggplot2')

In [None]:
#' Plot the decision regions of a classification tree over two predictors
#'
#' Fits an rpart classification tree on `species` and the two chosen columns,
#' predicts the class on an N x N grid spanning the observed range of both
#' predictors, and draws the predicted regions as tiles with the training
#' points overlaid.
#'
#' @param v1,v2 Column names (character scalars) of the predictors shown on
#'   the x and y axes.
#' @param df Data frame containing a `species` column plus `v1` and `v2`.
#' @param N Number of grid points along each axis.
#' @param scaleit If TRUE, centre and scale both predictors before fitting.
#' @param fmla Model formula given as a string; it is evaluated against the
#'   three retained columns only.
#' @param cp Complexity parameter forwarded to `rpart.control()`.
#' @return A ggplot object.
plot_fit <- function(v1, v2, df = penguins, N = floor(sqrt(10000)),
                     scaleit = FALSE, fmla = 'species~.', cp = 0) {
    needed <- c('species', v1, v2)
    absent <- setdiff(needed, colnames(df))
    if (length(absent) > 0) {
        stop("column(s) not found in df: ",
             paste(absent, collapse = ", "), call. = FALSE)
    }

    train_df <- df[, needed]
    if (scaleit) {
        # Scale column by column so each predictor stays a plain numeric
        # vector after the assignment.
        train_df[c(v1, v2)] <- lapply(
            train_df[c(v1, v2)],
            function(col) as.numeric(scale(col))
        )
    }

    # fit model
    mod <- rpart(formula = as.formula(fmla), data = train_df, method = "class",
                 control = rpart.control(maxdepth = 30, minsplit = 1,
                                         minbucket = 1, cp = cp))

    # Grid over the observed range of each predictor; na.rm keeps the range
    # finite when df still contains missing values.
    r1 <- range(train_df[[v1]], na.rm = TRUE)
    r2 <- range(train_df[[v2]], na.rm = TRUE)
    s1 <- seq(r1[1], r1[2], length.out = N)
    s2 <- seq(r2[1], r2[2], length.out = N)

    p_df <- expand.grid(v1 = s1, v2 = s2)
    colnames(p_df) <- c(v1, v2)
    p_df$species <- predict(mod, newdata = p_df, type = 'class')

    # aes_string() is deprecated; the .data pronoun looks the columns up by
    # name instead. labs() pins the axis/legend titles to the column names.
    ggplot(data = p_df,
           mapping = aes(x = .data[[v1]], y = .data[[v2]],
                         fill = .data[['species']],
                         shape = .data[['species']])) +
        geom_tile() +
        geom_point(data = train_df, size = 5) +
        labs(x = v1, y = v2, fill = 'species', shape = 'species')
}

In [None]:
# Decision regions with cp = 0 (the value is forwarded to rpart.control()).
plot_fit(v1 = 'flipper_length_mm', v2 = 'bill_depth_mm', cp = 0)

In [None]:
# Same plot with a small complexity penalty (cp = .01) for comparison.
plot_fit(v1 = 'flipper_length_mm', v2 = 'bill_depth_mm', cp = .01)

In [None]:
# Same plot with cp = 1; presumably this leaves an unsplit tree with a single
# predicted class - confirm against the rendered figure.
plot_fit(v1 = 'flipper_length_mm', v2 = 'bill_depth_mm', cp = 1)