Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
174 lines (129 sloc) 4.1 KB
# Start from an empty workspace so that objects left over from an earlier
# session cannot be picked up by accident further down.
rm(list = ls())

# Run everything from the project folder.
# NOTE(review): the "È" in the user folder looks like a mis-decoded accented
# character -- confirm that this path resolves on the target machine.
setwd("C:\\Users\\Jean-FÈlix\\Desktop\\Machine Learning\\Application")

# Path of a LaTeX file inside the project folder; it is only stored here and
# is not referenced again in the visible part of the script.
reglatex <- "C:\\Users\\Jean-FÈlix\\Desktop\\Machine Learning\\Application\\graph.tex"
library(stargazer)
library(MASS)
library(ISLR)
library(e1071)
library(splines)
library(gbm)
library(randomForest)
library(glmnet)
library(glmpath)
library(LiblineaR)
library(rpart.plot)
library(rpart)
library(pROC)
library(ROCR)
library(rattle)
library(cvAUC)
library(xtable)
library(rockchalk)
library(foreign)
library(tree)
# Read the Stata data set used throughout the script.
jobt <- read.dta("C:\\Users\\Jean-FÈlix\\Desktop\\Machine Learning\\Application\\jobt.dta")

# fix() opens the interactive data editor; skip it when the script is run in
# batch mode, where no editor window is available.
if (interactive()) {
  fix(jobt)
}

# Dimensions of the raw data, before incomplete rows are removed.
dim(jobt)

# Drop rows with missing values BEFORE attaching. attach() puts a copy of the
# data frame on the search path, so attaching first would leave bare column
# names pointing at the uncleaned data, with more rows than the cleaned jobt.
jobt <- na.omit(jobt)

# Kept because statements further down use bare column names.
attach(jobt)
####################################
# #
# Transformed Outcome Tree #
# #
####################################
## Propensity Score ##
# Binary-response GLM of the treatment indicator empojt on every other column
# of jobt except wage.
# NOTE(review): despite the "probit" name, family = binomial uses glm's
# default logit link; use binomial(link = "probit") if a probit is intended.
probit.empojt <- glm(empojt ~ . - wage, data = jobt, family = binomial)
summary(probit.empojt)

# Fitted treatment probabilities for every row of jobt, stored as p.hat.
empojt.probs <- predict(probit.empojt, newdata = jobt, type = "response")
jobt["p.hat"] <- empojt.probs

## Transformed Outcome ##
# wage * (W - p) / (p * (1 - p)), with W = empojt and p = fitted probability.
# The columns are read from jobt itself so they always have the same rows as
# empojt.probs; a copy attached before na.omit() may be longer, which would
# recycle silently or make the assignment below fail.
wage.obs <- jobt[["wage"]]
treat.obs <- jobt[["empojt"]]
wage.trans <- wage.obs * (treat.obs - empojt.probs) /
  (empojt.probs * (1 - empojt.probs))
jobt["waget"] <- wage.trans
## Resampling ##
# Fixed seed, then a random half of the row indices becomes the training set;
# the remaining rows form the test set.
set.seed(1)
train <- sample(seq_len(nrow(jobt)), nrow(jobt) / 2)
test <- jobt[-train, ]
waget.test <- test[["waget"]]

## Tree ##
# Regression tree for the transformed outcome, grown on the training rows.
# The treatment indicator, the fitted propensity and the raw wage are
# excluded from the predictors.
tree.jobt <- tree(
  waget ~ . - empojt - p.hat - wage,
  data = jobt,
  subset = train,
  mindev = 0.005
)
summary(tree.jobt)
plot(tree.jobt)
text(tree.jobt, pretty = 0)
## Standard Errors ##
# Test-set mean squared error of the tree's predictions.
tree.pred.test <- predict(tree.jobt, newdata = test)
mean((tree.pred.test - waget.test)^2)

# The test set is cut at tenure = 3.5 into two groups (.l1 / .l2) and, within
# each, into empojt == 1 and empojt == 0 observations.
# NOTE(review): 3.5 presumably mirrors a split of the fitted tree -- confirm
# it still matches after any change to the data or the seed.

# Group 1: tenure < 3.5
waget1.l1 <- subset(test, empojt == 1 & tenure < 3.5)
waget0.l1 <- subset(test, empojt == 0 & tenure < 3.5)
avg1.l1 <- mean(waget1.l1$waget)
avg0.l1 <- mean(waget0.l1$waget)
waget1.l1$delta <- (waget1.l1$waget - avg1.l1)^2
waget0.l1$delta <- (waget0.l1$waget - avg0.l1)^2
# Root of the mean squared deviation (divides by n, not n - 1).
std1.l1 <- sqrt(mean(waget1.l1$delta))
std0.l1 <- sqrt(mean(waget0.l1$delta))

# Group 2: tenure >= 3.5
waget1.l2 <- subset(test, empojt == 1 & tenure >= 3.5)
waget0.l2 <- subset(test, empojt == 0 & tenure >= 3.5)
avg1.l2 <- mean(waget1.l2$waget)
avg0.l2 <- mean(waget0.l2$waget)
waget1.l2$delta <- (waget1.l2$waget - avg1.l2)^2
waget0.l2$delta <- (waget0.l2$waget - avg0.l2)^2
std1.l2 <- sqrt(mean(waget1.l2$delta))
std0.l2 <- sqrt(mean(waget0.l2$delta))

# Reported figure per group: the two arms' deviations added together.
# NOTE(review): this is a sum of standard deviations, not the usual standard
# error of a difference in means -- confirm it is the intended quantity.
std.l1 <- std1.l1 + std0.l1
std.l2 <- std1.l2 + std0.l2
std.l1
std.l2
#########################################
# #
# Propensity Score Random Forest #
# #
#########################################
# Random forest of the treatment indicator on the covariates; the outcome
# (wage) and the two derived columns (waget, p.hat) are left out of the
# predictors. 500 trees, with variable-importance measures recorded.
rf.ps <- randomForest(
  empojt ~ . - wage - waget - p.hat,
  data = jobt,
  ntree = 500,
  importance = TRUE
)

# Show the fit summary, then the variable-importance plot.
rf.ps
varImpPlot(rf.ps)
##################################
# #
# Propensity Score Lasso #
# #
##################################
## Variables and Lambda Grid ##
# Design matrix of the covariates (intercept column dropped) and the treatment
# indicator as response. jobt is not attached here: every column used in this
# section is taken from jobt or x explicitly, and attaching again would only
# stack another copy of the data on the search path.
x <- model.matrix(empojt ~ . - wage - waget - p.hat, data = jobt)[, -1]
y <- jobt$empojt
# 100 penalty values, log-spaced from 1e10 down to 1e-2.
grid <- 10^seq(10, -2, length.out = 100)

## Lasso Regression ##
# alpha = 1 selects the lasso penalty; fitted on the training rows only.
# NOTE(review): no family is given, so glmnet fits its default gaussian model
# to the 0/1 treatment; family = "binomial" may be what is wanted for a
# propensity score -- confirm.
lasso.ps <- glmnet(x[train, ], y[train], alpha = 1, lambda = grid)
plot(lasso.ps)

## Mean Squared Error ##
# Cross-validation on the training rows picks the penalty (lambda.min); the
# test rows are then used to compute the mean squared prediction error.
set.seed(1)
cv.lasso <- cv.glmnet(x[train, ], y[train], alpha = 1)
plot(cv.lasso)
bestlam <- cv.lasso$lambda.min
xtest <- x[-train, ]
y.test <- y[-train]
lasso.pred <- predict(cv.lasso, s = bestlam, newx = xtest)
mean((lasso.pred - y.test)^2)

## Lasso Coefficients ##
# Refit on all rows over the grid and read the coefficients at the
# cross-validated penalty.
lasso.coefs <- glmnet(x, y, alpha = 1, lambda = grid)
coef(lasso.coefs, s = bestlam)
##############################
# #
# Causal Tree #
# #
##############################
library(devtools)
library(httr)
library(causalTree)
# Numeric design matrix built from jobt (intercept dropped, p.hat excluded;
# waget is the formula's response and so is not a column), converted back to
# a data frame for causalTree().
z <- model.matrix(waget ~ . - p.hat, data = jobt)[, -1]
z <- as.data.frame(z)

## Tree ##
# The treatment vector is passed explicitly from z rather than being found
# through attach(z), so it is guaranteed to come from the same rows as the
# data argument and nothing on the search path gets masked.
causal.tree <- causalTree(
  wage ~ . - empojt,
  data = z,
  treatment = z[["empojt"]],
  split.Rule = "CT",
  cv.option = "CT",
  split.Honest = TRUE,
  cv.Honest = FALSE,
  split.Bucket = FALSE,
  xval = 10
)
rpart.plot(causal.tree)

## Pruning ##
# Take the complexity parameter (column 1 of cptable) from the row that
# minimises column 4 of cptable, and prune the tree back to it.
opcp <- causal.tree$cptable[, 1][which.min(causal.tree$cptable[, 4])]
opfit <- prune(causal.tree, opcp)
rpart.plot(opfit)

# End the R session.
q()