initial import

jeffreybreen · Oct 1, 2012 · 71bd679 · 71bd679
commit 71bd679
Show file tree

Hide file tree

Showing 16 changed files with 1,329 additions and 0 deletions.
diff --git a/R/01-read.csv-mpg.R b/R/01-read.csv-mpg.R
@@ -0,0 +1,3 @@
+# Read the mpg CSV (path relative to the working directory) into a data frame.
+data <- read.csv("data/mpg-3-13-2012.csv")
+View(data)  # inspect it in the interactive data viewer
diff --git a/R/02-XLConnect-mpg.R b/R/02-XLConnect-mpg.R
@@ -0,0 +1,17 @@
+
+# binary package isn't available on CRAN for Mac OS X (monstrous!), but this works:
+# install.packages("XLConnect", type="source")
+library(XLConnect)
+library(ggplot2)
+# create = FALSE: only open an existing workbook, never create a new one
+wb <- loadWorkbook("data/mpg.xlsx", create = FALSE)
+data <- readWorksheet(wb, sheet = "3-7-2012")
+str(data)
+# Scatter plot of Comb.Unrd.Adj.FE...Conventional.Fuel against Eng.Displ, colored
+# by Air.Aspiration.Method.Desc; subset() keeps only rows with Eng.Displ > 0.
+g <- ggplot(subset(data, Eng.Displ > 0),
+            aes(x = Eng.Displ, y = Comb.Unrd.Adj.FE...Conventional.Fuel,
+                color = Air.Aspiration.Method.Desc)) +
+  geom_point(alpha = 0.75, position = "jitter") +
+  theme_bw()
+print(g)
diff --git a/R/03-foreign.R b/R/03-foreign.R
@@ -0,0 +1,17 @@
+library(foreign)
+pkg.dir <- system.file(package = "foreign")  # sample files are read from the installed package
+sav.file <- file.path(pkg.dir, "tests", "sample100.sav")
+spss.data <- read.spss(sav.file)  # SPSS .sav
+View(spss.data)
+# SAS transport (.xpt) file
+xpt.file <- file.path(pkg.dir, "tests", "test.xpt")
+sas.data <- read.xport(xpt.file)
+str(sas.data)
+# Stata .dta file
+dta.file <- file.path(pkg.dir, "tests", "auto8.dta")
+stata.data <- read.dta(dta.file)
+View(stata.data)
+# dBase .dbf file (note: under "files", not "tests")
+dbf.file <- file.path(pkg.dir, "files", "sids.dbf")
+dbf.data <- read.dbf(dbf.file)
+View(dbf.data)
diff --git a/R/04-RMySQL-airport.R b/R/04-RMySQL-airport.R
@@ -0,0 +1,11 @@
+library(RMySQL)
+# Connect to the "test" database as user "root" (no password is supplied).
+con <- dbConnect(MySQL(), user = "root", dbname = "test")
+# Show the column names of the airport table.
+dbListFields(con, "airport")
+# Pull the whole table into a data frame.
+data <- dbGetQuery(con, "select * from airport")
+# Release the connection now that the rows are in memory.
+dbDisconnect(con)
+
+View(data)
diff --git a/R/05-textConnection-email.R b/R/05-textConnection-email.R
@@ -0,0 +1,19 @@
+# Parse a table pasted from an e-mail: the text is wrapped in a textConnection and read like a file.
+con <- textConnection('
+# Hi:
+#
+# Please invite these paid volunteers to the spontaneous rally at 3PM today:
+#
+Name      Department   "Hourly Rate"  email
+Alice     Operations     32           alice@wonderland.org
+Billy     Logistics       5           billy.pilgrim@slaugterhouse5.com
+Winston   Records        20           winston.smith@truth.gov.oc
+#
+#Thanks,
+#Your Boss
+#					 
+')
+# Lines starting with "#" are skipped via comment.char; the first remaining line is the header.
+data <- read.table(con, header = TRUE, comment.char = "#")
+close(con)
+View(data)
diff --git a/R/06-read.csv-url-yahoo.R b/R/06-read.csv-url-yahoo.R
@@ -0,0 +1,7 @@
+library(ggplot2)  # provides ggplot(), geom_point(), scale_y_log10(), theme_bw()
+url <- "http://ichart.finance.yahoo.com/table.csv?s=YHOO&d=8&e=28&f=2012&g=d&a=3&b=12&c=1996&ignore=.csv"
+data <- read.csv(url)  # read.csv() accepts a URL directly
+# Plot the Close column against Date (converted with as.Date) on a log10 y axis.
+g <- ggplot(data) + geom_point(aes(x = as.Date(Date), y = Close), size = 1) +
+  scale_y_log10() + theme_bw()
+print(g)
diff --git a/R/07-download.file-XLConnect-green.R b/R/07-download.file-XLConnect-green.R
@@ -0,0 +1,11 @@
+library(XLConnect)
+# Download the spreadsheet to a local path, then read the local copy.
+url <- "http://www.fueleconomy.gov/feg/EPAGreenGuide/xls/all_alpha_12.xls"
+local.xls.file <- "data/all_alpha_12.xls"
+# mode = "wb" forces a binary transfer; the default text mode can corrupt .xls files on Windows
+download.file(url, local.xls.file, mode = "wb")
+
+wb <- loadWorkbook(local.xls.file, create = FALSE)
+data <- readWorksheet(wb, sheet = "all_alpha_12")
+
+View(data)
diff --git a/R/08-readHTMLTable.R b/R/08-readHTMLTable.R
@@ -0,0 +1,7 @@
+library(XML)
+# Wikipedia page whose tables are parsed below.
+url <- "http://en.wikipedia.org/wiki/List_of_capitals_in_the_United_States"
+# which = 2 selects only the second table found on the page.
+state.capitals.df <- readHTMLTable(url, which = 2)
+
+View(state.capitals.df)
diff --git a/R/09-rdatamarket.R b/R/09-rdatamarket.R
@@ -0,0 +1,5 @@
+library(rdatamarket)
+# Fetch the series behind this DataMarket short URL, then plot it.
+oil.prod <- dmseries("http://data.is/nyFeP9")
+
+plot(oil.prod)
diff --git a/R/10-WDI.R b/R/10-WDI.R
@@ -0,0 +1,21 @@
+library(WDI)
+library(googleVis)  # provides gvisMotionChart()
+# find some indicators:
+WDIsearch("fertility .*total")
+WDIsearch("life expectancy .*birth.*total")
+WDIsearch("GDP per capita .*constant")
+WDIsearch("population, total")
+# World Bank indicator code -> readable column name for the chart
+indicators <- c(SP.DYN.TFRT.IN = "fertility", SP.DYN.LE00.IN = "life expectancy",
+                SP.POP.TOTL = "population", NY.GDP.PCAP.KD = "per capita GDP")
+data <- WDI(country = c("BR", "CN", "GB", "JP", "IN", "SE", "US"),
+            indicator = names(indicators), start = 1900, end = 2010)
+# turn the year into a Date (January 1 of that year) for use as the time variable
+data$year <- as.Date(paste0(data$year, "-01-01"))
+# Rename by indicator code, not by position, so WDI's column order does not matter;
+# codes missing from the result are skipped.
+hit <- match(names(indicators), colnames(data))
+colnames(data)[hit[!is.na(hit)]] <- indicators[!is.na(hit)]
+
+g <- gvisMotionChart(data, idvar = "country", timevar = "year")
+plot(g)
diff --git a/R/11-quantmod.R b/R/11-quantmod.R
@@ -0,0 +1,34 @@
+library(quantmod)
+library(ggplot2)
+# auto.assign = FALSE makes getSymbols() return the series instead of assigning it by name
+initial.claims <- getSymbols("ICSA", src = "FRED", auto.assign = FALSE)
+sp500 <- getSymbols("^GSPC", src = "yahoo", auto.assign = FALSE)
+
+# Convert quotes to weekly and fetch Cl() closing price
+sp500.weekly <- Cl(to.weekly(sp500))
+
+# prices end on Friday, must bump to Saturday to match BLS release dates
+index(sp500.weekly) <- index(sp500.weekly) + 1
+
+# set column names:
+names(initial.claims) <- "initial.claims"
+names(sp500.weekly) <- "sp500"
+
+# all = FALSE keeps only the dates present in both series
+x <- merge(initial.claims, sp500.weekly, all = FALSE)
+stocks.jobs.df <- data.frame(date = index(x), coredata(x))
+
+# create a new, normalized (and scaled) data.frame from the original data
+source("R/function-normalize.R")
+norm.df <- normalize(stocks.jobs.df, sd.scale = 1)
+
+# reverse the sense of the initial jobless claims column (since the correlation
+# with the S&P is negative)
+norm.df$initial.claims <- -1 * norm.df$initial.claims
+
+# draw both normalized series as lines on one shared y axis, labelled "sd"
+g <- ggplot(norm.df) +
+  geom_line(aes(x = date, y = sp500), color = "green", size = 1, alpha = 0.75) +
+  geom_line(aes(x = date, y = initial.claims), color = "red", size = 1, alpha = 0.75) +
+  theme_bw() + ylab("sd")
+print(g)
diff --git a/R/function-normalize.R b/R/function-normalize.R
@@ -0,0 +1,33 @@
+# function-normalize.R
+#
+# Center every numeric column of a data.frame on zero and express it in
+# units of `sd.scale` standard deviations; other columns pass through.
+#
+# by Jeffrey Breen <jeffrey@jeffreybreen.com>
+#
+# Args:
+#   df:       data.frame whose numeric columns should be normalized.
+#   sd.scale: number of standard deviations that map to 1 in the result
+#             (1 gives ordinary z-scores; the default 2 halves them).
+# Returns:
+#   `df` with each numeric column replaced by (x - mean(x)) / (sd(x) * sd.scale).
+#   NA values are ignored when computing the mean and sd and stay NA in the
+#   output. A column whose sd is zero or undefined (constant, or fewer than
+#   two non-NA values) is only centered, avoiding division by zero.
+
+normalize <- function(df, sd.scale = 2) {
+  for (col in colnames(df)) {
+    x <- df[, col]
+    # is.numeric() is FALSE for Date, factor, character and logical columns
+    if (!is.numeric(x)) {
+      next
+    }
+    x <- x - mean(x, na.rm = TRUE)
+    x.sd <- sd(x, na.rm = TRUE)
+    if (is.finite(x.sd) && x.sd > 0) {
+      x <- x / (x.sd * sd.scale)
+    }
+    df[, col] <- x
+  }
+  df
+}
diff --git a/data/all_alpha_12.xls b/data/all_alpha_12.xls