Permalink
Browse files

initial import

  • Loading branch information...
0 parents commit 71bd679c4a7971ccdb16913e268bce3a78d5f988 Jeffrey Breen committed Oct 1, 2012
@@ -0,0 +1,3 @@
+# Read the mpg CSV export (path is relative to the working directory)
+# and open the resulting data.frame in the interactive viewer.
+mpg.csv = 'data/mpg-3-13-2012.csv'
+data = read.csv(file=mpg.csv)
+View(data)
@@ -0,0 +1,17 @@
+
+# Read one worksheet of the mpg workbook with XLConnect and plot
+# Comb.Unrd.Adj.FE...Conventional.Fuel against Eng.Displ, colored by
+# Air.Aspiration.Method.Desc.
+#
+# binary package isn't available on CRAN for Mac OS X (monstrous!), but this works:
+# install.packages("XLConnect", type="source")
+
+library(XLConnect)
+
+# create=FALSE: do not create the workbook file if it does not already exist.
+# (Spelled out as FALSE because the shorthand F is an ordinary variable that
+# can be reassigned.)
+wb = loadWorkbook("data/mpg.xlsx", create=FALSE)
+
+data = readWorksheet(wb, sheet='3-7-2012')
+
+str(data)
+
+
+library(ggplot2)
+
+# keep only the rows with a positive engine displacement before plotting
+g = ggplot(subset(data, Eng.Displ > 0),
+           aes(x=Eng.Displ,
+               y=Comb.Unrd.Adj.FE...Conventional.Fuel,
+               color=Air.Aspiration.Method.Desc))
+g = g + geom_point(alpha=0.75, position='jitter') + theme_bw()
+print(g)
@@ -0,0 +1,17 @@
+library(foreign)
+
+# Every sample file below is looked up under the installed 'foreign'
+# package directory, so resolve that directory once.
+foreign.dir = system.file(package='foreign')
+
+# SPSS (.sav)
+sav.file = file.path(foreign.dir, 'tests', 'sample100.sav')
+spss.data = read.spss(sav.file)
+View(spss.data)
+
+# SAS transport (.xpt)
+xpt.file = file.path(foreign.dir, 'tests', 'test.xpt')
+sas.data = read.xport(xpt.file)
+str(sas.data)
+
+# Stata (.dta)
+dta.file = file.path(foreign.dir, 'tests', 'auto8.dta')
+stata.data = read.dta(dta.file)
+View(stata.data)
+
+# dBase (.dbf)
+dbf.file = file.path(foreign.dir, 'files', 'sids.dbf')
+dbf.data = read.dbf(dbf.file)
+View(dbf.data)
@@ -0,0 +1,11 @@
+library(RMySQL)
+
+# Connect to the "test" database as user root (no password is supplied).
+con = dbConnect(MySQL(), user="root", dbname="test")
+
+# Run the queries inside tryCatch() so that the connection is always
+# released, even when one of the calls fails.
+tryCatch({
+  # explicit print(): values are not auto-printed inside a braced block
+  print(dbListFields(con, "airport"))
+
+  data = dbGetQuery(con, "select * from airport")
+}, finally = dbDisconnect(con))
+
+View(data)
@@ -0,0 +1,19 @@
+# Parse a whitespace-delimited table that is embedded in a free-form message.
+# With comment.char='#', everything from a '#' to the end of the line is
+# ignored, so only the header row and the three data rows are read.
+con = textConnection('
+# Hi:
+#
+# Please invite these paid volunteers to the spontaneous rally at 3PM today:
+#
+Name Department "Hourly Rate" email
+Alice Operations 32 alice@wonderland.org
+Billy Logistics 5 billy.pilgrim@slaugterhouse5.com
+Winston Records 20 winston.smith@truth.gov.oc
+#
+#Thanks,
+#Your Boss
+#
+')
+
+# header=TRUE (not the reassignable shorthand T): first non-comment line
+# supplies the column names
+data = read.table(con, header=TRUE, comment.char='#')
+
+# release the text connection through the close() generic
+close(con)
+
+View(data)
@@ -0,0 +1,7 @@
+# Download the YHOO price-history CSV and plot the Close column against
+# Date on a log10 y axis.
+# NOTE(review): this relies on the remote endpoint below still serving CSV
+# with Date and Close columns -- confirm before depending on it.
+
+# ggplot(), geom_point() etc. come from ggplot2, which this script must load
+# itself rather than rely on an earlier script having attached it.
+library(ggplot2)
+
+url = 'http://ichart.finance.yahoo.com/table.csv?s=YHOO&d=8&e=28&f=2012&g=d&a=3&b=12&c=1996&ignore=.csv'
+
+data = read.csv(url)
+
+g = ggplot(data) + geom_point(aes(x=as.Date(Date), y=Close), size = 1)
+g = g + scale_y_log10() + theme_bw()
+print(g)
@@ -0,0 +1,11 @@
+library(XLConnect)
+
+# Download the spreadsheet to a local file, then read one worksheet from it.
+url = "http://www.fueleconomy.gov/feg/EPAGreenGuide/xls/all_alpha_12.xls"
+local.xls.file = 'data/all_alpha_12.xls'
+
+# .xls is a binary format: mode='wb' forces a binary transfer. The default
+# text mode corrupts binary files on Windows.
+download.file(url, local.xls.file, mode='wb')
+
+# create=FALSE: do not create the workbook file if the download left nothing
+wb = loadWorkbook(local.xls.file, create=FALSE)
+data = readWorksheet(wb, sheet='all_alpha_12')
+
+View(data)
@@ -0,0 +1,7 @@
+library(XML)
+
+# Read the second HTML table found on the page (which = 2) and show it
+# in the interactive viewer.
+url = 'http://en.wikipedia.org/wiki/List_of_capitals_in_the_United_States'
+state.capitals.df = readHTMLTable(url, which = 2)
+View(state.capitals.df)
@@ -0,0 +1,5 @@
+library(rdatamarket)
+
+# Fetch the series behind the short link with dmseries(), then hand the
+# result to plot().
+oil.prod = dmseries("http://data.is/nyFeP9")
+plot(oil.prod)
@@ -0,0 +1,21 @@
+library(WDI)
+library(googleVis)  # provides gvisMotionChart(), used at the end
+
+# find some indicators:
+WDIsearch('fertility .*total')
+WDIsearch('life expectancy .*birth.*total')
+WDIsearch('GDP per capita .*constant')
+WDIsearch('population, total')
+
+
+# download four indicators for seven countries
+data = WDI(country=c('BR', 'CN', 'GB', 'JP', 'IN', 'SE', 'US'),
+           indicator=c('SP.DYN.TFRT.IN', 'SP.DYN.LE00.IN', 'SP.POP.TOTL',
+                       'NY.GDP.PCAP.KD'),
+           start=1900, end=2010)
+
+# turn the year into a Date on January 1st of that year
+data$year = as.Date(paste0(data$year, '-01-01'))
+
+# NOTE(review): this rename is positional -- it assumes columns 3:6 are the
+# four indicators in the order requested above; confirm against names(data).
+colnames(data)[3:6] = c('fertility', 'life expectancy',
+                        'population', 'per capita GDP')
+
+g = gvisMotionChart(data, idvar='country', timevar='year')
+plot(g)
@@ -0,0 +1,34 @@
+library(quantmod)
+
+# Fetch the FRED series ICSA and Yahoo quotes for ^GSPC.
+# auto.assign=FALSE makes getSymbols() return the series instead of creating
+# a variable; FALSE is spelled out because the shorthand F can be reassigned.
+initial.claims = getSymbols('ICSA', src='FRED', auto.assign=FALSE)
+
+sp500 = getSymbols('^GSPC', src='yahoo', auto.assign=FALSE)
+
+# Convert quotes to weekly and fetch Cl() closing price
+sp500.weekly = Cl(to.weekly(sp500))
+
+# prices end on Friday, must bump to Saturday to match BLS release dates
+index(sp500.weekly) = index(sp500.weekly) + 1
+
+# set column names:
+names(initial.claims) = 'initial.claims'
+names(sp500.weekly) = "sp500"
+
+# all=FALSE: keep only the dates that are present in both series
+x = merge(initial.claims, sp500.weekly, all=FALSE)
+stocks.jobs.df = data.frame(date=index(x), coredata(x))
+
+# create a new, normalized (and scaled) data.frame from the original data
+
+source("R/function-normalize.R")
+norm.df = normalize(stocks.jobs.df, sd.scale=1)
+
+# reverse the sense of the initial jobless claims column (since the correlation
+# with the S&P is negative)
+norm.df$initial.claims = -1 * norm.df$initial.claims
+
+library(ggplot2)
+g = ggplot(norm.df) + geom_line(aes(x=date, y=sp500), color='green', size=1, alpha=0.75)
+g = g + geom_line(aes(x=date, y=initial.claims), color='red', size=1, alpha=0.75)
+g = g + theme_bw() + ylab('sd')
+
+print(g)
@@ -0,0 +1,33 @@
+# function-normalize.R
+#
+# Standardize the numeric columns of a data.frame: each numeric column is
+# centered on its mean and then divided by (sd.scale * its standard
+# deviation). Non-numeric columns are returned untouched.
+#
+# Arguments:
+#   df        data.frame whose columns are processed one at a time
+#   sd.scale  number of standard deviations that map to 1 in the output
+#             (sd.scale = 1 yields ordinary z-scores)
+#   na.rm     passed to mean() and sd(); the default FALSE keeps the
+#             original behavior, where any NA turns the whole column into NA
+#
+# Value: df with its numeric columns replaced by the rescaled values.
+#
+# by Jeffrey Breen <jeffrey@jeffreybreen.com>
+#
+
+normalize = function(df, sd.scale = 2, na.rm = FALSE)
+{
+  for (col in colnames(df))
+  {
+    x = df[, col]
+
+    # leave factors, strings, dates etc. alone
+    if ( !is.numeric(x) )
+      next
+
+    # first, compute the mean and subtract from each data point
+    x = x - mean(x, na.rm = na.rm)
+
+    # next, scale by the specified number of standard deviations.
+    # A constant column has sd == 0 and a single-row column has sd == NA;
+    # in both cases keep the centered values instead of producing NaN/NA
+    # from the division.
+    x.sd = sd(x, na.rm = na.rm)
+    if ( isTRUE(x.sd > 0) )
+      x = x / (x.sd * sd.scale)
+
+    df[, col] = x
+  }
+
+  return(df)
+}
Binary file not shown.
Oops, something went wrong.

0 comments on commit 71bd679

Please sign in to comment.