Skip to content

Commit

Permalink
initial import
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeffrey Breen committed Oct 1, 2012
0 parents commit 71bd679
Show file tree
Hide file tree
Showing 16 changed files with 1,329 additions and 0 deletions.
3 changes: 3 additions & 0 deletions R/01-read.csv-mpg.R
@@ -0,0 +1,3 @@
# Read the fuel-economy CSV export into a data frame, then open it in the
# interactive data viewer.
data <- read.csv(file = "data/mpg-3-13-2012.csv")

View(data)
17 changes: 17 additions & 0 deletions R/02-XLConnect-mpg.R
@@ -0,0 +1,17 @@

# If CRAN offers no binary build of XLConnect for your platform (this was the
# case on Mac OS X when the script was written), install from source:
# install.packages("XLConnect", type="source")

library(XLConnect)
library(ggplot2)

# Open the existing workbook. `create = FALSE` is spelled out in full because
# `F` is an ordinary variable that can be reassigned.
wb <- loadWorkbook("data/mpg.xlsx", create = FALSE)

# Pull one sheet into a data frame and show its structure.
data <- readWorksheet(wb, sheet = "3-7-2012")

str(data)

# Scatter plot of Eng.Displ against Comb.Unrd.Adj.FE...Conventional.Fuel,
# coloured by Air.Aspiration.Method.Desc. Rows whose Eng.Displ is not
# positive are dropped before plotting.
g <- ggplot(
  subset(data, Eng.Displ > 0),
  aes(
    x = Eng.Displ,
    y = Comb.Unrd.Adj.FE...Conventional.Fuel,
    color = Air.Aspiration.Method.Desc
  )
)
g <- g + geom_point(alpha = 0.75, position = "jitter") + theme_bw()
print(g)
17 changes: 17 additions & 0 deletions R/03-foreign.R
@@ -0,0 +1,17 @@
library(foreign)

# Locate the sample files that ship inside the installed 'foreign' package.
sav.file <- file.path(system.file(package = "foreign"), "tests", "sample100.sav")
xpt.file <- file.path(system.file(package = "foreign"), "tests", "test.xpt")
dta.file <- file.path(system.file(package = "foreign"), "tests", "auto8.dta")
dbf.file <- file.path(system.file(package = "foreign"), "files", "sids.dbf")

# SPSS (.sav)
spss.data <- read.spss(sav.file)
View(spss.data)

# SAS transport (.xpt)
sas.data <- read.xport(xpt.file)
str(sas.data)

# Stata (.dta)
stata.data <- read.dta(dta.file)
View(stata.data)

# dBase (.dbf)
dbf.data <- read.dbf(dbf.file)
View(dbf.data)
11 changes: 11 additions & 0 deletions R/04-RMySQL-airport.R
@@ -0,0 +1,11 @@
library(RMySQL)

# Open a connection to the "test" database as user "root".
con <- dbConnect(MySQL(), user = "root", dbname = "test")

# List the columns of the airport table, fetch every row into a data frame,
# and release the connection before inspecting the result.
dbListFields(con, "airport")
data <- dbGetQuery(con, "select * from airport")
dbDisconnect(con)

View(data)
19 changes: 19 additions & 0 deletions R/05-textConnection-email.R
@@ -0,0 +1,19 @@
# Treat an in-memory string (here, the pasted body of an e-mail) as if it were
# a file. Lines starting with '#' are comments to read.table(), so only the
# header row and the three data rows are parsed.
con <- textConnection('
# Hi:
#
# Please invite these paid volunteers to the spontaneous rally at 3PM today:
#
Name Department "Hourly Rate" email
Alice Operations 32 alice@wonderland.org
Billy Logistics 5 billy.pilgrim@slaugterhouse5.com
Winston Records 20 winston.smith@truth.gov.oc
#
#Thanks,
#Your Boss
#
')

# `header = TRUE` is spelled out in full: `T` is an ordinary variable that can
# be reassigned, whereas TRUE is a reserved word.
data <- read.table(con, header = TRUE, comment.char = "#")
close(con)

View(data)
7 changes: 7 additions & 0 deletions R/06-read.csv-url-yahoo.R
@@ -0,0 +1,7 @@
# ggplot2 must be attached for the plotting calls below; the script did not
# load it, so it failed when run in a fresh session.
library(ggplot2)

# Daily price history for YHOO, requested as CSV directly over HTTP.
# NOTE(review): this endpoint may no longer be served by Yahoo -- verify the
# URL still responds before relying on this script.
url <- "http://ichart.finance.yahoo.com/table.csv?s=YHOO&d=8&e=28&f=2012&g=d&a=3&b=12&c=1996&ignore=.csv"

# read.csv() accepts a URL in place of a file name.
data <- read.csv(url)

# Closing price over time on a log10 y axis.
g <- ggplot(data) + geom_point(aes(x = as.Date(Date), y = Close), size = 1)
g <- g + scale_y_log10() + theme_bw()
print(g)
11 changes: 11 additions & 0 deletions R/07-download.file-XLConnect-green.R
@@ -0,0 +1,11 @@
library(XLConnect)

url <- "http://www.fueleconomy.gov/feg/EPAGreenGuide/xls/all_alpha_12.xls"
local.xls.file <- "data/all_alpha_12.xls"

# .xls is a binary format: request a binary transfer explicitly. With the
# default text mode the file can be corrupted on Windows.
download.file(url, local.xls.file, mode = "wb")

# Open the downloaded workbook (never create it) and read the single sheet.
# `create = FALSE` is spelled out in full because `F` can be reassigned.
wb <- loadWorkbook(local.xls.file, create = FALSE)
data <- readWorksheet(wb, sheet = "all_alpha_12")

View(data)
7 changes: 7 additions & 0 deletions R/08-readHTMLTable.R
@@ -0,0 +1,7 @@
library(XML)

# Wikipedia page listing the U.S. state capitals.
url <- "http://en.wikipedia.org/wiki/List_of_capitals_in_the_United_States"

# `which = 2` asks readHTMLTable() for the second table on the page only.
state.capitals.df <- readHTMLTable(url, which = 2)

View(state.capitals.df)
5 changes: 5 additions & 0 deletions R/09-rdatamarket.R
@@ -0,0 +1,5 @@
library(rdatamarket)

# Fetch the series behind the given DataMarket short URL and plot it.
oil.prod <- dmseries("http://data.is/nyFeP9")

plot(oil.prod)
21 changes: 21 additions & 0 deletions R/10-WDI.R
@@ -0,0 +1,21 @@
library(WDI)
library(googleVis) # provides gvisMotionChart(); was used but never loaded

# find some indicators:
WDIsearch("fertility .*total")
WDIsearch("life expectancy .*birth.*total")
WDIsearch("GDP per capita .*constant")
WDIsearch("population, total")

# Indicator codes to download, paired one-to-one with the friendlier column
# labels to show in the chart.
wdi.indicators <- c(
  "SP.DYN.TFRT.IN", "SP.DYN.LE00.IN", "SP.POP.TOTL", "NY.GDP.PCAP.KD"
)
wdi.labels <- c(
  "fertility", "life expectancy", "population", "per capita GDP"
)

data <- WDI(
  country = c("BR", "CN", "GB", "JP", "IN", "SE", "US"),
  indicator = wdi.indicators,
  start = 1900, end = 2010
)

# The motion chart needs a Date for its time variable: use January 1st of
# each year.
data$year <- as.Date(paste0(data$year, "-01-01"))

# Rename the indicator columns by looking up their codes instead of assuming
# they sit in positions 3:6, so the result does not depend on where WDI()
# places them in its output.
indicator.cols <- match(wdi.indicators, colnames(data))
stopifnot(!anyNA(indicator.cols))
colnames(data)[indicator.cols] <- wdi.labels

g <- gvisMotionChart(data, idvar = "country", timevar = "year")
plot(g)
34 changes: 34 additions & 0 deletions R/11-quantmod.R
@@ -0,0 +1,34 @@
library(quantmod)
library(ggplot2)

# Download both series and return them as values instead of assigning them
# into the workspace. `FALSE` is spelled out in full because `F` is an
# ordinary variable that can be reassigned.
initial.claims <- getSymbols("ICSA", src = "FRED", auto.assign = FALSE)

sp500 <- getSymbols("^GSPC", src = "yahoo", auto.assign = FALSE)

# Convert quotes to weekly and fetch Cl() closing price
sp500.weekly <- Cl(to.weekly(sp500))

# prices end on Friday, must bump to Saturday to match BLS release dates
index(sp500.weekly) <- index(sp500.weekly) + 1

# set column names:
names(initial.claims) <- "initial.claims"
names(sp500.weekly) <- "sp500"

# Keep only the dates present in both series, then flatten to a data.frame
# with an explicit date column.
x <- merge(initial.claims, sp500.weekly, all = FALSE)
stocks.jobs.df <- data.frame(date = index(x), coredata(x))

# create a new, normalized (and scaled) data.frame from the original data

source("R/function-normalize.R")
norm.df <- normalize(stocks.jobs.df, sd.scale = 1)

# reverse the sense of the initial jobless claims column (since the correlation
# with the S&P is negative)
norm.df$initial.claims <- -1 * norm.df$initial.claims

# Overlay the two normalized series: S&P 500 in green, inverted claims in red.
g <- ggplot(norm.df) +
  geom_line(aes(x = date, y = sp500), color = "green", size = 1, alpha = 0.75)
g <- g +
  geom_line(aes(x = date, y = initial.claims), color = "red", size = 1, alpha = 0.75)
g <- g + theme_bw() + ylab("sd")

print(g)
33 changes: 33 additions & 0 deletions R/function-normalize.R
@@ -0,0 +1,33 @@
# function-normalize.R
#
# Center every numeric column of a data.frame on zero and divide it by
# `sd.scale` times its standard deviation. Non-numeric columns are returned
# untouched.
#
# by Jeffrey Breen <jeffrey@jeffreybreen.com>
#
# Arguments:
#   df        data.frame whose numeric columns should be normalized.
#   sd.scale  divisor multiplier: each centered column is divided by
#             (standard deviation * sd.scale). With sd.scale = 1 the result
#             is in units of one standard deviation.
#   na.rm     if TRUE (the default), missing values are ignored when
#             computing the mean and standard deviation and stay NA in the
#             result; if FALSE, any NA turns the whole column into NA.
#
# Returns:
#   A copy of `df` with the same columns, numeric ones centered and scaled.
#   A numeric column with no spread (constant, or fewer than two non-missing
#   values) is centered only, because dividing by a zero or undefined
#   standard deviation would fill it with NaN/NA.
#

normalize <- function(df, sd.scale = 2, na.rm = TRUE) {
  for (col in colnames(df)) {
    # drop = TRUE so a single column always comes back as a plain vector
    x <- df[, col, drop = TRUE]

    if (!is.numeric(x)) {
      next
    }

    # first, compute the mean and subtract from each data point
    x <- x - mean(x, na.rm = na.rm)

    # next, scale by the specified number of standard deviations, unless the
    # column has no usable spread
    x.sd <- sd(x, na.rm = na.rm)
    if (!is.na(x.sd) && x.sd > 0) {
      x <- x / (x.sd * sd.scale)
    }

    df[, col] <- x
  }

  df
}
Binary file added data/all_alpha_12.xls
Binary file not shown.

0 comments on commit 71bd679

Please sign in to comment.