Permalink
Switch branches/tags
Nothing to show
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
90 lines (79 sloc) 2.32 KB
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sun Aug 13 21:35:36 2017
@author: eric
"""
import random, pylab, numpy
# Seed the generator with a fixed value so the random.sample draws made in
# splitData are repeatable from one run of the script to the next.
random.seed(0)
def rSquared(observed, predicted):
    """Return the coefficient of determination (R^2) of a set of predictions.

    Computed as ``1 - (mean squared error / variance of observed)``.

    Parameters
    ----------
    observed : sequence of numbers or numpy array
        The measured values.
    predicted : sequence of numbers or numpy array
        The model's estimates, element-for-element with ``observed``.

    Returns
    -------
    numpy floating-point scalar
        1.0 for a perfect fit, 0.0 when the predictions do no better than
        the mean of ``observed``, negative when they do worse.

    Notes
    -----
    If every observed value is identical the variance is zero and the
    division yields a non-finite result (numpy warns rather than raises).
    """
    # Coerce to arrays so plain Python lists work for both arguments;
    # list - list would otherwise raise TypeError.
    observed = numpy.asarray(observed, dtype=float)
    predicted = numpy.asarray(predicted, dtype=float)
    error = ((predicted - observed) ** 2).sum()
    meanError = error / len(observed)
    return 1 - (meanError / numpy.var(observed))
class tempDatum(object):
    """A single temperature reading parsed from one comma-separated line.

    Attributes:
        high: the second comma-separated field, converted to float.
        year: the first four characters of the third field, converted to int.
    """

    def __init__(self, s):
        """Split ``s`` on commas and keep the high value and the year.

        The first field is ignored. Raises IndexError if there are fewer
        than three fields and ValueError if a field is not numeric.
        """
        fields = s.split(',')
        self.high = float(fields[1])
        # Only the leading four characters of the date field are used.
        self.year = int(fields[2][:4])

    def getHigh(self):
        """Return the stored high value."""
        return self.high

    def getYear(self):
        """Return the stored year."""
        return self.year
def getTempData(fileName='../unit-3/temperatures.csv'):
    """Read the temperature CSV and return one tempDatum per data line.

    Parameters
    ----------
    fileName : str, optional
        Path of the CSV file. Defaults to the location the script has
        always used, so existing zero-argument calls are unaffected.

    Returns
    -------
    list of tempDatum
        One entry per non-blank line after the header, in file order.
        Empty if the file has no data lines.

    Raises
    ------
    OSError
        If the file cannot be opened.
    """
    # The context manager guarantees the handle is closed even when a
    # malformed line makes tempDatum raise part-way through the file.
    with open(fileName) as inFile:
        # Skip the header line; the default keeps an empty file from
        # raising StopIteration.
        next(inFile, None)
        # Blank lines (e.g. trailing ones) carry no record and are skipped.
        return [tempDatum(line) for line in inFile if line.strip()]
def getYearlyMeans(data):
    """Average the high values of the given records, grouped by year.

    Parameters
    ----------
    data : iterable
        Objects providing ``getYear()`` and ``getHigh()``.

    Returns
    -------
    dict
        Maps each year to the arithmetic mean of the highs recorded for it.
        Keys appear in the order each year is first seen in ``data``.
        Empty when ``data`` is empty.
    """
    highsByYear = {}
    for d in data:
        # setdefault replaces the former try / bare-except, which would
        # also have intercepted unrelated errors such as KeyboardInterrupt.
        highsByYear.setdefault(d.getYear(), []).append(d.getHigh())
    # Build a fresh mapping instead of overwriting values in place.
    return {year: sum(highs) / len(highs)
            for year, highs in highsByYear.items()}
# Load every reading, then reduce them to a single mean high per year.
data = getTempData()
years = getYearlyMeans(data)

# Parallel lists (year, mean high) in the dict's iteration order; they are
# reused further down for the polynomial fits.
xVals = list(years.keys())
yVals = list(years.values())

# Plot the yearly mean against the year.
pylab.plot(xVals, yVals)
pylab.xlabel('Year')
pylab.ylabel('Mean Daily High (C)')
pylab.title('Select U.S. Cities')
def splitData(xVals, yVals):
    """Randomly split paired samples into training and test halves.

    Half of the indices (rounded down) are drawn with ``random.sample`` and
    become the training set; the remaining indices form the test set. The
    original relative order is kept within each half.

    Parameters
    ----------
    xVals : sequence
        The x coordinates.
    yVals : sequence
        The y coordinates, indexed in step with ``xVals``.

    Returns
    -------
    tuple of four lists
        ``(trainX, trainY, testX, testY)``.
    """
    numPoints = len(xVals)
    # The sampled indices are unique, so a set holds the same information
    # as the list while making each membership test O(1) instead of O(n).
    toTrain = set(random.sample(range(numPoints), numPoints // 2))
    trainX, trainY, testX, testY = [], [], [], []
    for i, x in enumerate(xVals):
        y = yVals[i]
        if i in toTrain:
            trainX.append(x)
            trainY.append(y)
        else:
            testX.append(x)
            testY.append(y)
    return trainX, trainY, testX, testY
# Repeated random-split experiment: for each of numSubsets train/test splits,
# fit a polynomial of each degree in `dimensions` to the training half and
# record its R-squared on the held-out test half.
numSubsets = 10
dimensions = (1, 2, 3)
rSquares = {d: [] for d in dimensions}  # degree -> list of test R-squared values

for f in range(numSubsets):
    trainX, trainY, testX, testY = splitData(xVals, yVals)
    for d in dimensions:
        model = pylab.polyfit(trainX, trainY, d)
        # Only the test-set estimates are scored; the former evaluation on
        # trainX was overwritten before use and has been dropped.
        estYVals = pylab.polyval(model, testX)
        rSquares[d].append(rSquared(testY, estYVals))

# Summarise each degree by the mean and standard deviation of its scores.
print('Mean R-squares for test data')
for d in dimensions:
    mean = round(sum(rSquares[d])/len(rSquares[d]), 4)
    sd = round(numpy.std(rSquares[d]), 4)
    print('For dimensionality', d, 'mean =', mean,
          'Std =', sd)
# Dump the individual scores of the linear fits.
print(rSquares[1])