In [4]:
# %load LinRegLearner.py
"""
A simple wrapper for linear regression.  (c) 2015 Tucker Balch
"""

import numpy as np

class LinRegLearner(object):
    """
    Least-squares linear regression with a constant (intercept) term.

    After addEvidence() the fitted weights are kept in self.model_coefs:
    one coefficient per feature column, followed by the constant term in
    the last position.
    """

    def __init__(self, verbose = False):
        """
        @summary: Create an untrained learner.
        @param verbose: kept on the instance as self.verbose; this class
        does not print anything itself.
        """
        self.verbose = verbose

    def addEvidence(self,dataX,dataY):
        """
        @summary: Add training data to learner
        @param dataX: X values of data to add, a 2-D array with one row per sample
        @param dataY: the Y training values, 1-D or a single-column 2-D array
        """
        dataX = np.asarray(dataX)
        dataY = np.asarray(dataY)

        # A column vector of targets would make lstsq return coefficients of
        # shape (n_features + 1, 1), which query() cannot line up with its
        # points; flatten it so the coefficients stay 1-D.
        if dataY.ndim == 2 and dataY.shape[1] == 1:
            dataY = dataY[:, 0]

        # slap on 1s column so linear regression finds a constant term
        newdataX = np.ones([dataX.shape[0],dataX.shape[1]+1])
        newdataX[:,0:dataX.shape[1]]=dataX

        # build and save the model (only the solution vector is needed)
        self.model_coefs = np.linalg.lstsq(newdataX, dataY)[0]

    def query(self,points):
        """
        @summary: Estimate a set of test points given the model we built.
        @param points: should be a numpy array with each row corresponding to a specific query.
        A single 1-D row is accepted and treated as one query.
        @returns the estimated values according to the saved model, one per query row.
        @raises ValueError: if the number of columns does not match the number of
        features the model was trained on.
        """
        # Promote a lone 1-D query to a one-row matrix so that axis 1 exists.
        points = np.atleast_2d(points)

        # Without this check a wrong column count can still broadcast against
        # the coefficient vector and produce plausible-looking garbage.
        n_features = self.model_coefs.shape[0] - 1
        if points.shape[1] != n_features:
            raise ValueError("query points have %d columns but the model was trained on %d features"
                             % (points.shape[1], n_features))

        return (self.model_coefs[:-1] * points).sum(axis = 1) + self.model_coefs[-1]

if __name__=="__main__":
    # Single-argument print(...) produces the same output under Python 2 and
    # Python 3, whereas the bare print statement is a SyntaxError on Python 3.
    print("the secret clue is 'zzyzx'")


the secret clue is 'zzyzx'


In [None]:
# %load testlearner.py
"""
Test a learner.  (c) 2015 Tucker Balch
"""

import numpy as np
import math
import LinRegLearner as lrl

def _report(title, actualY, predY):
    """
    Print the RMSE and correlation of predictions against the actual values.

    @param title: heading printed above the two metrics
    @param actualY: array of true target values
    @param predY: array of predicted values, same length as actualY
    """
    rmse = math.sqrt(((actualY - predY) ** 2).sum()/actualY.shape[0])
    print("")
    print(title)
    print("RMSE:  {}".format(rmse))
    c = np.corrcoef(predY, y=actualY)
    print("corr:  {}".format(c[0,1]))

if __name__=="__main__":
    # 'with' closes the file even if a row fails to parse
    with open('Data/ripple.csv') as inf:
        data = np.array([[float(field) for field in line.strip().split(',')] for line in inf])

    # compute how much of the data is training and testing
    # (int() because math.floor returns a float on Python 2 and numpy
    # requires integer slice bounds)
    train_rows = int(math.floor(0.6* data.shape[0]))
    test_rows = data.shape[0] - train_rows

    # separate out training and testing data; the last column is the target
    trainX = data[:train_rows,0:-1]
    trainY = data[:train_rows,-1]
    testX = data[train_rows:,0:-1]
    testY = data[train_rows:,-1]

    print(testX.shape)
    print(testY.shape)

    # create a learner and train it
    learner = lrl.LinRegLearner(verbose = True) # create a LinRegLearner
    learner.addEvidence(trainX, trainY) # train it

    # evaluate in sample
    _report("In sample results", trainY, learner.query(trainX))

    # evaluate out of sample
    _report("Out of sample results", testY, learner.query(testX))

    # Sketch of building several learners from keyword dictionaries.  Left
    # disabled: LinRegLearner.__init__ only accepts 'verbose', so passing
    # k=... would raise TypeError.
    #learners = []
    #for i in range(0,10):
        #kwargs = {"k":i}
        #learners.append(lrl.LinRegLearner(**kwargs))


In [7]:
 # Keyword arguments passed to dict() become its string keys, as the output below shows.
 dict(a='6',b='7')

{'a': '6', 'b': '7'}

In [13]:
# 5x5 array of random floats (no seed is set, so the values differ on every run)
a = np.random.random((5,5))

In [14]:
# A bare name as the cell's last expression makes the notebook display its repr
a

array([[ 0.60096922,  0.52337591,  0.91874355,  0.76102785,  0.56455702],
       [ 0.63086049,  0.88606088,  0.61661155,  0.2624461 ,  0.24583625],
       [ 0.01973378,  0.2772443 ,  0.62400951,  0.73075216,  0.45881075],
       [ 0.22760621,  0.44380852,  0.84897319,  0.41005337,  0.14408671],
       [ 0.4859172 ,  0.44626881,  0.02020651,  0.61070284,  0.61850924]])

In [16]:
# axis = 0 averages down the rows, giving one mean per column (5 values here)
a.mean(axis = 0)

array([ 0.39301738,  0.51535169,  0.60570886,  0.55499646,  0.40635999])

In [18]:
# 100 standard-normal draws, each multiplied by 5 (so the spread is scaled by 5)
np.random.randn(100)*5

array([  5.85895073,  -3.8183966 ,   6.329727  ,  -6.98604677,
        -6.14053172,   2.36659603,   3.86668298,   1.733075  ,
        -0.09228146,  -2.45260826,   4.22871329,   5.80890507,
         7.53110918,  -7.6109338 ,  -2.12253727,   4.56736187,
         2.35359602,   3.38778883,  -2.2724162 ,  -8.9061816 ,
        -0.68216519,  -4.80531501,  -5.66226691,  -5.00461601,
        -0.32589687,   0.73597768,  -7.34531348,   3.39512039,
        -5.27172539,  -1.12524847,   0.37580551,  -5.23132544,
         4.44999466,  -6.37832277,  -7.67411802,  -0.79947696,
         1.32454129,   2.86676807,   2.1174278 ,  -5.0071728 ,
        -4.11866313,   4.51180757,   1.37091041,  -0.17085355,
        -2.60851525,   6.53724333,   7.02397721,  -4.57015299,
         0.90318312,  -0.23179443,  -8.51018714,  -1.73397673,
        -1.3773961 , -10.67278314,  -3.81447756,  -7.48950398,
         3.05657174,   2.59006565,  -7.34350663,   0.89428649,
         2.74328734,  -2.91800882,   0.17205785,   0.83

In [22]:
# Candidate values and the probability attached to each of them
v = np.array([1,2,3])
p = np.array([0.2,0.5,0.3])
# Running total of the probabilities: the upper edge of each value's
# interval within [0, 1]
bins = np.cumsum(p)
# Single-argument print(...) gives the same output on Python 2 and Python 3
print(bins)

[ 0.2  0.7  1. ]


In [23]:
# Print the docstring of np.random.random_sample
help(np.random.random_sample)

Help on built-in function random_sample:

random_sample(...)
    random_sample(size=None)
    
    Return random floats in the half-open interval [0.0, 1.0).
    
    Results are from the "continuous uniform" distribution over the
    stated interval.  To sample :math:`Unif[a, b), b > a` multiply
    the output of `random_sample` by `(b-a)` and add `a`::
    
      (b - a) * random_sample() + a
    
    Parameters
    ----------
    size : int or tuple of ints, optional
        Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
        ``m * n * k`` samples are drawn.  Default is None, in which case a
        single value is returned.
    
    Returns
    -------
    out : float or ndarray of floats
        Array of random floats of shape `size` (unless ``size=None``, in which
        case a single float is returned).
    
    Examples
    --------
    >>> np.random.random_sample()
    0.47108547995356098
    >>> type(np.random.random_sample())
    <type 'float'>
    >>> n

In [25]:
# An exponent of 0.5 is a square root; the displayed result is 0.8 up to floating-point rounding
np.power(0.64,0.5)

0.80000000000000004

In [29]:
# Five ones with an integer dtype (the output shows 1 rather than 1.)
np.ones(5, dtype = 'int')

array([1, 1, 1, 1, 1])