In [28]:
import numpy as np
import scipy as sp
import pandas as pd
from sklearn.preprocessing import normalize as sknormalize
import json
from collections import OrderedDict


In [2]:
# Load the sales file to get the list of US zip codes where we have sales.
credentials = {}
credentials['filename'] = 'SalesJan2009.csv'

# Create a data frame with just the columns we need.
# The file is opened in a `with` block so the handle is closed as soon as
# parsing is done.  No `dtype` is passed for 'US Zip': pandas applies a
# converter INSTEAD of dtype conversion, so giving both only produces a
# ParserWarning.  The converter maps an empty zip field to 0.
with open(credentials['filename']) as raw_data:
    sales_df = pd.read_csv(raw_data, na_filter=False,
                           converters={'US Zip': lambda x: int(x) if x != '' else 0},
                           usecols=['Product', 'US Zip', 'Country'])

# Keep only US sales of a single product with a zip code below 33000
# (the original note describes this cut as "the Northeast").
# NOTE(review): rows whose zip field was empty were mapped to 0 above and
# therefore also pass the `< 33000` test -- confirm that is intended.
sales_df = sales_df[(sales_df.Product == 'Product1')
                    & (sales_df.Country == 'United States')
                    & (sales_df['US Zip'] < 33000)]

# Highest zip code first.
sales_df = sales_df.sort_values(by='US Zip', ascending=False)


In [3]:
# Distinct zip codes present in the filtered sales rows; the census tables
# loaded further down are restricted to this set.
salesZipCodeSet = set(sales_df.loc[:, 'US Zip'])

In [29]:
# Building blocks for the column names of the x19 income file and the
# x01 age/sex file.

# Race label -> suffix letter used inside the column names.
raceDict = {'WHITE':'A', 'BLACK':'B', 'NATAMER':'C', 'ASIAN':'D',
            'PACISL':'E', 'OTHER':'F', 'MULTIRACE':'G', 'HISPLAT':'I'}

incomeAgeColMap = {
    'income_top': 'B19037',    # prefix of every income column
    'income_total': 1,         # field number of the income total column
    # races ordered by their suffix letter (A, B, C, ...)
    'race': OrderedDict(sorted(raceDict.items(), key=lambda pair: pair[1])),
    'age_top': 'B01001',       # prefix of every age column
    'age_total': 1,            # field number of the age total column
}


In [34]:
# Import the x19 income file: point the shared `credentials` dict at it; the
# read_csv call later in this cell reads whatever path is stored under 'filename'.
credentials['filename'] = 'x19_income.csv'

def getIncomeAgeColList(ageBin=0, racePrefix=''):
    """Build the income column names for one age bin of the income table.

    ageBin     -- zero-based age-bin number; every bin shifts the field
                  numbers by 17.
    racePrefix -- race suffix letter placed right after the table prefix, or
                  '' for the table without a race suffix.

    Returns a list of 15 names '<income_top><racePrefix>e<field>' for base
    fields 4..18 (range(4, 19)); the < $10k category is skipped, as noted by
    the original author.
    """
    fieldsPerAgeBin = 17
    shift = ageBin * fieldsPerAgeBin
    tablePrefix = incomeAgeColMap['income_top']

    # With an empty racePrefix the pattern collapses to '<prefix>e<field>'.
    return [tablePrefix + '{}e{}'.format(racePrefix, baseField + shift)
            for baseField in range(4, 19)]
            
def getFirstTwoIncomeAgeColList():
    """Return the two leading income-file columns: 'GEOID' and the total column."""
    totalColumn = '{}e{}'.format(incomeAgeColMap['income_top'],
                                 incomeAgeColMap['income_total'])
    return ['GEOID', totalColumn]

def getFullIncomeAgeColList():
    """Return every column name to read from the income file.

    The list starts with 'GEOID' and the income total column, followed by the
    income columns of age bins 1..3 for the table without a race suffix, and
    then the same three age bins for each race in incomeAgeColMap['race']
    (in that mapping's order).
    """
    numAgeBins = 4  # <25, 25-44, 45-64 and 65+
    # Only ages 25 and over are of interest, so bin 0 (<25) is left out.
    wantedAgeBins = range(1, numAgeBins)

    outList = getFirstTwoIncomeAgeColList()

    for i in wantedAgeBins:
        outList += getIncomeAgeColList(i)

    # .items() rather than the Python-2-only .iteritems(): same iteration
    # order on an OrderedDict, and it also runs on Python 3.
    for race, racePrefix in incomeAgeColMap['race'].items():
        for i in wantedAgeBins:
            outList += getIncomeAgeColList(i, racePrefix)

    return outList

def replaceGEOID(colList):
    """Return a new list equal to colList with each 'GEOID' entry replaced by 'US Zip'."""
    renamed = []
    for name in colList:
        renamed.append('US Zip' if name == 'GEOID' else name)
    return renamed
    
def replaceGEOIDAndSort(colList):
    """Return ['US Zip'] followed by the remaining column names in sorted order.

    Assumes that GEOID is the first element of colList: that element is
    dropped without being inspected.
    """
    remaining = list(colList[1:])
    remaining.sort()
    return ['US Zip'] + remaining

incomeAgeColList = getFullIncomeAgeColList()
# Pass the path itself so pandas opens and closes the file (no dangling handle).
income_df = pd.read_csv(credentials['filename'], usecols=incomeAgeColList)

# Replace GEOID with US Zip in the column list.
incomeAgeColList = replaceGEOID(incomeAgeColList)

# Rename GEOID by NAME instead of overwriting all labels positionally:
# read_csv ignores the order of `usecols` and returns the columns in file
# order, so `income_df.columns = incomeAgeColList` would silently mislabel the
# data whenever the file order differs from the list order.  The columns are
# then put into list order explicitly.
income_df = income_df.rename(columns={'GEOID': 'US Zip'})[incomeAgeColList]

# Convert the values from the GEOID to a US Zip integer (last five characters).
income_df['US Zip'] = income_df['US Zip'].apply(lambda x: int(x[-5:]))

# Restrict to just zip codes in our selected region where we have sales.
income_df = income_df[income_df['US Zip'].isin(salesZipCodeSet)]

# Set US Zip as index
income_df = income_df.set_index('US Zip')


In [47]:
# import the x01 age/sex file (the read_csv call later in this cell reads this path)
credentials['filename'] = 'x01_age_sex.csv'

#this is needed by getFlatAgeColList
#and later when building the zipcode demographic PDF
fineAgeRangeVals = range(9,17) #[9, 10, ... 16] these are the list of field numbers
    
def getAgeColListPairs(ival, racePrefix):
    """Return the pair of age-table column names for fields ival and ival + 15.

    Both names have the form '<age_top><racePrefix>e<field>'.
    NOTE(review): the 15-field offset is called genderBinShift, so the second
    column is presumably the same age bin for the other gender -- confirm
    against the table layout.
    """
    genderBinShift = 15
    tablePrefix = incomeAgeColMap['age_top']
    return [tablePrefix + '{}e{}'.format(racePrefix, fieldNum)
            for fieldNum in (ival, ival + genderBinShift)]
         
def sortFlatAgeColList(x):
    """Sort key for age column names: (race prefix or '', field number).

    The table prefix (incomeAgeColMap['age_top']) is stripped first; what is
    left looks like 'e23' (no race prefix) or 'Ae23' (race prefix 'A').
    """
    tail = x[len(incomeAgeColMap['age_top']):]
    if tail[0] != 'e':
        # 'Ae23' -> ('A', 23)
        return (tail[0], int(tail[2:]))
    # 'e23' -> ('', 23)
    return ('', int(tail[1:]))
    
        
def getFlatAgeColList(racePrefix):
    """Return all fine-age column names for one race prefix, ordered by field number.

    For every field in fineAgeRangeVals the pair from getAgeColListPairs is
    collected; the flat list is then ordered with sortFlatAgeColList.
    """
    columns = [name
               for ival in fineAgeRangeVals
               for name in getAgeColListPairs(ival, racePrefix)]
    columns.sort(key=sortFlatAgeColList)
    return columns

def getFirstAgeColList():
    """Return the two leading age-file columns: 'GEOID' and the total column."""
    totalColumn = '{}e{}'.format(incomeAgeColMap['age_top'],
                                 incomeAgeColMap['age_total'])
    return ['GEOID', totalColumn]

def getFullAgeColList():
    """Return every column name to read from the age/sex file.

    The list starts with 'GEOID' and the age total column, followed by the
    fine-age columns of each race in incomeAgeColMap['race'] (in that
    mapping's order).
    """
    outList = getFirstAgeColList()
    # .items() rather than the Python-2-only .iteritems(): same iteration
    # order on an OrderedDict, and it also runs on Python 3.
    for race, racePrefix in incomeAgeColMap['race'].items():
        outList += getFlatAgeColList(racePrefix)

    return outList


fineAgeColList = getFullAgeColList()

# Pass the path itself so pandas opens and closes the file (no dangling handle).
age_df = pd.read_csv(credentials['filename'], usecols=fineAgeColList)

# Replace GEOID with US Zip in the column list.
fineAgeColList = replaceGEOID(fineAgeColList)

# Rename GEOID by NAME instead of overwriting all labels positionally:
# read_csv ignores the order of `usecols` and returns the columns in file
# order, so `age_df.columns = fineAgeColList` would silently mislabel the data
# whenever the file order differs from the list order.  The columns are then
# put into list order explicitly.
age_df = age_df.rename(columns={'GEOID': 'US Zip'})[fineAgeColList]

# Convert the values from the GEOID to a US Zip integer (last five characters).
age_df['US Zip'] = age_df['US Zip'].apply(lambda x: int(x[-5:]))

# Restrict to just zip codes in our selected region where we have sales.
age_df = age_df[age_df['US Zip'].isin(salesZipCodeSet)]

# Set US Zip as index
age_df = age_df.set_index('US Zip')

In [143]:
def getAgeIncomeRaceDataFrameLabels():
    """Return the numeric bin labels as (ageIncomeList, incomeList, fineAgeList).

    To facilitate random sampling the labels are numbers rather than strings:
    each label is 1 + the leading edge of its bin.

    ageIncomeList -- coarse age bins 25-44, 45-64, 65+
    incomeList    -- income bins in thousands of dollars: 10-29, 30-49, 50-74,
                     75-99, 100-124, 125-149, 150-199, 200+ (the finer bins
                     available in the data are merged to keep it manageable)
    fineAgeList   -- fine age bins 25-29, 30-34, 35-44, 45-54, 55-64, 65-74,
                     75-84, 85+
    """
    coarseAgeEdges = (25, 45, 65)
    incomeEdges = (10, 30, 50, 75, 100, 125, 150, 200)
    fineAgeEdges = (25, 30, 35, 45, 55, 65, 75, 85)

    def toLabels(edges):
        # label = leading bin edge + 1
        return [edge + 1 for edge in edges]

    return (toLabels(coarseAgeEdges), toLabels(incomeEdges), toLabels(fineAgeEdges))

def rePackIncomeAgeColListToPdfsBins(i, rP, data):
    """Merge the 15 fine income columns of one age bin into 8 larger bins.

    i    -- age-bin number passed to getIncomeAgeColList
    rP   -- race prefix passed to getIncomeAgeColList
    data -- mapping from column name to value (e.g. one row of income_df)

    Relies on the column order produced by getIncomeAgeColList, where
    position k holds field (4 + k) + i*17.  Returns a list of 8 values:

        positions 0-3   -> 10-29k
        positions 4-7   -> 30-49k
        positions 8-9   -> 50-74k
        position  10    -> 75-99k
        position  11    -> 100-124k
        position  12    -> 125-149k
        position  13    -> 150-199k
        position  14    -> 200+k
    """
    cols = getIncomeAgeColList(i, rP)

    # (start, stop) slices of `cols` that are added up for each output bin
    binSlices = [(0, 4), (4, 8), (8, 10), (10, 11),
                 (11, 12), (12, 13), (13, 14), (14, 15)]

    packed = []
    for start, stop in binSlices:
        # add strictly left to right, starting from the first column's value
        binTotal = data[cols[start]]
        for col in cols[start + 1:stop]:
            binTotal = binTotal + data[col]
        packed.append(binTotal)
    return packed
    
def getLargBinAgeIncomeRacePdf(zipCode):
    """Build the income-by-age count tables of one zip code, per race.

    Data come from income_df (the x19 income file), with the income columns
    merged into the larger bins of rePackIncomeAgeColListToPdfsBins.

    Returns a dict mapping each race label in incomeAgeColMap['race'] to a
    float64 array with one row per age bin 1, 2, 3 and one column per merged
    income bin, plus a 'total' entry holding the element-wise sum over races.
    """
    income_data_for_zip = income_df.loc[zipCode]

    incomeAgePdfsByRace = {}
    total = None
    # .items() rather than the Python-2-only .iteritems(): same iteration
    # order on an OrderedDict, and it also runs on Python 3.
    for race, racePrefix in incomeAgeColMap['race'].items():
        incomeAgePdfsByRace[race] = np.array(
            [rePackIncomeAgeColListToPdfsBins(i, racePrefix, income_data_for_zip)
             for i in [1, 2, 3]],
            np.float64)

        if total is None:
            # np.array(...) copies, so the in-place += below never touches
            # the first race's own table
            total = np.array(incomeAgePdfsByRace[race])
        else:
            total += incomeAgePdfsByRace[race]

    incomeAgePdfsByRace['total'] = total

    return incomeAgePdfsByRace

def getFineAgeRacePdf(zipCode):
    """Build the fine-grained age count vectors of one zip code, per race.

    Data come from age_df (the x01 age/sex file).  The fine age bins are
    25-29, 30-34, 35-44, 45-54, 55-64, 65-74, 75-84 and 85+.

    Returns a dict mapping each race label in incomeAgeColMap['race'] to a
    float64 array with one entry per field in fineAgeRangeVals (each entry is
    the sum of the column pair from getAgeColListPairs), plus a 'total' entry
    holding the element-wise sum over races.
    """
    fineAge_data_for_zip = age_df.loc[zipCode]
    fineAgePdfsByRace = {}
    total = None
    # .items() rather than the Python-2-only .iteritems(): same iteration
    # order on an OrderedDict, and it also runs on Python 3.
    for race, racePrefix in incomeAgeColMap['race'].items():
        fineAgePdfsByRace[race] = np.array(
            [sum([fineAge_data_for_zip[x] for x in getAgeColListPairs(i, racePrefix)])
             for i in fineAgeRangeVals],
            np.float64)

        if total is None:
            # np.array(...) copies, so the in-place += below never touches
            # the first race's own vector
            total = np.array(fineAgePdfsByRace[race])
        else:
            total += fineAgePdfsByRace[race]

    fineAgePdfsByRace['total'] = total

    return fineAgePdfsByRace
    
def getAgeIncomeRacePdf(zipCode):
    """Assemble the (un-normalized) demographic tables of one zip code.

    Returns a dict with:
      * one entry per race label in incomeAgeColMap['race'], each a list
        [count, incomeByAge, ageDist] where
          - count       is the sum of that race's income-by-age table,
          - incomeByAge is a DataFrame whose rows are the age bins 25-44,
                        45-64 and 65+ and whose columns are the income bins,
          - ageDist     is a Series over the fine age bins;
      * 'race' -- a Series with one total per race;
      * 'zip'  -- the zipCode argument.

    Row/column labels come from getAgeIncomeRaceDataFrameLabels().
    """
    incomeAgePdfsByRace = getLargBinAgeIncomeRacePdf(zipCode)
    fineAgePdfsByRace = getFineAgeRacePdf(zipCode)

    ageIncomeLabels, incomeLabels, fineAgeLabels = getAgeIncomeRaceDataFrameLabels()

    # list(...) gives a real list on Python 3 as well, where .keys() is a view.
    raceLabels = list(incomeAgeColMap['race'].keys())

    pdf = {}
    for race in raceLabels:
        pdf[race] = [incomeAgePdfsByRace[race].sum(),
                     pd.DataFrame(incomeAgePdfsByRace[race],
                                  columns=incomeLabels, index=ageIncomeLabels),
                     pd.Series(fineAgePdfsByRace[race], index=fineAgeLabels)]

    # Using the fine age tables for the race totals seems to be better since
    # there are more data (original author's note).
    pdf['race'] = pd.Series([fineAgePdfsByRace[race].sum() for race in raceLabels],
                            index=raceLabels)
    pdf['zip'] = zipCode

    return pdf

def calculateAgeIncomeRaceCdf(pdf):
    """Turn a PDF dict (see getAgeIncomeRacePdf) into cumulative form.

    * 'zip'  is copied unchanged.
    * 'race' becomes the cumulative sum of the race Series.
    * Every other key holds [count, incomeByAge, ageDist]; the count is kept,
      incomeByAge is accumulated along each row (axis=1) and ageDist is
      accumulated along its index.

    Returns a new dict; the input dict is not modified.
    """
    ageInRCdf = {}
    # .items() rather than the Python-2-only .iteritems(), so this also runs
    # on Python 3.
    for k, v in pdf.items():
        if k == 'zip':
            ageInRCdf[k] = v
        elif k == 'race':
            ageInRCdf[k] = v.cumsum()
        else:
            ageInRCdf[k] = [v[0], v[1].cumsum(axis=1), v[2].cumsum()]

    return ageInRCdf

def normalizePdf(pdf):
    """Normalize a PDF dict (see getAgeIncomeRacePdf) so distributions sum to 1.

    * 'zip'  is copied unchanged.
    * 'race' is divided by its sum.
    * Every other key holds [count, incomeByAge, ageDist]; the count is kept,
      each ROW of incomeByAge is L1-normalized with sknormalize (labels are
      preserved) and ageDist is divided by its sum.

    Returns a new dict; the input dict is not modified.
    """
    normalizedPdfReturn = {}
    # .items() rather than the Python-2-only .iteritems(), so this also runs
    # on Python 3.
    for k, v in pdf.items():
        if k == 'zip':
            normalizedPdfReturn[k] = v
        elif k == 'race':
            normalizedPdfReturn[k] = v/v.sum()
        else:
            incomeByAge = pd.DataFrame(sknormalize(v[1], norm='l1'),
                                       columns=v[1].columns, index=v[1].index)
            normalizedPdfReturn[k] = [v[0], incomeByAge, v[2]/v[2].sum()]

    return normalizedPdfReturn

def normalizeCdf(cdf):
    """Scale a CDF dict (see calculateAgeIncomeRaceCdf) so each CDF ends at 1.

    * 'zip'  is copied unchanged.
    * 'race' is divided by its last element.
    * Every other key holds [count, incomeByAge, ageDist]; the count is kept,
      each ROW of incomeByAge is max-normalized with sknormalize (labels are
      preserved) and ageDist is divided by its last element.

    The last element is taken positionally with .iloc[-1].  The previous
    `s[s.size-1]` is a LABEL lookup on an integer-labelled Series such as the
    age distribution (labels 26, 31, ...), where it raises KeyError.

    Returns a new dict; the input dict is not modified.
    """
    ageInRNormedCdf = {}
    # .items() rather than the Python-2-only .iteritems(), so this also runs
    # on Python 3.
    for k, v in cdf.items():
        if k == 'zip':
            ageInRNormedCdf[k] = v
        elif k == 'race':
            ageInRNormedCdf[k] = v/v.iloc[-1]
        else:
            incomeByAge = pd.DataFrame(sknormalize(v[1], norm='max'),
                                       columns=v[1].columns, index=v[1].index)
            ageInRNormedCdf[k] = [v[0], incomeByAge, v[2]/v[2].iloc[-1]]

    return ageInRNormedCdf


def getRandomPopulation(normedPdfs, N=1):
    """Draw N random individuals from a normalized PDF dict (see normalizePdf).

    For each individual a race is drawn from normedPdfs['race'], then an age
    label from that race's age distribution, then an income from the row of
    that race's income-by-age table that matches the age.

    Returns a list of N dicts with keys 'race', 'zip', 'age' and 'income'
    (income is the income-bin label times 1000.0).
    """
    people = []
    randomRace = np.random.choice(normedPdfs['race'].index, N, p=normedPdfs['race'])
    for race in randomRace:
        individual = {'race':race, 'zip':normedPdfs['zip']}
        individual['age']  = np.random.choice(normedPdfs[race][2].index, 1, p=normedPdfs[race][2])[0]

        # Map the fine age label onto the coarse age row of the income table
        # (26 -> 25-44, 46 -> 45-64, 66 -> 65+).  The oldest bin is tested
        # first: previously `>= 46` was tested first, so the `>= 66` branch
        # could never run and everyone aged 65+ was sampled from the 45-64 row.
        if individual['age'] >= 66:
            ageBin = 66
        elif individual['age'] >= 46:
            ageBin = 46
        else:
            ageBin = 26

        # After transposing, the index holds the income labels and each
        # column is one coarse age bin.
        incomeByAge = normedPdfs[race][1].T
        individual['income'] = np.random.choice(incomeByAge.index*1000.0, 1, p=incomeByAge[ageBin])[0]

        people.append(individual)

    return people

In [147]:
# Draw 100 random individuals for zip code 1810 from its normalized PDFs.
getRandomPopulation(normalizePdf(getAgeIncomeRacePdf(1810)), 100)

[{'age': 26, 'income': 31000.0, 'race': 'HISPLAT', 'zip': 1810},
 {'age': 56, 'income': 201000.0, 'race': 'WHITE', 'zip': 1810},
 {'age': 76, 'income': 151000.0, 'race': 'WHITE', 'zip': 1810},
 {'age': 56, 'income': 151000.0, 'race': 'WHITE', 'zip': 1810},
 {'age': 56, 'income': 51000.0, 'race': 'WHITE', 'zip': 1810},
 {'age': 31, 'income': 51000.0, 'race': 'ASIAN', 'zip': 1810},
 {'age': 46, 'income': 126000.0, 'race': 'BLACK', 'zip': 1810},
 {'age': 36, 'income': 76000.0, 'race': 'WHITE', 'zip': 1810},
 {'age': 56, 'income': 151000.0, 'race': 'ASIAN', 'zip': 1810},
 {'age': 46, 'income': 151000.0, 'race': 'WHITE', 'zip': 1810},
 {'age': 76, 'income': 76000.0, 'race': 'WHITE', 'zip': 1810},
 {'age': 46, 'income': 101000.0, 'race': 'ASIAN', 'zip': 1810},
 {'age': 66, 'income': 76000.0, 'race': 'WHITE', 'zip': 1810},
 {'age': 46, 'income': 31000.0, 'race': 'WHITE', 'zip': 1810},
 {'age': 46, 'income': 201000.0, 'race': 'ASIAN', 'zip': 1810},
 {'age': 66, 'income': 201000.0, 'race': 'WHI

In [None]:
def wrapInDataframeAndScale(pdfs, incomeLabels=None, ageLabels=None):
    """Wrap each per-race table of a product PDF dict in a scaled DataFrame.

    pdfs         -- dict as returned by the productNPdf() functions: 'name'
                    and 'price' plus, per race, {'pdf': 2-D array, 'scale': number}
    incomeLabels -- row labels; defaults to the 9 income bins documented in
                    the productNPdf() functions
    ageLabels    -- column labels; defaults to the 13 age bins documented in
                    the productNPdf() functions

    Returns a new dict in which 'name' and 'price' are copied unchanged and
    every other entry is pd.DataFrame(pdf / scale) with the labels above.

    The labels used to come from getAgeIncomeRaceDataFrameLabels(), whose
    8 income and 8 age labels cannot label the 9 x 13 product tables
    (DataFrame construction raises ValueError on the shape mismatch).
    """
    if incomeLabels is None:
        incomeLabels = [11, 31, 51, 71, 91, 111, 131, 151, 181]
    if ageLabels is None:
        ageLabels = [26, 31, 36, 41, 46, 51, 56, 61, 66, 71, 76, 81, 86]

    newPdfs = {}
    # .items() rather than the Python-2-only .iteritems(), so this also runs
    # on Python 3.
    for k, v in pdfs.items():
        if k in ('name', 'price'):
            newPdfs[k] = v
        else:
            newPdfs[k] = pd.DataFrame(v['pdf']/v['scale'], index=incomeLabels, columns=ageLabels)

    return newPdfs

def product1Pdf():
    """Construct the Product1 PDF (race v income v age), price 70.

    Returns a dict with 'name', 'price' and, for each of 'white', 'black',
    'asian' and 'other', an entry {'pdf': 9 x 13 float64 array, 'scale': 100.0}.

    Rows are the income bins    [11, 31, 51, 71, 91, 111, 131, 151, 181] (k),
    columns are the age bins    [26, 31, 36, 41, 46, 51, 56, 61, 66, 71, 76, 81, 86].
    """
    # Row profiles across the 13 age bins; several rows of the tables repeat.
    flat = [1] * 13
    low = [10] * 4 + [15] * 3 + [8] * 3 + [1] * 3
    mid = [20] * 4 + [25] * 3 + [18] * 3 + [5] * 3
    high = [40] * 4 + [50] * 3 + [35] * 3 + [10] * 3
    taper = [3] * 4 + [5] * 3 + [2] * 3 + [1] * 3
    fives = [5] * 9 + [1] * 4

    # Nine income rows per race, in the order 11k, 31k, ..., 181k.
    rowsByRace = [
        ('white', [flat, flat, low, low, low, mid, mid, taper, flat]),
        ('black', [flat, flat, low, low, mid, mid, high, taper, flat]),
        ('asian', [flat, flat, flat, flat, fives, mid, mid, mid, high]),
        ('other', [flat, flat, low, low, low, mid, mid, mid, mid]),
    ]

    prodPdf = {'name':'Product1', 'price':70}
    for race, rows in rowsByRace:
        # np.array copies the row lists, so every race gets its own array
        prodPdf[race] = {'pdf': np.array(rows, np.float64), 'scale': 100.0}

    return prodPdf

def product2Pdf():
    """Construct the Product2 PDF (race v income v age), price 120.

    Returns a dict with 'name', 'price' and, for each of 'white', 'black',
    'asian' and 'other', an entry {'pdf': 9 x 13 float64 array, 'scale': 600}.

    Rows are the income bins    [11, 31, 51, 71, 91, 111, 131, 151, 181] (k),
    columns are the age bins    [26, 31, 36, 41, 46, 51, 56, 61, 66, 71, 76, 81, 86].
    """
    # Full 13-entry row profiles shared by every race (income rows 11k-111k).
    r10 = [10] * 3 + [2] * 3 + [1] * 7
    r20 = [20] * 3 + [5] * 3 + [2] * 3 + [1] * 4
    r40 = [40] * 3 + [10] * 3 + [4] * 3 + [1] * 4
    sharedRows = [r10, r10, r20, r40, r40, r40]

    # First 12 entries of the three top income rows (131k, 151k, 181k);
    # only the value in the last age bin differs between races.
    r200head = [200] * 3 + [100] * 3 + [10] * 3 + [5] * 3
    r400head = [400] * 3 + [100] * 3 + [40] * 3 + [10] * 3

    # race -> last-age-bin values for the 131k, 151k and 181k rows
    lastBinByRace = [
        ('white', (2, 2, 3)),
        ('black', (1, 1, 1)),
        ('asian', (2, 2, 2)),
        ('other', (2, 2, 2)),
    ]

    prodPdf = {'name':'Product2', 'price':120}
    for race, (last131, last151, last181) in lastBinByRace:
        rows = sharedRows + [r200head + [last131],
                             r200head + [last151],
                             r400head + [last181]]
        prodPdf[race] = {'pdf': np.array(rows, np.float64), 'scale': 600}

    return prodPdf

def product3Pdf():
    """Construct the Product3 PDF (race v income v age), price 10.

    Returns a dict with 'name', 'price' and, for each of 'white', 'black',
    'asian' and 'other', an entry {'pdf': 9 x 13 float64 array, 'scale': 100}.
    All four races use the same table values (each gets its own array).

    Rows are the income bins    [11, 31, 51, 71, 91, 111, 131, 151, 181] (k),
    columns are the age bins    [26, 31, 36, 41, 46, 51, 56, 61, 66, 71, 76, 81, 86].
    """
    # Row profiles across the 13 age bins.
    broad = [40] * 9 + [10] * 4
    r40 = [40] * 3 + [10] * 3 + [4] * 3 + [1] * 4
    r10 = [10] * 3 + [2] * 3 + [1] * 7
    flat = [1] * 13

    # Nine income rows, lowest income first.
    rows = [broad, broad, broad, r40, r40, r10, r10, flat, flat]

    prodPdf = {'name':'Product3', 'price':10}
    for race in ('white', 'black', 'asian', 'other'):
        # a fresh array per race, so the tables are independent objects
        prodPdf[race] = {'pdf': np.array(rows, np.float64), 'scale': 100}

    return prodPdf


def product4Pdf():
    """Construct the Product4 PDF (race v income v age), price 200.

    Returns a dict with 'name', 'price' and, for each of 'white', 'black',
    'asian' and 'other', an entry {'pdf': 9 x 13 float64 array, 'scale': ...}.
    'white', 'asian' and 'other' are uniform tables of 0.1 with scale 10;
    'black' carries the structured table with scale 500.

    Rows are the income bins    [11, 31, 51, 71, 91, 111, 131, 151, 181] (k),
    columns are the age bins    [26, 31, 36, 41, 46, 51, 56, 61, 66, 71, 76, 81, 86].
    """
    def uniformEntry():
        # 9 x 13 table filled with 0.1; a fresh array on every call
        return {'pdf': np.array([[0.1] * 13] * 9, np.float64), 'scale': 10}

    # Row profiles of the structured table.
    r10 = [10] * 3 + [2] * 3 + [1] * 7
    r20 = [20] * 3 + [5] * 3 + [2] * 3 + [1] * 4
    r40 = [40] * 3 + [10] * 3 + [4] * 3 + [1] * 4
    r200 = [200] * 3 + [100] * 3 + [10] * 3 + [5] * 3 + [2]
    r400 = [400] * 3 + [100] * 3 + [40] * 3 + [10] * 3 + [3]
    structuredRows = [r10, r10, r20, r40, r40, r40, r200, r200, r400]

    prodPdf = {'name':'Product4', 'price':200}
    prodPdf['white'] = uniformEntry()
    prodPdf['black'] = {'pdf': np.array(structuredRows, np.float64), 'scale': 500}
    prodPdf['asian'] = uniformEntry()
    prodPdf['other'] = uniformEntry()

    return prodPdf

    
def product5Pdf():
    """Construct the Product5 PDF (race v income v age), price 200.

    Returns a dict with 'name', 'price' and, for each of 'white', 'black',
    'asian' and 'other', an entry {'pdf': 9 x 13 float64 array, 'scale': ...}.
    'white', 'black' and 'asian' are uniform tables of 0.1 with scale 10;
    'other' carries the structured table with scale 500.

    Rows are the income bins    [11, 31, 51, 71, 91, 111, 131, 151, 181] (k),
    columns are the age bins    [26, 31, 36, 41, 46, 51, 56, 61, 66, 71, 76, 81, 86].
    """
    # Row profiles of the structured table.
    r10 = [10] * 3 + [2] * 3 + [1] * 7
    r20 = [20] * 3 + [5] * 3 + [2] * 3 + [1] * 4
    r40 = [40] * 3 + [10] * 3 + [4] * 3 + [1] * 4
    r200 = [200] * 3 + [100] * 3 + [10] * 3 + [5] * 3 + [2]
    r400 = [400] * 3 + [100] * 3 + [40] * 3 + [10] * 3 + [3]
    structuredRows = [r10, r10, r20, r40, r40, r40, r200, r200, r400]

    prodPdf = {'name':'Product5', 'price':200}
    for race in ('white', 'black', 'asian'):
        # 9 x 13 table filled with 0.1; a fresh array per race
        prodPdf[race] = {'pdf': np.array([[0.1] * 13] * 9, np.float64), 'scale': 10}
    prodPdf['other'] = {'pdf': np.array(structuredRows, np.float64), 'scale': 500}

    return prodPdf


def product6Pdf():
    """Build the per-race purchase tables for Product6 (price 200).

    Returns a dict with 'name' and 'price' plus one entry for each of the
    race keys 'white', 'black', 'asian' and 'other'.  Every race entry is a
    dict holding a 9 x 13 float64 array under 'pdf' and a number under
    'scale' (how 'scale' is applied is decided by the caller, not here).

    Table layout (race v income v age):
      rows    -> income bins [11, 31, 51, 71, 91, 111, 131, 151, 181]
      columns -> age bins    [26, 31, 36, 41, 46, 51, 56, 61, 66, 71, 76, 81, 86]
    """
    incomeBins, ageBins = 9, 13

    def uniformTable(value):
        # A fresh array on every call, so no two races share one buffer.
        return np.full((incomeBins, ageBins), value, dtype=np.float64)

    # Row templates for the 'white' table; each template is one income row
    # spelled out across the 13 age columns.
    topRow = [40] * 9 + [10] * 3 + [5]
    midRow = [40] * 3 + [10] * 3 + [4] * 3 + [1] * 4
    lowRow = [10] * 3 + [2] * 3 + [1] * 7
    floorRow = [1] * ageBins

    # First three income rows use topRow, then two rows of each remaining template.
    whiteRows = [topRow] * 3 + [midRow] * 2 + [lowRow] * 2 + [floorRow] * 2

    return {
        'name': 'Product6',
        'price': 200,
        'white': {'pdf': np.array(whiteRows, np.float64), 'scale': 500},
        # The remaining races all use a flat 0.1 table.
        'black': {'pdf': uniformTable(0.1), 'scale': 10},
        'asian': {'pdf': uniformTable(0.1), 'scale': 10},
        'other': {'pdf': uniformTable(0.1), 'scale': 10.0},
    }

    
    
    

In [None]:
# Population size: passed as the second argument to getRandomPopulation below.
N = 500
# Build the shopper population from the PDF obtained for 98116
# (presumably a US zip code -- confirm against getAgeIncomeRacePdf).
# `people` is later passed to simulatePurchases as its `shoppers` argument.
people = getRandomPopulation(normalizePdf(getAgeIncomeRacePdf(98116)), N)
# Product5 tables run through wrapInDataframeAndScale; simulatePurchases
# indexes the result as pdf[race][age][income/1000] and reads 'name'/'price'.
scaledProdPdfs = wrapInDataframeAndScale(product5Pdf())

def simulatePurchases(productPdf, shoppers):
    """Simulate one buy/no-buy decision per shopper for a single product.

    Parameters
    ----------
    productPdf : mapping
        Must provide 'name' and 'price', and be indexable as
        productPdf[race][age][k] where k is the shopper's income divided
        by 1000 and truncated to int; that lookup yields the shopper's
        purchase probability.
    shoppers : sized iterable of mappings
        Each shopper must provide 'race', 'age', 'income' and 'zip'.

    Returns
    -------
    list of dict
        One sale record for every shopper whose uniform [0, 1) draw was
        strictly below their purchase probability, in shopper order.
    """
    # One uniform draw per shopper, generated up front in a single call.
    draws = np.random.random(len(shoppers))
    purchases = []

    for shopper, draw in zip(shoppers, draws):
        prob = productPdf[shopper['race']][shopper['age']][int(shopper['income']/1000)]
        if draw < prob:
            # Record keys follow the sales CSV header:
            #Transaction_date,Product,Price,Payment_Type,Name,City,State,Country,Account_Created,Last_Login,Latitude,Longitude,US Zip
            #01/02/2009 04:53,Product1,1200,Visa,Betina,Parkville,MO,United States,1/2/2009 04:42,1/2/2009 07:49,39.195,-94.68194,64152
            purchases.append({
                "Transaction_date":"01/01/2009",  # fixed placeholder date
                # Name and price come from the product that was passed in,
                # not from a notebook-level global.
                "Product":productPdf['name'],
                "Price":productPdf['price'],
                "US Zip":shopper['zip'],
                'customer':shopper,
                'Country':'United States'
            })

    return purchases

# Run the simulation over the sampled population, then report how many
# sales were generated followed by the full records as indented JSON.
purchases = simulatePurchases(scaledProdPdfs, people)
# Parenthesised single-argument print produces the same output on
# Python 2 and Python 3.
print(len(purchases))

print(json.dumps(purchases, indent=1))

In [None]:
# Inspect the raw result of getAgeIncomeRacePdf for 98116, the same argument
# used when building `people`; the value is shown as the cell output.
getAgeIncomeRacePdf(98116)

In [None]:
# Number of sale records returned by the last simulatePurchases run.
len(purchases)

In [None]:
# Display every simulated sale record (the whole list is echoed as cell output).
purchases
