In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns

np.random.seed(seed=2554)


# CREATING SEGMENT DATA

# Simulating Data - First Steps

# Variables generated for every simulated observation, in positional order.
# The per-segment lists below are aligned to this order.
segment_variables = ['age', 'gender', 'income', 'kids', 'own_home', 'subscribe']

# Name of the distribution each variable is drawn from.
segment_variables_distribution = {
    'age': 'normal',
    'gender': 'binomial',
    'income': 'normal',
    'kids': 'poisson',
    'own_home': 'binomial',
    'subscribe': 'binomial',
}

# Per-segment "mean" parameter of each variable (same order as
# segment_variables). The generation code further down passes it as `loc`
# for normal draws, `lam` for poisson draws and `p` for binomial draws.
segment_means = {
    'suburb_mix': [40, 0.5, 55000, 2, 0.5, 0.1],
    'urban_hip': [24, 0.7, 21000, 1, 0.2, 0.2],
    'travelers': [58, 0.5, 64000, 0, 0.7, 0.05],
    'moving_up': [36, 0.3, 52000, 2, 0.3, 0.2],
}

# Per-segment standard deviation of each variable.
# None = not applicable for the variable (only the normal draws use it).
segment_stddev = {
    'suburb_mix': [5, None, 12000, None, None, None],
    'urban_hip': [2, None, 5000, None, None, None],
    'travelers': [8, None, 21000, None, None, None],
    'moving_up': [4, None, 10000, None, None, None],
}

# Segment names and the number of observations to simulate for each.
segment_names = ['suburb_mix', 'urban_hip', 'travelers', 'moving_up']
segment_sizes = {
    'suburb_mix': 100,
    'urban_hip': 50,
    'travelers': 80,
    'moving_up': 70,
}

# Collect everything into one lookup:
#   segment_statistics[segment]['size']            -> number of observations
#   segment_statistics[segment][variable]['mean']   -> mean parameter
#   segment_statistics[segment][variable]['stddev'] -> std. deviation or None
segment_statistics = {}
for name in segment_names:
    per_variable = {
        variable: {'mean': mean, 'stddev': stddev}
        for variable, mean, stddev in zip(
            segment_variables, segment_means[name], segment_stddev[name]
        )
    }
    segment_statistics[name] = {'size': segment_sizes[name], **per_variable}


#Final Segment Data Generation

#Pseudocode
#Set up dictionary "segment_constructor" and pseudorandom number sequence
#For each SEGMENT in "segment_names" {
#   Set up a temporary dictionary "segment_data_subset" for this SEGMENT's data
#   For each VARIABLE in "segment_variables" {
#       Check "segment_variables_distribution[variable]" to find the distribution type for VARIABLE
#
#       Look up the segment size and the variable's mean and standard deviation in
#       "segment_statistics" for that SEGMENT and VARIABLE, then
#       ... draw random data for VARIABLE (within SEGMENT) with
#       ... "size" observations
#   }
#   Add this SEGMENT's data ("segment_data_subset") to the overall data ("segment_constructor")
#}
#Create a DataFrame "segment_data" from "segment_constructor"

# Per-segment DataFrames, keyed by segment name.
segment_constructor = {}

# Iterate over segments to create data for each.
# The draw order (segment by segment, variable by variable) is kept fixed so
# that the seeded random stream reproduces the same values on every run.
for name in segment_names:
    size = segment_statistics[name]['size']
    segment_data_subset = {}

    # Within each segment, iterate over the variables and generate data
    for variable in segment_variables:
        distribution = segment_variables_distribution[variable]
        variable_stats = segment_statistics[name][variable]

        if distribution == 'normal':
            # Draw random normals
            segment_data_subset[variable] = np.random.normal(
                loc=variable_stats['mean'],
                scale=variable_stats['stddev'],
                size=size
            )
        elif distribution == 'poisson':
            # Draw counts
            segment_data_subset[variable] = np.random.poisson(
                lam=variable_stats['mean'],
                size=size
            )
        elif distribution == 'binomial':
            # Draw binomial: a single trial per observation gives a 0/1 value
            segment_data_subset[variable] = np.random.binomial(
                n=1,
                p=variable_stats['mean'],
                size=size
            )
        else:
            # Unknown distribution name: fail loudly and name the offender.
            raise ValueError(
                'Bad segment data type:{0}'.format(distribution)
            )

    # Label the rows once every variable has been drawn, so 'Segment' is the
    # last column, and build this segment's DataFrame a single time.
    segment_data_subset['Segment'] = np.repeat(name, repeats=size)
    segment_constructor[name] = pd.DataFrame(segment_data_subset)

# Stack all segments into one DataFrame, once, after every segment is built.
# The index is not reset, so each segment keeps its own row labels.
segment_data = pd.concat(segment_constructor.values())

# Recode the generated indicator columns into readable values:
# gender becomes 'male' (truthy) / 'female' (falsy) ...
segment_data['gender'] = segment_data['gender'].map(
    lambda flag: 'male' if flag else 'female'
)
# ... and the remaining indicators become plain booleans.
for indicator in ('own_home', 'subscribe'):
    segment_data[indicator] = segment_data[indicator].map(bool)

