<img style="float: right;" src="http://minneanalytics.org/wp/wp-content/uploads/2018/04/BDT18_LP-02-02.jpg" />

# Generate Fake Data using [faker](https://github.com/joke2k/faker)

### Faker is a Python package that generates fake data for you. Whether you need to bootstrap your database, create good-looking XML documents, fill-in your persistence to stress test it, or anonymize data taken from a production service, Faker is for you.

In [1]:
from __future__ import print_function
import sys

In [2]:
import warnings
warnings.simplefilter("ignore")
warnings.filterwarnings("ignore", "DeprecationWarning") 
warnings.filterwarnings("ignore", "SAWarning") 

In [3]:
import pandas as pd
import numpy as np
import collections
from collections import *
import random

In [4]:
import time
import datetime
from datetime import *
from dateutil.relativedelta import relativedelta
import unicodedata

In [5]:
from faker import Faker
from faker_web import WebProvider

In [6]:
import tqdm 
from tqdm import *

In [7]:
# Display configuration for the notebook. Every option is addressed by its
# fully qualified key: abbreviated keys such as 'precision' are resolved by
# pattern matching and fail when they match more than one registered option.
pd.set_option('display.max_columns', 512)
pd.set_option('display.max_rows', 512)
pd.set_option('display.width', 1024)
pd.set_option('display.max_info_rows', 512)
pd.set_option('display.expand_frame_repr', False)
pd.set_option('mode.chained_assignment', 'warn')
pd.set_option('display.precision', 2)
# Render floats right-aligned in a 6-character field with two decimals.
pd.set_option('display.float_format', '{:6.2f}'.format)
pd.set_option('display.notebook_repr_html', True)

## Instantiate a faker object generator

In [8]:
# Create the Faker generator used by the cells below and register the
# WebProvider imported from faker_web on it.
fake = Faker()
fake.add_provider(WebProvider)
# Seed the generator with a fixed value.
# NOTE(review): newer Faker releases are reported to reject seed() on an
# instance in favour of Faker.seed() / seed_instance() -- confirm against the
# Faker version installed for this notebook.
fake.seed(0)

## Direct a shell command to the linux operating system

In [9]:
!mkdir /media/sf_mnlytics/data

mkdir: cannot create directory ‘/media/sf_mnlytics/data’: File exists


## Function to clean up unwanted chars from a string

In [10]:
def sanitize_string(s):
    """Return `s` with control-type characters blanked out and the ends trimmed.

    Every character whose Unicode general category starts with "C" is
    replaced by a single space, then leading and trailing whitespace is
    stripped.

    Parameters
    ----------
    s : str, bytes or None
        Text to clean. Bytes are decoded as UTF-8 (undecodable bytes are
        dropped) before cleaning.

    Returns
    -------
    str
        The cleaned text, or "?" when `s` is None, empty, or has nothing
        left after cleaning.
    """
    if s is None:
        return "?"
    # Iterating bytes yields integers on Python 3, which unicodedata cannot
    # classify, so work on text only.
    if isinstance(s, bytes):
        s = s.decode("utf-8", "ignore")
    if len(s) == 0:
        return "?"
    cleaned = "".join(
        " " if unicodedata.category(ch)[0] == "C" else ch for ch in s
    ).strip()
    # The emptiness check runs after strip() so that input consisting only of
    # whitespace/control characters also yields the "?" placeholder.
    return cleaned if cleaned else "?"

In [11]:
# Quick check: the embedded line feed should come back as a plain space.
sanitize_string("I have a cr-lf\n I want it removed")

'I have a cr-lf  I want it removed'

## Function to generate fake customer data

In [12]:
def data_gen(count, seed, out_dir="/media/sf_mnlytics/data", verbose=True):
    """Generate `count` fake customer rows and write them to a gzipped TSV file.

    Each row holds a name, gender, date of birth, age, address, SSN, two
    account balances, credit-card details, job, zipcode and a lat/long pair
    produced by the module-level `fake` generator. The same randomly chosen
    card type is used for every credit-card field of a row, for both genders.

    Parameters
    ----------
    count : int
        Number of rows to generate.
    seed : int
        Seed passed to `fake` and to the private generator that picks gender
        and card type; it is also embedded (zero-padded to two digits) in the
        output file name.
    out_dir : str, optional
        Directory receiving ``fake_customers_<seed>.csv.gz``. The default is
        the directory that used to be hard-coded here.
    verbose : bool, optional
        When True (default) print one line per sanitized column.

    Returns
    -------
    pandas.DataFrame
        The generated rows, columns in alphabetical order, with every
        object-typed column passed through `sanitize_string`.
    """
    # NOTE(review): newer Faker releases are reported to reject seed() on an
    # instance -- confirm against the installed Faker version.
    fake.seed(seed)
    # Gender and card type are drawn from a private generator seeded with
    # `seed`, so they do not depend on the state of the global random module.
    rng = random.Random(seed)

    card_types = ('maestro', 'mastercard', 'visa16', 'visa13', 'amex',
                  'discover', 'diners', 'jcb15', 'jcb16')
    # `datetime` is the class here: the notebook's `from datetime import *`
    # rebinds the name after `import datetime`.
    today = datetime.now().date()
    dob_start = datetime(1917, 1, 1)
    dob_end = datetime(1990, 1, 12)

    records = []
    for _ in tnrange(count, desc="Data generator", leave=True):
        dob = fake.date_between_dates(date_start=dob_start, date_end=dob_end)
        ctype = rng.choice(card_types)
        if rng.randint(0, 1) == 0:
            gender = 'm'
            first_name = fake.first_name_male()
            last_name = fake.last_name_male()
        else:
            gender = 'f'
            first_name = fake.first_name_female()
            last_name = fake.last_name_female()

        # All values stay text (no .encode()): bytes would not survive the
        # per-character sanitizing step or the CSV export on Python 3.
        records.append({
            'first_name': first_name,
            'last_name': last_name,
            'gender': gender,
            'date_of_birth': dob.strftime("%m/%d/%Y"),
            'age': relativedelta(today, dob).years,
            'address': fake.address(),
            'ssn': fake.ssn(),
            'checking': float(fake.pydecimal(left_digits=4, right_digits=2, positive=True)),
            'saving': float(fake.pydecimal(left_digits=8, right_digits=2, positive=True)),
            'credit_card_full': fake.credit_card_full(card_type=ctype),
            'credit_card_expire': fake.credit_card_expire(start="now", end="+10y", date_format="%m/%y"),
            'credit_card_provider': fake.credit_card_provider(card_type=ctype),
            'credit_card_number': fake.credit_card_number(card_type=ctype),
            'credit_card_security_code': fake.credit_card_security_code(card_type=ctype),
            'job': fake.job(),
            'zipcode': fake.zipcode(),
            'lat': float(fake.latitude()),
            'long': float(fake.longitude())
        })

    df_0 = pd.DataFrame(data=records)
    df = df_0[sorted(df_0.columns)].copy(deep=True)

    # -------------------------------------------------------------------------------
    # each column of object type (string) is sanitized for unwanted chars
    # -------------------------------------------------------------------------------
    for c in df.columns:
        if df[c].dtype == "object":
            if verbose:
                print("sanitazing:{0} - {1}".format(c, df[c].dtype))
            df[c] = df[c].apply(sanitize_string)

    out_path = "{0}/fake_customers_{1:>02}.csv.gz".format(out_dir, seed)
    df.to_csv(out_path, sep='\t', index=False, index_label=None, compression="gzip")
    return df

# Loop to generate fake customer files

- set how many files will be created
- set how many rows per file will be generated

In [13]:
# Number of files to create; the loop below hands each value produced by
# tnrange(max_file) to data_gen as the seed.
max_file = 64
# Number of fake customer rows requested for each file.
max_row = 8192

In [14]:
# Generate one file per seed. data_gen writes the CSV itself, so the returned
# DataFrame is discarded by binding it to `_`.
for s in tnrange(max_file,desc="File generator"):
    _ = data_gen(max_row,s)

HBox(children=(IntProgress(value=0, description=u'File generator', max=64), HTML(value=u'')))

HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


HBox(children=(IntProgress(value=0, description=u'Data generator', max=8192), HTML(value=u'')))

sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object



# Generate a large fake customers file

In [15]:
# Generate one larger batch in a single data_gen call; the progress bar for this
# call reports max=32768, and fake_customers_100.csv.gz shows up in the directory
# listing that follows.
# NOTE(review): presumably the arguments are (number of rows, output file index) --
# confirm against the data_gen definition.
# NOTE(review): index 100 does not fit the two-digit naming of the other files
# (fake_customers_00 .. fake_customers_56), so it lists between _09 and _10.
# The result is bound to `_`, so the cell shows no Out[] value.
_ = data_gen(32768,100)

HBox(children=(IntProgress(value=0, description=u'Data generator', max=32768), HTML(value=u'')))


sanitazing:address - object
sanitazing:credit_card_expire - object
sanitazing:credit_card_full - object
sanitazing:credit_card_number - object
sanitazing:credit_card_provider - object
sanitazing:credit_card_security_code - object
sanitazing:date_of_birth - object
sanitazing:first_name - object
sanitazing:gender - object
sanitazing:job - object
sanitazing:last_name - object
sanitazing:ssn - object
sanitazing:zipcode - object


In [16]:
# IPython shell escape: list the data directory to see which
# fake_customers_*.csv.gz files are present.
# NOTE(review): this is an absolute, machine-specific path; it presumably has to
# match the directory data_gen writes to -- confirm, and consider a shared constant.
!ls /media/sf_mnlytics/data

dot.txt			   fake_customers_21.csv.gz  fake_customers_44.csv.gz
fake_customers_00.csv.gz   fake_customers_22.csv.gz  fake_customers_45.csv.gz
fake_customers_01.csv.gz   fake_customers_23.csv.gz  fake_customers_46.csv.gz
fake_customers_02.csv.gz   fake_customers_24.csv.gz  fake_customers_47.csv.gz
fake_customers_03.csv.gz   fake_customers_25.csv.gz  fake_customers_48.csv.gz
fake_customers_04.csv.gz   fake_customers_26.csv.gz  fake_customers_49.csv.gz
fake_customers_05.csv.gz   fake_customers_27.csv.gz  fake_customers_50.csv.gz
fake_customers_06.csv.gz   fake_customers_28.csv.gz  fake_customers_51.csv.gz
fake_customers_07.csv.gz   fake_customers_29.csv.gz  fake_customers_52.csv.gz
fake_customers_08.csv.gz   fake_customers_30.csv.gz  fake_customers_53.csv.gz
fake_customers_09.csv.gz   fake_customers_31.csv.gz  fake_customers_54.csv.gz
fake_customers_100.csv.gz  fake_customers_32.csv.gz  fake_customers_55.csv.gz
fake_customers_10.csv.gz   fake_customers_33.csv.gz  fake_customers_56.csv.gz
