## Goal:  Generate a file of fake data, with the number of entities (rows) chosen by the user

### Step 1:  Import libraries

In [1]:
import random
import pandas as pd

### Step 2:  Make the fake data

* For names, I downloaded the national data text file of popular baby names for 2019 from the SSA website:  https://www.ssa.gov/oact/babynames/limits.html

In [2]:
# header=None: the file is read without a header row, so columns are numbered 0, 1, 2, ...
namesdf = pd.read_csv('yob2019.txt', header=None)
# Column 0 is where the names are stored; keep them as a plain Python list.
names = namesdf.iloc[:, 0].tolist()




* For emails, I generated 100 emails using the website https://www.generatedata.com/ and made a text file

In [3]:
# Read the generated e-mail file without a header row (columns are numbered from 0).
emailsdf = pd.read_csv('emails.txt', header=None)
# Pull the first column out as a plain Python list.
emails = emailsdf.iloc[:, 0].tolist()


* For phone numbers, I generated 100 phone numbers using the website https://www.generatedata.com/ and made a text file

In [4]:
# Read the generated phone-number file without a header row (columns are numbered from 0).
phonesdf = pd.read_csv('phones.txt', header=None)
# Pull the first column out as a plain Python list.
phones = phonesdf.iloc[:, 0].tolist()


*  For street addresses, I generated 100 street addresses using the website https://www.generatedata.com/ and made a text file

In [5]:
# Read the generated street-address file without a header row (columns are numbered from 0).
addressesdf = pd.read_csv('addresses.txt', header=None)
# Pull the first column out as a plain Python list.
addresses = addressesdf.iloc[:, 0].tolist()

*  For cities, I generated 100 cities using the website https://www.generatedata.com/ and made a text file

In [6]:
# Read the generated city file without a header row (columns are numbered from 0).
citiesdf = pd.read_csv('cities.txt', header=None)
# Pull the first column out as a plain Python list.
cities = citiesdf.iloc[:, 0].tolist()

*  For states, I found a csv file that included states at the website https://worldpopulationreview.com/states/state-abbreviations which I downloaded

In [7]:
# Read the downloaded csv with header=None, so every line of the file becomes a data row.
statesdf = pd.read_csv('csvData.csv', header=None)
# Take the third column (position 2) from row 1 onward as a plain Python list.
# NOTE(review): row 0 is skipped, presumably because it is the file's own header line - confirm.
states = statesdf.iloc[1:, 2].tolist()


*  For zip codes, I generated 100 zip codes using the website https://www.generatedata.com/ and made a text file

In [8]:
# Read the generated zip-code file without a header row (columns are numbered from 0).
zipsdf = pd.read_csv('zips.txt', header=None)
# Pull the first column out as a plain Python list.
zips = zipsdf.iloc[:, 0].tolist()


* For number of kids, number of speeding tickets, and whether the person rents or owns their home, I just created reasonable lists

In [9]:
# Hand-made value pools for the columns that do not come from a file.
kids = list(range(7))            # 0 through 6 kids (assumed typical range)
speed_tx = list(range(5))        # 0 through 4 speeding tickets per year (assumed typical range)
home = ['rent', 'own']           # the two options for home situation


*  For birth year, I created a list from the year 1920 to 2020

In [10]:
# Candidate birth years: every integer from 1920 through 2020 inclusive.
# range() stops before its second argument, hence 2021.
# Built directly from range() so no loop variable is left behind in the notebook namespace.
year = list(range(1920, 2021))


*  For salary, I created a list from \$15,000 to \$250,000, in steps of \$5,000

In [11]:
# Candidate salaries: 15000, 20000, ..., 250000 (step of 5000).
# range() stops before its second argument, so 255000 makes 250000 the last value.
# Built directly from range() so no loop variable is left behind in the notebook namespace.
salary = list(range(15000, 255000, 5000))


### Step 3:  Write the functions for choosing random entities from these lists

*  Names

In [12]:
def fake_names(number):
    """Return a list of `number` names, each picked independently at random from `names`.

    Picks are made with replacement, so the same name can appear more than once.
    """
    return [random.choice(names) for _ in range(number)]


In [13]:
fake_names(10)                                       # quick check: display ten randomly chosen names

['Kaleab',
 'Trixie',
 'Runa',
 'Brylei',
 'Brexlyn',
 'Mazin',
 'Korie',
 'Aivan',
 'Semaje',
 'Aero']

* Emails

In [14]:
def fake_emails(number):
    """Return a list of `number` emails, each picked independently at random from `emails`.

    Picks are made with replacement, so the same email can appear more than once.
    """
    return [random.choice(emails) for _ in range(number)]

In [15]:
fake_emails(10)                                     # quick check: display ten randomly chosen emails

['sapien.Nunc@atnisiCum.net',
 'pede@velfaucibusid.ca',
 'est.vitae@ametlorem.ca',
 'urna.suscipit@dignissimtempor.co.uk',
 'luctus@nisimagnased.org',
 'est.arcu@ultrices.net',
 'at.velit.Pellentesque@amet.edu',
 'urna.suscipit@dignissimtempor.co.uk',
 'vitae.odio@urna.edu',
 'vitae.sodales@Morbinon.org']

*  Phone numbers

In [16]:
def fake_phones(number):
    """Return a list of `number` phone numbers, each picked independently at random from `phones`.

    Picks are made with replacement, so the same phone number can appear more than once.
    """
    return [random.choice(phones) for _ in range(number)]

In [17]:
fake_phones(10)                                    # quick check: display ten randomly chosen phone numbers

['(653) 487-9774',
 '(995) 856-8904',
 '(261) 161-5981',
 '(999) 875-4388',
 '(310) 783-2786',
 '(609) 149-6420',
 '(472) 815-6783',
 '(840) 679-0058',
 '(780) 202-3690',
 '(220) 692-1919']

*  Street address, city, state, zip

In [18]:
def fake_address(number):
    """Return a list of `number` street addresses, each picked independently at random from `addresses`.

    Picks are made with replacement, so the same address can appear more than once.
    """
    return [random.choice(addresses) for _ in range(number)]

In [19]:
fake_address(10)                                      # quick check: display ten randomly chosen street addresses

['Ap #293-1008 Ac Street',
 '8898 Laoreet Rd.',
 '430-9502 Feugiat St.',
 '619-3165 Quisque Avenue',
 '3810 Integer Rd.',
 '137-8799 Urna. Ave',
 '8073 Elementum',
 '8898 Laoreet Rd.',
 '8300 Amet',
 'P.O. Box 847']

In [20]:
def fake_city(number):
    """Return a list of `number` cities, each picked independently at random from `cities`.

    Picks are made with replacement, so the same city can appear more than once.
    """
    return [random.choice(cities) for _ in range(number)]

In [21]:
fake_city(10)                                    # quick check: display ten randomly chosen cities

['Wayaux',
 'Linkebeek',
 'Wansin',
 'Beypazarı',
 'San Javier',
 'Nanded',
 'Juneau',
 'Bertiolo',
 "Lutsel K'e",
 'Paço do Lumiar']

In [22]:
def fake_state(number):
    """Return a list of `number` states, each picked independently at random from `states`.

    Picks are made with replacement, so the same state can appear more than once.
    """
    return [random.choice(states) for _ in range(number)]

In [23]:
fake_state(10)                                    # quick check: display ten randomly chosen states

['OR', 'MO', 'TX', 'OR', 'IN', 'NM', 'NM', 'HI', 'ND', 'ND']

In [24]:
def fake_zips(number):
    """Return a list of `number` zip codes, each picked independently at random from `zips`.

    Picks are made with replacement, so the same zip code can appear more than once.
    """
    return [random.choice(zips) for _ in range(number)]

In [25]:
fake_zips(10)                                     # quick check: display ten randomly chosen zip codes

[15874, 75905, 64979, 18275, 66129, 94121, 15874, 75370, 69615, 62740]

* Number of kids

In [26]:
def fake_kids(number):
    """Return a list of `number` kid counts, each picked independently at random from `kids`.

    Picks are made with replacement, so values repeat freely.
    """
    return [random.choice(kids) for _ in range(number)]

In [27]:
fake_kids(10)                                  # quick check: display ten randomly chosen kid counts

[5, 5, 3, 1, 5, 2, 5, 5, 6, 5]

* Number of speeding tickets in a year

In [28]:
def fake_speed_tx(number):
    """Return a list of `number` speeding-ticket counts, each picked independently at random from `speed_tx`.

    Picks are made with replacement, so values repeat freely.
    """
    return [random.choice(speed_tx) for _ in range(number)]

In [29]:
fake_speed_tx(10)                                       # quick check: display ten randomly chosen ticket counts

[2, 2, 4, 3, 1, 4, 1, 0, 1, 0]

* Home status

In [30]:
def fake_home(number):
    """Return a list of `number` home statuses, each picked independently at random from `home`.

    Picks are made with replacement, so values repeat freely.
    """
    return [random.choice(home) for _ in range(number)]

In [31]:
fake_home(10)                                         # quick check: display ten randomly chosen home statuses

['own', 'own', 'own', 'rent', 'rent', 'rent', 'own', 'own', 'own', 'own']

* Birth year

In [32]:
def fake_year(number):
    """Return a list of `number` birth years, each picked independently at random from `year`.

    Picks are made with replacement, so values repeat freely.
    """
    return [random.choice(year) for _ in range(number)]

In [33]:
fake_year(10)                                     # quick check: display ten randomly chosen birth years

[1929, 1966, 1933, 1925, 1974, 2009, 1963, 1975, 1966, 1957]

*  Annual salary

In [34]:
def fake_salary(number):
    """Return a list of `number` salaries, each picked independently at random from `salary`.

    Picks are made with replacement, so values repeat freely.
    """
    return [random.choice(salary) for _ in range(number)]

In [35]:
fake_salary(10)                                     # quick check: display ten randomly chosen salaries

[210000, 235000, 180000, 95000, 100000, 50000, 185000, 200000, 25000, 25000]

### Step 4:  Ask the user the number of entities they'd like and export generated data to a csv file

In [36]:
print('How many rows would you like:')
x=input()                                      # raw text typed by the user

# Convert the answer once, up front, so bad input fails with a clear message
# before any data is generated (int() tolerates surrounding whitespace).
try:
    n_rows=int(x)
except ValueError:
    raise ValueError('Number of rows must be a whole number, got {!r}'.format(x)) from None
if n_rows<0:
    raise ValueError('Number of rows cannot be negative, got {}'.format(n_rows))

# Column header -> generated values.  The dict's insertion order is both the
# order the generators are called in and the column order of the csv file.
fake_columns={
    'Name': fake_names(n_rows),
    'Birth Year': fake_year(n_rows),
    'Email': fake_emails(n_rows),
    'Phone Number': fake_phones(n_rows),
    'Street Address': fake_address(n_rows),
    'City': fake_city(n_rows),
    'State': fake_state(n_rows),
    'Zip Code': fake_zips(n_rows),
    'Number of Kids': fake_kids(n_rows),
    'Number of Speeding Tickets': fake_speed_tx(n_rows),
    'Housing': fake_home(n_rows),
    'Salary': fake_salary(n_rows),
}

df=pd.DataFrame(fake_columns)                 # one row per requested entity
df.to_csv('fake_data.csv', index=False)       # export the data frame to a csv file, without the index column

How many rows would you like:
15
