# Create and use synthetic data 

Useful for bootstrapping data, creating anonymous data, or any other time you need realistic-looking data.

In [2]:
import numpy as np
import pandas as pd
from faker import Faker
fake = Faker()
import datetime
import random

## Create synthetic data

#### Synthetic text

In [33]:
# Generate a short passage of random placeholder text and display it
syntext = fake.text()
syntext

'Attorney city month collection evening. Lawyer focus listen age hit. Someone election effect role with whole.'

#### Synthetic name and address

In [5]:
# Generate a random full name with the default Faker generator
fake.name()

'Bradley Brown'

In [6]:
# Generate a fake U.S. address; the result is one string with a newline
# separating the street line from the city/state/ZIP line
fake.address()

'1091 Paula Ranch Suite 170\nNew Jennifer, MO 07094'

In [8]:
from faker import Factory  # legacy entry point; import kept in case other cells reference it

# Build a second generator localized to France. Passing the locale straight to
# Faker() matches how `fake` was created and is the documented way to localize.
fake2 = Faker('fr_FR')

# Create fake French name
fake2.name()

'Gabriel Lecoq'

In [9]:
# Generate a fake French address from the fr_FR-localized generator
fake2.address()

'rue de Gaillard\n86774 Morin'

#### Other supported locales include:
- bg_BG - Bulgaria
- cs_CZ - Czech Republic
- de_DE - Germany
- dk_DK - Denmark
- el_GR - Greece
- en_AU - Australia
- en_CA - Canada
- en_GB - United Kingdom
- en_US - United States
- es_ES - Spain
- es_MX - Mexico
- fa_IR - Iran
- hi_IN - India
- it_IT - Italy
- ja_JP - Japan
- ko_KR - Korea
- ru_RU - Russia
- sv_SE - Sweden
- tr_TR - Turkey
- uk_UA - Ukraine
- zh_CN - China
- zh_TW - Taiwan

#### Synthetic businesses

In [10]:
# Accumulator for the generated business records
business = []

In [17]:
# Draw a single sample latitude and print it to inspect the generated value
lat = fake.latitude()
print(lat)

6.100142


In [18]:
# Generate 50 fake businesses, each stored as
# [name, single-line address, latitude, longitude, url]
for _ in range(50):
    business.append([
        fake.company(),
        fake.address().replace('\n', ', '),  # flatten the two-line address
        fake.latitude(),
        fake.longitude(),
        fake.url(),
    ])

In [24]:
# Load the generated business records into a table
company_columns = ['company', 'address', 'lat', 'long', 'url']
companies = pd.DataFrame(business, columns=company_columns)

# Coerce both coordinate columns to a numeric dtype
for coord in ('lat', 'long'):
    companies[coord] = pd.to_numeric(companies[coord])

In [26]:
# Preview the first 10 generated companies
companies.head(10)

Unnamed: 0,company,address,lat,long,url
0,Graham-Moran,"14799 Kathy River Apt. 283, Barkershire, OR 54603",-56.34095,33.493974,https://www.black.com/
1,Hopkins-Smith,"784 Matthew Lodge Apt. 746, Carneyborough, AL ...",-82.421858,-70.284524,https://moore.com/
2,Mckay-Peterson,"USCGC Patel, FPO AA 22503",45.244301,159.248283,http://hayes-jackson.net/
3,Taylor-Hernandez,"USNS Roman, FPO AE 59111",-27.024773,47.512085,http://www.shields.com/
4,Young Inc,"54376 Morse Wells, North Justinbury, AR 48072",-13.228623,17.077575,https://morris.info/
5,Sullivan LLC,"USNV Clark, FPO AE 83875",-7.292528,-86.705685,http://www.jones-riggs.net/
6,Lopez-Henderson,"USNV Noble, FPO AE 72790",51.413168,-38.296531,http://www.kelly.biz/
7,"Dodson, Franklin and Ortega","5461 Christopher Trail Suite 385, West Allison...",39.378939,-117.887059,http://www.wilson.com/
8,Tucker-Stewart,"6983 Rice Circles, North Kathleenshire, NV 56409",19.152974,-46.005279,https://higgins.biz/
9,Lewis Inc,"95709 Jason Curve Apt. 044, Hudsonbury, IL 01185",-58.651748,96.624803,http://cross-ray.com/


#### Create fake profile of a person

In [32]:
# Generate a complete fake personal profile (a dict of fields) and display it
person = fake.profile()
person

{'job': 'Lighting technician, broadcasting/film/video',
 'company': 'Craig, Brooks and Gonzales',
 'ssn': '521-14-2683',
 'residence': '0704 Bird Pike\nEast Wayne, RI 71229',
 'current_location': (Decimal('57.1934875'), Decimal('-55.351392')),
 'blood_group': 'B-',
 'website': ['https://adams-kelley.org/',
  'http://mercado.com/',
  'https://www.berg.com/'],
 'username': 'cindy95',
 'name': 'Howard Reed',
 'sex': 'M',
 'address': '28921 Robles Stravenue\nLake Emily, AL 02409',
 'mail': 'ncampbell@hotmail.com',
 'birthdate': datetime.date(1929, 2, 27)}

#### Create synthetic online credit card purchase

In [45]:
# Pool of distinct user-agent strings; dict keys guarantee uniqueness and the
# values (always 1) are unused placeholders
agents = {}

# Keep drawing until the pool holds the target number of distinct strings;
# a repeated string just overwrites its existing key
agent_pool_size = 1001
while len(agents) < agent_pool_size:
    agents[fake.user_agent()] = 1

In [46]:
# Generate 1000 synthetic online credit card purchases, one list per transaction
transact = []

for _ in range(1000):
    # The code relies on credit_card_full() returning newline-separated lines
    # whose first three are: card type, card holder, "<number> <expiration>"
    card_type, card_holder_name, num_and_exp_date = fake.credit_card_full().split('\n')[:3]
    card_num, exp_date = num_and_exp_date.split(' ')[:2]

    # Pick a random merchant row. Use two-axis .iloc[row, col] instead of
    # .iloc[row][col]: the latter applies an integer key to a label-indexed
    # Series, which only works through a deprecated positional fallback.
    merchant_idx = random.randint(0, len(companies) - 1)
    merchant_name = companies.iloc[merchant_idx, 0]
    merchant_address = companies.iloc[merchant_idx, 1]

    transaction_date = fake.date_time_this_decade().strftime('%A %b %d, %Y at %I:%M:%S %p')

    # Amount is stored as a string formatted to two decimal places
    amount = "{:.2f}".format(random.uniform(0.50, 1500))

    # popitem() removes the entry it returns, so each user agent is used at
    # most once; it raises KeyError if the pool is exhausted
    user_agent_str = agents.popitem()[0]
    user_ip = fake.ipv6()

    transact.append([
        transaction_date, amount, card_num, exp_date, card_type,
        card_holder_name, user_agent_str, user_ip, merchant_name,
        merchant_address,
    ])

In [47]:
# Assemble the generated transactions into a table, one column per field
transaction_columns = [
    'Date',
    'Amount',
    'Card Number',
    'Expiration Date',
    'Card Type',
    'Card Owner',
    'User Agent',
    'User IP',
    'Merchant Name',
    'Merchant Address',
]
transactions = pd.DataFrame(transact, columns=transaction_columns)

In [48]:
# Preview the first rows of the transactions table
transactions.head()

Unnamed: 0,Date,Amount,Card Number,Expiration Date,Card Type,Card Owner,User Agent,User IP,Merchant Name,Merchant Address
0,"Wednesday Jan 08, 2020 at 04:20:50 PM",1030.85,2295231375783314,08/23,Mastercard,Douglas Ferguson,Mozilla/5.0 (Android 3.2; Mobile; rv:65.0) Gec...,e8df:9d44:dd08:ff6a:907e:ba30:6aeb:1199,Rodriguez Inc,"91068 Kimberly Wells, Port Cathy, GA 87206"
1,"Monday Jan 06, 2020 at 04:37:48 PM",366.8,4558835026366598,01/21,VISA 16 digit,Craig Haynes,Mozilla/5.0 (Macintosh; PPC Mac OS X 10_9_4 rv...,23fe:1f38:b228:264:10f6:f045:fe0a:5c27,Mejia and Sons,"638 Tammy Cove, Vargaschester, ID 52401"
2,"Wednesday Jan 08, 2020 at 05:14:05 AM",1193.68,30319586603522,02/25,Diners Club / Carte Blanche,Donna Allen,Mozilla/5.0 (Windows NT 4.0) AppleWebKit/534.0...,76b7:40c2:6b67:d897:debc:9f1:b0dc:42dd,Turner Ltd,"4608 Berry Run Apt. 072, Shepherdton, WV 29069"
3,"Saturday Jan 04, 2020 at 08:15:07 PM",1199.78,3545539443739910,12/23,JCB 16 digit,Melissa Jones,Mozilla/5.0 (Android 7.1.1; Mobile; rv:12.0) G...,8a23:6146:8d51:37f5:796c:c033:aa2b:fa93,"Romero, Kennedy and Moreno","26236 Jeffrey Mountain Suite 637, North Gracet..."
4,"Monday Jan 06, 2020 at 10:20:14 AM",11.06,4220834172178889720,06/20,VISA 19 digit,Mark Nicholson,Mozilla/5.0 (Macintosh; U; PPC Mac OS X 10_7_5...,c98b:62e5:d925:5ce3:10d2:98f2:df51:1514,Hopkins-Smith,"784 Matthew Lodge Apt. 746, Carneyborough, AL ..."
