# Faker for testing databases
Learn to use the `faker` module to generate realistic fake data for testing database designs.

In [3]:
from faker import Faker

In [4]:
# Create one generator instance; every `faker.<method>()` call below draws random data from it.
faker = Faker()

In [9]:
# Full fake credit-card record: card type, holder name, number with expiry, and security code.
# print() is used because the value is a multi-line string.
print(faker.credit_card_full())

JCB 16 digit
Daniel Hicks
3548967809011045 11/30
CVC: 906



In [10]:
# Random date of birth, returned as a datetime.date.
faker.date_of_birth()

datetime.date(1937, 4, 18)

In [12]:
# Random phone number, returned as a string.
faker.phone_number()

'3738560605'

In [14]:
# Random filler text made of real-looking words.
print(faker.paragraph())

Everybody on may group reduce page.


In [15]:
# Random vehicle license plate, returned as a string.
faker.license_plate()

'022 3XR'

In [16]:
# Random street address, returned as a string.
faker.street_address()

'8954 Tina Spring Suite 348'

In [17]:
# Random coordinate value, returned as a Decimal.
faker.coordinate()

Decimal('-110.498235')

In [19]:
# List of four random words.
faker.words(4)

['whether', 'economic', 'woman', 'like']

## Learn about Cursors
Cursors are the usual way of issuing database queries and processing their results.

In [20]:
import json
# Read the connection settings from a local JSON file so that no credentials
# are written into the notebook itself.
with open('cred.json') as cred_file:
    creds = json.loads(cred_file.read())

In [21]:
# Listing a dict yields only its keys, so the secret values are not displayed.
list(creds)

['host', 'user', 'password']

In [22]:
import pymysql

# Establish a database connection from the settings held in `creds`.
# `password=` is the documented keyword argument; `passwd=` is only a deprecated alias.
# autocommit=True makes every statement take effect immediately, so no explicit
# conn.commit() is needed after the INSERTs below.
conn = pymysql.connect(
    host=creds['host'],
    user=creds['user'],
    password=creds['password'],
    autocommit=True,
)

In [23]:
# A DictCursor returns each row as a dict keyed by column name.
cursor = conn.cursor(pymysql.cursors.DictCursor)

In [27]:
# Create the working schema. IF NOT EXISTS keeps this cell re-runnable: the schema
# persists on the server, so a plain CREATE SCHEMA fails on every run after the first.
cursor.execute("CREATE SCHEMA IF NOT EXISTS dimitri_test")

1

In [28]:
# A triple-quoted string may span several lines; the line breaks become part of the value.
# The SQL statements in the following cells are written with the same multi-line syntax.
s = '''
Yesterday, 
all my troubles seemed so far away.
Now it seems as though they're here to stay.
Oh, how I long for yesterday.
'''

In [32]:
# Define the person table, keyed by person_id.
# IF NOT EXISTS makes the statement a no-op when the table is already there.
create_fake_person = """
CREATE TABLE IF NOT EXISTS dimitri_test.fake_person(
person_id int not NULL,
first_name varchar(30) NOT NULL,
last_name varchar(30) NOT NULL,
date_of_birth date NOT NULL,
primary key(person_id)
)
"""
cursor.execute(create_fake_person)

0

In [36]:
# Insert one person. The values travel separately from the SQL text as
# parameters (the %s placeholders), so the driver handles quoting.
new_person = (5, faker.first_name(), faker.last_name(), faker.date_of_birth())
cursor.execute(
    """
    INSERT INTO dimitri_test.fake_person 
    (person_id, first_name, last_name, date_of_birth) VALUES
    (%s, %s, %s, %s)
""",
    new_person,
)

1

In [37]:
import tqdm

In [38]:
# Load 200 fake people with ids 1000-1199, one parameterized INSERT per row;
# tqdm renders a progress bar while the loop runs.
insert_person = """
    INSERT INTO 
    dimitri_test.fake_person (person_id, first_name, last_name, date_of_birth) 
    VALUES (%s, %s, %s, %s)
    """
for i in tqdm.tqdm(range(1000, 1200)):
    person = (i, faker.first_name(), faker.last_name(), faker.date_of_birth())
    cursor.execute(insert_person, person)

100%|████████████████████████████████████████████████████████████████████████| 200/200 [00:07<00:00, 25.36it/s]


In [42]:
# execute() returns the number of rows in the result set (203 in the recorded run);
# the rows themselves are retrieved afterwards through the cursor.
cursor.execute("""
SELECT * FROM dimitri_test.fake_person
""")

203

In [44]:
# fetchall() returns the rows of the last result set that have not been fetched yet.
# NOTE(review): the empty list recorded here suggests the rows had already been consumed
# by an earlier run of a fetch cell — confirm.
cursor.fetchall()

[]

In [None]:
# NOTE(review): destructive — this drops the table that later cells still query
# (e.g. `SELECT * FROM fake_person`), so running the notebook top to bottom breaks
# after this cell. Run it only for manual clean-up.
cursor.execute("""
DROP TABLE dimitri_test.fake_person
""")

In [None]:
import datetime

In [45]:
# Make dimitri_test the default schema so that later statements may omit the schema prefix.
cursor.execute("""USE dimitri_test""")

0

In [48]:
# The table name needs no schema prefix once a default schema has been selected with USE.
cursor.execute("""SELECT * FROM fake_person""")

203

In [52]:
# Fetch only the next row of the result set; with a DictCursor it arrives as a dict.
cursor.fetchone()

{'person_id': 1000,
 'first_name': 'Corey',
 'last_name': 'Barrett',
 'date_of_birth': datetime.date(1970, 12, 12)}

In [None]:
# Fetch every row that is still left in the result set.
cursor.fetchall()

In [53]:
# A cursor is iterable: looping over it yields the result rows one at a time,
# without first collecting them into a list.
cursor.execute("""SELECT * FROM dimitri_test.fake_person""")
for rec in cursor:
    print(rec)

{'person_id': 3, 'first_name': 'Cody', 'last_name': 'Robbins', 'date_of_birth': datetime.date(1986, 3, 16)}
{'person_id': 4, 'first_name': 'Kathleen', 'last_name': 'Mueller', 'date_of_birth': datetime.date(1922, 7, 29)}
{'person_id': 5, 'first_name': 'Tara', 'last_name': 'Huber', 'date_of_birth': datetime.date(1910, 5, 20)}
{'person_id': 1000, 'first_name': 'Corey', 'last_name': 'Barrett', 'date_of_birth': datetime.date(1970, 12, 12)}
{'person_id': 1001, 'first_name': 'Heather', 'last_name': 'Jackson', 'date_of_birth': datetime.date(1942, 1, 31)}
{'person_id': 1002, 'first_name': 'Amber', 'last_name': 'Rodriguez', 'date_of_birth': datetime.date(2006, 11, 27)}
{'person_id': 1003, 'first_name': 'Stephanie', 'last_name': 'Faulkner', 'date_of_birth': datetime.date(1930, 1, 22)}
{'person_id': 1004, 'first_name': 'James', 'last_name': 'Jackson', 'date_of_birth': datetime.date(1929, 8, 27)}
{'person_id': 1005, 'first_name': 'Colleen', 'last_name': 'Good', 'date_of_birth': datetime.date(1975, 

In [54]:
import datetime

In [55]:
# Random date between 2018-02-03 and today; the end of the range is given as the string 'today'.
faker.date_between(datetime.date(2018, 2, 3), 'today')

datetime.date(2021, 5, 20)

In [None]:
# Remove any previous copy of the table so the CREATE TABLE that follows starts clean.
# IF EXISTS avoids an error on the first run, when the table has not been created yet;
# the schema prefix matches the fully-qualified name used when the table is created.
cursor.execute("""
DROP TABLE IF EXISTS dimitri_test.fake_death
""")

In [None]:
# Death records: person_id is the primary key (at most one row per person) and also
# a foreign key (the person must already exist in fake_person).
cursor.execute("""
CREATE TABLE dimitri_test.fake_death(
    person_id int not null,
    date_of_death date NOT NULL,
    primary key(person_id), 
    foreign key (person_id) REFERENCES dimitri_test.fake_person (person_id))
""")

In [None]:
# Hotel reservations.
#   * primary key (hotel, room, reservation_date): a room is booked at most once per date
#   * unique index (person_id, reservation_date): a person holds at most one booking per date
#   * foreign key: the guest must exist in fake_person
# reservation_date is declared NOT NULL explicitly because it is part of the primary key.
# NOTE(review): the table name is spelled "hotel_reserviation"; it is left unchanged here
# in case it is referenced elsewhere — consider renaming to "hotel_reservation".
cursor.execute("""
CREATE TABLE hotel_reserviation(
    hotel varchar(20) not null,
    room  int not null,
    reservation_date date not null,
    person_id int not null,

    unique index (person_id, reservation_date),
    primary key (hotel, room, reservation_date),
    foreign key (person_id) references fake_person(person_id)
)
""")

In [None]:
# Bank accounts, identified by the composite key (bank_id, account).
cursor.execute("""
CREATE TABLE bank_account (
    bank_id  int not null,
    account int not null,

    primary key (bank_id, account)
)
""")

# Ownership link between accounts and people: the three-column primary key lets an
# account have several owners and a person own several accounts.
# The composite foreign key points at bank_account, the table that defines
# (bank_id, account); fake_person has no such columns.
cursor.execute("""
CREATE TABLE bank_account_owner (
    bank_id  int not null,
    account int not null,
    person_id int not null,

    primary key (bank_id, account, person_id),
    foreign key (person_id) references fake_person(person_id),
    foreign key (bank_id, account) references bank_account(bank_id, account)
)
""")

In [None]:
# Run the query, then look at the first row only.
cursor.execute("""
SELECT * FROM fake_person
""")
cursor.fetchone()

In [None]:
# Constraint demonstration: the row carries no person_id, which fake_death declares
# NOT NULL and as a foreign key, so the server rejects the INSERT.
# The error is caught and displayed so that the cells after this one still run.
try:
    cursor.execute("""INSERT into fake_death (date_of_death) values ('2020-10-09')""")
except pymysql.MySQLError as err:
    print("INSERT rejected:", err)

In [None]:
# A valid row: both columns are supplied and person_id 1000 refers to an existing person.
cursor.execute("""INSERT into fake_death (person_id, date_of_death) values (1000, '2020-09-09')""")

In [None]:
# Give every person who has no death record yet a random date of death that falls
# between their birth date and MAX_LIFESPAN later.
# People already present in fake_death are skipped, so the cell neither collides with
# rows inserted earlier (primary key on person_id) nor fails when it is run again.
MAX_LIFESPAN = datetime.timedelta(days=40000)  # roughly 109.5 years

cursor.execute("""
SELECT person_id, date_of_birth
FROM dimitri_test.fake_person
WHERE person_id NOT IN (SELECT person_id FROM dimitri_test.fake_death)
""")
# fetchall() first: the same cursor is reused for the INSERTs inside the loop,
# which would otherwise discard the pending SELECT result.
for rec in cursor.fetchall():
    date_of_death = faker.date_between(
        rec['date_of_birth'], rec['date_of_birth'] + MAX_LIFESPAN)
    cursor.execute("""
    INSERT INTO dimitri_test.fake_death (person_id, date_of_death) VALUES (%s, %s)
    """, (rec['person_id'], date_of_death))
    

In [None]:
# Age at death in whole years: the day difference between death and birth divided by
# 365.25 and rounded down.
# NATURAL JOIN matches rows on the columns the two tables share by name; per the table
# definitions in this notebook that is person_id only.
cursor.execute("""
SELECT first_name, floor(DATEDIFF(date_of_death, date_of_birth)/365.25) as died_at
FROM dimitri_test.fake_person NATURAL JOIN dimitri_test.fake_death""")

for rec in cursor:
    print(rec)

In [None]:
# Clean up: remove the death table.
cursor.execute("""
DROP TABLE dimitri_test.fake_death
""")

## Terminology

Translation from relational-model terminology into database-programming terminology:

* Tuple -> Row
* Attribute -> Field/Column
* Attribute value -> Cell
* Relation -> Table
* Domain -> Data type