In [1]:
from csv import reader
from faker import Faker
from faker.providers import BaseProvider, DynamicProvider
import pandas as pd
from psycopg2 import connect, extensions, sql
import random

In [2]:
with open('./csv-files/vehicle-models.csv') as file:
    read_file = reader(file)
    cars = list(read_file)

In [3]:
cars_sum = []
for list in cars:
    cars_sum.append(list[2] + ' ' + list[3])

print(cars_sum[:5])

['BMW 218i Coupe M Sport P 3,590,000 (AT)', 'BMW 220i Coupe M Sport P 3,690,000 (AT)', 'BMW M240i Coupe M Sport P 4,890,000 (AT)', 'BMW 318i Sport P 3,790,000 (AT)', 'BMW 318i Touring P 3,890,000 (AT)']


In [4]:
bank_list = ["BDO", "Banco de Oro",
             "MB", "Metrobank",
             "BPI", "Bank of the Philippine Islands",
             "LBP", "Land Bank of the Philippines", "Land Bank",
             "DBP", "Development Bank of the Philippines", "Development Bank"
             "PNB", "Philippine National Bank", 
             "RCBC", "Rizal Commercial Banking Corporation",
             "UBP", "Union Bank of the Philippines",
             "Chinabank", 
             "AUB", "Asia United Bank", "Asia United",
             "In-house"]

payment_terms = ["12", "24", "36", "48", "60", "1", "2", "3", "4", "5", "6"]

car_colors = ["White", "white",
              "Black", "black", "blk"
              "Gray", "gray", "Grey", "gray",
              "Silver", "silver",
              "Red", "red",
              "Green", "green",
              "Blue", "blue", "blu"
              "Orange", "orange",
              "Yellow", "yellow", "ylw"
              "Teal", "Sky Blue", "Skyblue", "skyblue"]

In [5]:
f = Faker(['en_PH'])

car_sold = DynamicProvider(
    provider_name='car_sold',
    elements=cars_sum
)

f.add_provider(car_sold)
print(f.car_sold())

bank_provider = DynamicProvider(
    provider_name='bank',
    elements=bank_list
)

f.add_provider(bank_provider)
print(f.bank())

payment_terms_provider = DynamicProvider(
    provider_name='terms',
    elements=payment_terms
)

f.add_provider(payment_terms_provider)
print(f.terms())

car_colors_provider = DynamicProvider(
    provider_name='car_color',
    elements=car_colors
)

f.add_provider(car_colors_provider)
print(f.car_color())

class LicenseProvider(BaseProvider):
    def license_number(self):
        return f.bothify(text="?##-##-######", letters="ABCDEFGHIJKLMNOP")

f.add_provider(LicenseProvider)
print(f.license_number())

Geely Emgrand 1.5 Comfort P 945,000 (AT)
Union Bank of the Philippines
5
Silver
I17-39-515994


In [6]:
conn = connect(
    dbname='faker',
    user='faker',
    host='faker-db',
    password='faker'
)

In [11]:
cursor = conn.cursor()
# conn.autocommit = True
# DATABASE_NAME = "car_sales"
SCHEMA_NAME = "faker.raw"
TABLE_NAME = "faker.raw.raw_values"
# cursor.execute(sql.SQL(
#     "CREATE DATABASE {};").format(sql.Identifier( DATABASE_NAME )))
cursor.execute(sql.SQL(
    "CREATE SCHEMA IF NOT EXISTS {};").format(sql.Identifier( SCHEMA_NAME )))
cursor.execute(sql.SQL(
    """CREATE TABLE IF NOT EXISTS {} (
    id SERIAL PRIMARY KEY,
    name VARCHAR(25),
    license CHAR(13),
    num CHAR(15),
    email VARCHAR(40),
    company VARCHAR(80),
    street VARCHAR(80),
    city VARCHAR(40),
    province VARCHAR(20),
    date VARCHAR(27),
    bank VARCHAR(40),
    terms SMALLINT,
    car VARCHAR(100),
    color VARCHAR(8),
    plate VARCHAR(7)
    );""").format(sql.Identifier( TABLE_NAME )))
conn.commit()
cursor.close()

In [8]:
cursor = conn.cursor()
cursor.execute(
    """
SELECT 'raw'
FROM information_schema.schemata
"""
)
output = cursor.fetchall()
cursor.close()

print(output)

[('raw',), ('raw',), ('raw',), ('raw',), ('raw',)]


In [9]:
cursor = conn.cursor()
cursor.execute(
    """
SELECT *
FROM information_schema.tables
"""
)
output = cursor.fetchall()
cursor.close()

print(output)

[('faker', 'pg_catalog', 'pg_statistic', 'BASE TABLE', None, None, None, None, None, 'YES', 'NO', None), ('faker', 'pg_catalog', 'pg_type', 'BASE TABLE', None, None, None, None, None, 'YES', 'NO', None), ('faker', 'pg_catalog', 'pg_foreign_table', 'BASE TABLE', None, None, None, None, None, 'YES', 'NO', None), ('faker', 'pg_catalog', 'pg_authid', 'BASE TABLE', None, None, None, None, None, 'YES', 'NO', None), ('faker', 'pg_catalog', 'pg_shadow', 'VIEW', None, None, None, None, None, 'NO', 'NO', None), ('faker', 'pg_catalog', 'pg_statistic_ext_data', 'BASE TABLE', None, None, None, None, None, 'YES', 'NO', None), ('faker', 'pg_catalog', 'pg_roles', 'VIEW', None, None, None, None, None, 'NO', 'NO', None), ('faker', 'pg_catalog', 'pg_settings', 'VIEW', None, None, None, None, None, 'NO', 'NO', None), ('faker', 'pg_catalog', 'pg_file_settings', 'VIEW', None, None, None, None, None, 'NO', 'NO', None), ('faker', 'pg_catalog', 'pg_hba_file_rules', 'VIEW', None, None, None, None, None, 'NO', '

In [7]:
# Faker.seed(0)
target_values=10

percentage_of_null_values=0.05
weight_vs_null_values = [1-percentage_of_null_values,percentage_of_null_values]

sales_data = []

for row in range(target_values):
    name = random.choices([f.name(), "null"], weights=weight_vs_null_values)[0]
    license = random.choices([f.license_number(), "null"], weights=weight_vs_null_values)[0]    
    num = random.choices([f.mobile_number(), "null"], weights=weight_vs_null_values)[0]
    email = random.choices([f.ascii_free_email(), "null"], weights=[1-percentage_of_null_values,percentage_of_null_values])[0]
    company = random.choices([f.company(), "null"], weights=[1-percentage_of_null_values,percentage_of_null_values])[0]
    street = random.choices([f.street_address(), "null"], weights=[1-percentage_of_null_values,percentage_of_null_values])[0]
    city = random.choices([f.province_lgu(), "null"], weights=[1-percentage_of_null_values,percentage_of_null_values])[0]
    province = random.choices([f.province(), "null"], weights=[1-percentage_of_null_values,percentage_of_null_values])[0]
    date = random.choices([f.date_this_year(), "null"], weights=[1-percentage_of_null_values,percentage_of_null_values])[0]
    bank = random.choices([f.bank(), "null"], weights=[1-percentage_of_null_values,percentage_of_null_values])[0]
    terms = random.choices([f.terms(), "null"], weights=[1-percentage_of_null_values,percentage_of_null_values])[0]
    car = random.choices([f.car_sold(), "null"], weights=[1-percentage_of_null_values,percentage_of_null_values])[0]
    color = random.choices([f.car_color(), "null"], weights=[1-percentage_of_null_values,percentage_of_null_values])[0]
    plate_num = random.choices([f.automobile_license_plate(), "null"], weights=[1-percentage_of_null_values,percentage_of_null_values])[0]

    sales_data.append([name, license, num, email, company, street, city, province, date, bank, terms, car, color, plate_num])

In [8]:
sales_data

[['Jim Vargas',
  'P39-47-160110',
  '+63996-613-1264',
  'christine01@zohomail.com',
  'VY Foods Inc.',
  '3740 Zircon Road, Rosas Estates',
  'Tanudan',
  'Occidental Mindoro',
  datetime.date(2023, 2, 16),
  'In-house',
  '1',
  'Great Wall GWM Cannon P-Series Lux 4x2 P 1,198,000 (AT)',
  'Yellow',
  'OFT6075'],
 ['Kimberly Keller',
  'P51-61-489711',
  '+63917-893-3216',
  'berryjennifer@yahoo.com',
  'MK State Morning Development Inc.',
  '3596 48th Street, Miller Homes 4',
  'Lupon',
  'Batanes',
  datetime.date(2023, 2, 13),
  'Bank of the Philippine Islands',
  '4',
  'Suzuki Dzire 1.2 GA P 609,000 (MT)',
  'Sky Blue',
  'YGE552'],
 ['Kimberly Larson',
  'I54-41-481444',
  '+63924-277-8590',
  'null',
  'Miller Development Inc.',
  '1603-I Jacaranda Street',
  'Valderrama',
  'Camiguin',
  datetime.date(2023, 5, 17),
  'Philippine National Bank',
  '36',
  'Ford Ranger Raptor 2.0 Bi-Turbo AWD P 2,339,000 (AT)',
  'orange',
  'YQG5379'],
 ['Joseph Nguyen',
  'C75-59-974139',
  '

In [9]:
Faker.seed(0)
f.csv(data_columns=('{{name}}', 
                    '{{license_number}}', 
                    '{{mobile_number}}', 
                    '{{ascii_free_email}}', 
                    '{{company}}', 
                    '{{street_address}}', 
                    '{{province_lgu}}', 
                    '{{province}}', 
                    '{{date_this_year}}', 
                    '{{bank}}', 
                    '{{terms}}',
                    '{{car_sold}}', 
                    '{{car_color}}', 
                    '{{automobile_license_plate}}'), num_rows=3, include_row_ids=False)

'"Norma Fisher","E48-76-475938","0914-194-8924","blairrachel8zohomail.com","Gomez Gold Sun Mining Corporation","B18 L02 Cortez Subdivision, Andromeda Road","Liliw","Quezon","2023-05-22","Development Bank of the Philippines","24","Honda City 1.5 S P 953,000 (AT)","Yellow","OCC328"\r\n"Sheri Bolton DDS","J48-41-858398","+63911-965-9342","lorijohnson8zohomail.com","Pacific Empire Banking Corporation","31st Floor Rosas Condominiums Tower 4, 9664 Palanan Drive","Pintuyan","Guimaras","2023-01-16","Development Bank of the Philippines","5","Foton Harabas TM 300 MPV Imperium P 1,283,000 (MT)","blue","DWH3304"\r\n"Christopher Simmons","D01-23-098910","+63925-991-6151","jacobsjamesgmail.com","PWBW Liberty Capital Inc.","6F Brown Suites, 646 Bagtikan Service Road","Mansalay","Nueva Vizcaya","2023-02-20","Development BankPNB","3","Suzuki Jimny 1.5 GLX All-Grip Pro 2-Tone P 1,330,000 (AT)","gray","KQI022"\r\n'