In [9]:
import csv
import random
import string

from faker import Faker

# Module-level Faker instance; generate_data in this cell reads it as a global.
fake = Faker()

# Generate synthetic data for all tables
def generate_data(num_records):
    """Build synthetic rows for every table, one linked set of rows per record.

    Each iteration produces one company row plus one facility, program,
    service, population and facility-locations row. The rows are tied
    together by shared values: the facility row carries the company name,
    the program and facility-locations rows carry the facility number, and
    the service and population rows carry the program id.

    Args:
        num_records: Number of linked row sets to generate.

    Returns:
        A list of 6-tuples ``(company, facility, program, service,
        population, facility_locations)``. Each element of a tuple is a list
        of column values for that table.
    """
    data = []
    for _ in range(num_records):
        # COMPANY table
        cname = fake.company()
        county = random.randint(1, 99)  # integer county code in 1..99

        # FACILITY table
        fnumber = f"{random.randint(1, 99999999):08d}"  # zero-padded to 8 digits
        phone = ''.join(random.choices(string.digits, k=10))  # 10 random digits
        city = fake.city()
        zip_code = fake.zipcode()[:5]  # keep only the first 5 characters
        st_address = fake.street_address()
        suite = fake.building_number()

        # PROGRAM table
        capacity = random.randint(1, 100)
        pname = fake.catch_phrase() + " Health Program"
        pid = f"{random.randint(1, 99999999):08d}"  # zero-padded to 8 digits

        # SERVICE table
        stype = random.choice(["RES", "NON", "DETOX"])

        # POPULATION table
        ptype = random.choice(["MEN", "WOMEN", "COED"])
        occupancy = random.randint(0, capacity)  # never exceeds capacity

        # FACILITY_LOCATIONS table
        flocations = random.randint(1, 10)

        # The facility row reuses this record's company name so that it refers
        # to a company that actually appears in the company rows; drawing a
        # second random name would leave the reference dangling.
        row_company = [cname, county]
        row_facility = [fnumber, phone, city, zip_code, st_address, suite, cname]
        row_program = [capacity, pname, pid, fnumber]
        row_service = [stype, pid]
        row_population = [ptype, occupancy, pid]
        row_facility_locations = [fnumber, flocations]

        data.append((row_company, row_facility, row_program, row_service, row_population, row_facility_locations))

    return data

# Save all data to CSV files for each table
def save_to_csv(data, filenames):
    """Write one CSV file per table from the combined record tuples.

    Args:
        data: Sequence of records; element ``i`` of each record is the row
            (a list of column values) destined for the ``i``-th file.
        filenames: Output paths, one per table, in the same order as the
            elements of each record. Existing files are overwritten and no
            header row is written.
    """
    for table_pos, path in enumerate(filenames):
        with open(path, mode='w', newline='', encoding='utf-8') as out:
            # Pull this table's row out of every record before writing.
            table_rows = []
            for record in data:
                table_rows.append(record[table_pos])
            csv.writer(out).writerows(table_rows)

# Fixed seed so that repeated runs draw the same sequence of random values
# from both the `random` module and the Faker instance.
SEED = 42
random.seed(SEED)
fake.seed_instance(SEED)

# Number of records to generate
num_records = 500

# Generate data for all tables
data = generate_data(num_records)

# One output file per table; the order must match the order of the row lists
# inside each tuple returned by generate_data, because save_to_csv pairs them
# up by position.
filenames = ['company.csv', 'facility.csv', 'program.csv', 'service.csv', 'population.csv', 'facility_locations.csv']

# Save data to CSV files for each table
save_to_csv(data, filenames)


In [3]:
import csv
from faker import Faker
import random
import string

# Module-level Faker instance; the generator functions in this cell read it as a global.
fake = Faker()

# Generate unique company names
def generate_company_names(num_records):
    """Return distinct fake company names in the order they were first drawn.

    Names are drawn from the module-level ``fake`` instance until
    ``num_records`` distinct names have been collected. Repeated names are
    discarded and replaced by further draws, so a duplicate no longer shrinks
    the result.

    Args:
        num_records: Number of distinct names wanted.

    Returns:
        A list of distinct names in first-seen order. It holds exactly
        ``num_records`` names unless the retry budget runs out first, in
        which case the names collected so far are returned. The list is
        empty when ``num_records`` is zero or negative.
    """
    # A dict is used as an insertion-ordered set, so the output order follows
    # the draw order instead of arbitrary set iteration order.
    unique_names = {}
    # Bound the number of draws so a generator with too few distinct values
    # cannot cause an endless loop.
    remaining_attempts = max(num_records * 20, 100)
    while len(unique_names) < num_records and remaining_attempts > 0:
        unique_names.setdefault(fake.company(), None)
        remaining_attempts -= 1
    return list(unique_names)

# Generate synthetic data for all tables
def generate_data(num_records):
    """Build one linked set of table rows for each generated company name.

    The company names come from ``generate_company_names(num_records)``; for
    every name a company, facility, program, service, population and
    facility-locations row is created. The facility row carries the company
    name, the program and facility-locations rows carry the facility number,
    and the service and population rows carry the program id.

    Args:
        num_records: Passed through to ``generate_company_names``.

    Returns:
        A list of 6-tuples ``(company, facility, program, service,
        population, facility_locations)``, each element being a list of
        column values for that table.
    """
    def _eight_digit_id():
        # Random integer in 1..99999999 rendered as a zero-padded 8-character string.
        return str(random.randint(1, 99999999)).zfill(8)

    records = []
    for company_name in generate_company_names(num_records):
        # COMPANY
        county_code = random.randint(1, 99)

        # FACILITY
        facility_no = _eight_digit_id()
        phone_no = ''.join(random.choices(string.digits, k=10))
        facility_row = [
            facility_no,
            phone_no,
            fake.city(),
            fake.zipcode()[:5],  # first five characters only
            fake.street_address(),
            fake.building_number(),  # used as the suite number
            company_name,
        ]

        # PROGRAM
        max_capacity = random.randint(1, 100)
        program_name = fake.catch_phrase() + " Health Program"
        program_id = _eight_digit_id()

        # SERVICE
        service_kind = random.choice(["RES", "NON", "DETOX"])

        # POPULATION: occupancy is drawn from 0..capacity inclusive
        population_kind = random.choice(["MEN", "WOMEN", "COED"])
        current_occupancy = random.randint(0, max_capacity)

        # FACILITY_LOCATIONS
        state_code = fake.state_abbr()

        records.append((
            [company_name, county_code],
            facility_row,
            [max_capacity, program_name, program_id, facility_no],
            [service_kind, program_id],
            [population_kind, current_occupancy, program_id],
            [facility_no, state_code],
        ))

    return records

# Save all data to CSV files for each table
def save_to_csv(data, filenames):
    """Split the combined records by table and write each table to its own CSV.

    Args:
        data: Sequence of records, where position ``i`` inside a record holds
            the list of column values for the ``i``-th table.
        filenames: One output path per table, ordered like the positions
            inside a record. Files are opened in write mode (overwriting any
            existing content) and receive no header row.
    """
    for column_index, target_path in enumerate(filenames):
        with open(target_path, mode='w', newline='', encoding='utf-8') as handle:
            csv_out = csv.writer(handle)
            # Gather the rows belonging to this table from every record.
            rows_for_table = [entry[column_index] for entry in data]
            csv_out.writerows(rows_for_table)

# Fixed seed so that repeated runs draw the same sequence of random values
# from both the `random` module and the Faker instance.
SEED = 42
random.seed(SEED)
fake.seed_instance(SEED)

# Number of records to generate
num_records = 500

# Generate data for all tables
data = generate_data(num_records)

# One output file per table; the order must match the order of the row lists
# inside each tuple returned by generate_data, because save_to_csv pairs them
# up by position.
filenames = ['company.csv', 'facility.csv', 'program.csv', 'service.csv', 'population.csv', 'facility_locations.csv']

# Save data to CSV files for each table
save_to_csv(data, filenames)
