In [16]:
import json
import random
import secrets
import string
from pathlib import Path
from pprint import pprint

from faker import Faker

# Shared Faker instance used by the generator helpers in this notebook.
# NOTE(review): per Faker's documentation, use_weighting=False turns off
# real-world frequency weighting so provider values are sampled uniformly
# (and faster) -- confirm against the installed Faker version.
fake = Faker(use_weighting=False)


In [17]:
# Load the base product names from "product_names.json" in the current
# working directory. The parsed value is consumed by product_name(), which
# picks from it with random.choice and joins the pick into a string, so the
# file is presumably a JSON array of strings -- TODO confirm.
product_names_json = Path().resolve() / "product_names.json"

with product_names_json.open(encoding="utf-8") as handle:
    product_names = json.load(handle)


In [18]:
def random_string(string_length: int = 10) -> str:
    """Return a random alphanumeric string of ``string_length`` characters.

    Each character is drawn independently with ``secrets.choice`` from the
    ASCII letters (both cases) plus the decimal digits. A length of zero (or
    less) yields the empty string because the loop body never runs.
    """
    pool = string.ascii_letters + string.digits
    picked = []
    for _ in range(string_length):
        picked.append(secrets.choice(pool))
    return "".join(picked)


In [19]:
def number():
    """Return a random integer from -1000 up to and including 999."""
    # randint's upper bound is inclusive, so 999 matches the half-open
    # range [-1000, 1000).
    return random.randint(-1000, 999)

def small_positive_integer():
    """Return a random integer from 1 up to and including 99."""
    # Inclusive upper bound 99 is the same range as the half-open [1, 100).
    return random.randint(1, 99)

def positive_integer():
    """Return a random integer from 1 up to and including 999."""
    # Inclusive upper bound 999 is the same range as the half-open [1, 1000).
    return random.randint(1, 999)

def number_employees():
    """Return a random head count from 1 up to and including 9999."""
    # Inclusive upper bound 9999 is the same range as the half-open [1, 10000).
    return random.randint(1, 9999)


In [20]:
def long_text():
    """Return a paragraph from the shared ``fake`` instance, requesting one sentence."""
    paragraph = fake.paragraph(nb_sentences=1)
    return paragraph

def username():
    """Return a user name produced by the shared ``fake`` instance."""
    handle = fake.user_name()
    return handle

def full_name():
    """Return a first name and a last name from ``fake`` joined by one space."""
    # Draw the first name before the last name, keeping the generator call order.
    first = fake.first_name()
    last = fake.last_name()
    return f"{first} {last}"

def sex():
    """Return either 'Male' or 'Female', chosen with ``random.choice``."""
    options = ("Male", "Female")
    return random.choice(options)



In [21]:
def brand():
    """Return a brand name; it is taken directly from ``fake.company()``."""
    company_name = fake.company()
    return company_name

def department():
    """Return one business department name picked at random from a fixed list."""
    departments = (
        "Marketing & Proposals Department",
        "Sales Department",
        "Project Department",
        "Designing Department",
        "Production Department",
        "Maintenance Department",
        "Store Department",
        "Procurement Department",
        "Quality Department",
        "Inspection Department",
        "Packaging Department",
        "Finance Department",
        "Dispatch Department",
        "Account Department",
        "Research & Development Department",
        "Information Technology Department",
        "Human Resource Department",
        "Security Department",
        "Administration Department",
    )
    return random.choice(departments)


In [22]:
def deal_stage():
    """Return one sales-pipeline stage label picked at random from a fixed list."""
    return random.choice((
        "New Lead",
        "Contacted",
        "Qualified",
        "Proposal Sent",
        "Negotiation",
        "Closed Won",
        "Closed Lost",
        "Re-engagement",
        "On Hold",
        "Disqualified",
    ))

def deal_source():
    """Return one lead-source label picked at random from a fixed list."""
    return random.choice((
        "Website Form",
        "Cold Email",
        "Cold Call",
        "Referral",
        "Social Media",
        "LinkedIn Outreach",
        "Google Ads",
        "Facebook Ads",
        "Organic Search (SEO)",
        "Content Marketing",
        "Webinars",
        "Trade Show",
        "Networking Event",
        "Purchased List",
        "Partner Program",
        "Chatbot",
        "Direct Traffic",
        "Retargeting Ads",
        "Podcast",
        "Other",
    ))


In [23]:
def color():
    """Return a color name produced by the shared ``fake`` instance."""
    color_name = fake.color_name()
    return color_name

def size():
    """Return one size label picked at random.

    The pool mixes three kinds of label in a fixed order: clothing sizes,
    generic sizes, and physical dimensions.
    """
    size_pool = (
        # Clothing sizes.
        "XS", "S", "M", "L", "XL", "XXL",
        # Generic sizes.
        "Small", "Medium", "Large", "Extra Large",
        # Physical dimensions.
        "10x10 cm", "15x20 cm", "30x40 cm", "50x70 cm",
        "100x200 mm", "5x7 in", "8x10 in", "12x18 in",
    )
    return random.choice(size_pool)

def availability():
    """Return one stock-status code picked at random from a fixed list."""
    return random.choice((
        "in_stock",
        "out_of_stock",
        "pre_order",
        "discontinued",
        "limited_stock",
        "backorder",
    ))


In [24]:
def product_name(names=None):
    """Build a random product name made of one to five parts.

    A one-part name is just a base product name. Longer names start with an
    adjective, followed by a base product name, followed by up to three
    further modifier words. The leading adjective and the trailing modifiers
    are all distinct from each other, so a name never repeats a modifier
    (words that occur inside a multi-word base name are not checked).

    Args:
        names: Optional sequence of base product names to pick from. When
            omitted, the notebook-level ``product_names`` list is used, so
            existing zero-argument callers keep working.

    Returns:
        The chosen parts joined with single spaces.

    Raises:
        IndexError: If the base-name sequence is empty (raised by
            ``random.choice``).
    """
    # Each adjective appears once so every word is equally likely.
    adjectives = [
        "Smart", "Ultra", "Eco", "Wireless", "Portable", "Pro", "Mini", "Advanced", "Digital",
        "Compact", "Premium", "Rechargeable", "Fast", "Silent", "Clean", "Automatic"
    ]
    extra = [
        "Max", "X", "Go", "One", "360", "Plus", "Edge", "Prime", "Lite", "Air", "Touch", "Sense"
    ]
    base_names = product_names if names is None else names

    word_count = random.randint(1, 5)
    if word_count == 1:
        return random.choice(base_names)

    lead = random.choice(adjectives)
    base = random.choice(base_names)

    # Build the modifier pool once, leaving out the leading adjective, and
    # sample without replacement so no modifier shows up twice in one name.
    modifiers = [word for word in extra + adjectives if word != lead]
    trailing = random.sample(modifiers, word_count - 2)

    return " ".join([lead, base, *trailing])


In [25]:
def product_category():
    """Return one product category label picked at random from a fixed list."""
    return random.choice((
        "Clothing & Apparel",
        "Home & Kitchen",
        "Beauty & Personal Care",
        "Health & Wellness",
        "Sports & Outdoors",
        "Toys & Games",
        "Automotive",
        "Books & Stationery",
        "Office Supplies",
        "Smartphones",
        "Laptops & Computers",
        "Smartwatches",
        "Headphones & Earbuds",
        "Cameras & Accessories",
        "Men's Clothing",
        "Women's Clothing",
        "Kids' Clothing",
        "Shoes & Footwear",
        "Accessories (Bags, Hats, Belts)",
        "Furniture",
        "Kitchen Appliances",
        "Bedding & Bath",
        "Home Decor",
        "Cleaning Supplies",
        "Skincare",
        "Haircare",
        "Makeup",
        "Fragrances",
        "Grooming Tools",
        "Fitness Equipment",
        "Camping & Hiking",
        "Cycling",
        "Team Sports",
        "Fishing & Hunting",
    ))


In [26]:
def currency():
    """Return the currency code, which is always the fixed string 'USD'."""
    code = "USD"
    return code

def industry():
    """Return one industry label picked at random from a fixed list."""
    sectors = (
        "Accounting",
        "Aviation",
        "Biotech",
        "Broadcast Media",
        "Construction",
        "Consumer Goods",
        "Design",
        "E-Learning",
        "Education",
        "Finance",
        "Healthcare",
        "Hospitality",
        "IT",
        "Legal",
        "Logistics",
        "Manufacturing",
        "Marketing",
        "Media",
        "Medical Devices",
        "Mining",
        "Non-Profit",
        "Oil & Gas",
        "Pharmaceuticals",
        "Retail",
        "Security",
        "Telecommunications",
        "Transportation",
        "Utilities",
    )
    return random.choice(sectors)


In [27]:
# Registry mapping a field-type name to a zero-argument callable that returns
# one fake value of that type. Values are either helpers defined in this
# notebook or bound methods of the shared ``fake`` instance.
TYPES_TO_GENERATORS = {
    "id": random_string,
    "first_name": fake.first_name,
    "last_name": fake.last_name,
    "full_name": full_name,
    "username": username,
    "company": fake.company,
    "industry": industry,
    "business_department": department,
    "company_desc": fake.catch_phrase,
    "company_number_employees": number_employees,
    "city": fake.city,
    "country": fake.country,
    "sex": sex,
    "ean": fake.ean,
    "url": fake.url,
    "email": fake.email,
    "currency": currency,
    "availability": availability,
    "size": size,
    "color": color,
    "business_email": fake.company_email,
    "website": fake.url,  # same generator as "url"
    "job": fake.job,
    "number": number,
    "small_positive_integer": small_positive_integer,
    "positive_integer": positive_integer,
    "product_name": product_name,
    "brand": brand,  # brand() itself returns fake.company()
    "product_category": product_category,
    "deal_stage": deal_stage,
    "deal_source": deal_source,
    # In the recorded sample output "date" is a string, while "datetime",
    # "date_this_decade" and "date_of_birth" are datetime/date objects.
    "date": fake.date,
    "year": fake.year,
    "datetime": fake.date_time,
    "date_this_decade": fake.date_this_decade,
    "date_of_birth": fake.date_of_birth,
    "long_text": long_text,
    "address": fake.address,
    "phone": fake.phone_number,
}


In [28]:
# Smoke test: call every registered generator once and show one value per
# field type. pprint (imported with the other imports in the notebook's first
# cell) prints the dict with its keys in sorted order.
sample = {key: generator() for key, generator in TYPES_TO_GENERATORS.items()}
pprint(sample)


{'address': 'PSC 1612, Box 8993\nAPO AE 98847',
 'availability': 'in_stock',
 'brand': 'Moyer and Sons',
 'business_department': 'Designing Department',
 'business_email': 'iwalker@kent.org',
 'city': 'Lake Jack',
 'color': 'LightBlue',
 'company': 'Brown, Riley and Castillo',
 'company_desc': 'User-friendly modular extranet',
 'company_number_employees': 7121,
 'country': 'Russian Federation',
 'currency': 'USD',
 'date': '1985-09-05',
 'date_of_birth': datetime.date(2009, 5, 24),
 'date_this_decade': datetime.date(2022, 9, 20),
 'datetime': datetime.datetime(1973, 2, 13, 5, 10, 59),
 'deal_source': 'Chatbot',
 'deal_stage': 'Re-engagement',
 'ean': '2922527073245',
 'email': 'pollardross@example.org',
 'first_name': 'Monique',
 'full_name': 'Evelyn Hoover',
 'id': 'PhAE4TwZc3',
 'industry': 'Telecommunications',
 'job': 'Technical brewer',
 'last_name': 'Massey',
 'long_text': 'Behavior history need might conference behavior street.',
 'number': 360,
 'phone': '670.495.9623x7243',
 '

In [29]:
# Example record assembled from the individual generators. pprint is imported
# with the other imports in the notebook's first cell, so it is not
# re-imported here.
data = {
    "ID": random_string(),
    "Name": full_name(),
    "Username": username(),
    "Email": fake.email(),
    "Company": brand(),
    "Product": product_name(),
    "Category": product_category(),
    "Color": color(),
    "Size": size(),
    "Price": positive_integer(),
    "Status": availability(),
    "Department": department(),
    "Industry": industry()
}

pprint(data)


{'Category': 'Clothing & Apparel',
 'Color': 'Cornsilk',
 'Company': 'Osborn, Nielsen and Duffy',
 'Department': 'Sales Department',
 'Email': 'mcdowellmarcia@example.com',
 'ID': '5SbdRwqLUa',
 'Industry': 'Biotech',
 'Name': 'Karen Dickson',
 'Price': 264,
 'Product': 'Ultra Trimmer Clock Premium Clean',
 'Size': '15x20 cm',
 'Status': 'pre_order',
 'Username': 'romerohailey'}
