In [1]:
!pip install faker
from faker import Faker
import pandas as pd
import random
from datetime import datetime, timedelta

Collecting faker
  Downloading faker-37.12.0-py3-none-any.whl.metadata (15 kB)
Downloading faker-37.12.0-py3-none-any.whl (2.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m13.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faker
Successfully installed faker-37.12.0


In [2]:
# Initialize Faker
# Seed both randomness sources used by this notebook (Faker's shared generator
# and the stdlib `random` module) so that Restart & Run All regenerates the
# same dataset. Change SEED to get a different, but still repeatable, sample.
SEED = 42
Faker.seed(SEED)
random.seed(SEED)
fake = Faker('en_US')

In [3]:
# Industry labels that generate_company_data samples from for "Industry Type".
# Kept one per line so no string literal can be split by line wrapping.
industries = [
    'Accounting',
    'Airlines/Aviation',
    'Alternative Dispute Resolution',
    'Alternative Medicine',
    'Animation',
    'Apparel & Fashion',
    'Architecture & Planning',
    'Arts and Crafts',
    'Automotive',
    'Aviation & Aerospace',
    'Banking',
    'Biotechnology',
    'Broadcast Media',
    'Building Materials',
    'Business Supplies and Equipment',
    'Capital Markets',
    'Chemicals',
    'Civic & Social Organization',
    'Civil Engineering',
    'Commercial Real Estate',
    'Computer & Network Security',
    'Computer Games',
    'Computer Hardware',
    'Computer Networking',
    'Computer Software',
    'Internet',
    'Construction',
    'Consumer Electronics',
    'Consumer Goods',
    'Consumer Services',
    'Cosmetics',
    'Dairy',
    'Defense & Space',
    'Design',
    'Education Management',
    'E-Learning',
    'Electrical/Electronic Manufacturing',
    'Entertainment',
    'Environmental Services',
    'Events Services',
    'Executive Office',
    'Facilities Services',
    'Farming',
    'Financial Services',
    'Fine Art',
    'Fishery',
    'Food & Beverages',
    'Food Production',
    'Fund-Raising',
    'Furniture',
    'Gambling & Casinos',
    'Glass',
    'Government Administration',
    'Government Relations',
    'Graphic Design',
    'Health',
    'Higher Education',
    'Hospital & Health Care',
    'Hospitality',
    'Human Resources',
    'Import and Export',
    'Individual & Family Services',
    'Industrial Automation',
    'Information Services',
    'Information Technology and Services',
    'Insurance',
    'International Affairs',
    'International Trade and Development',
    'Investment Banking',
    'Investment Management',
    'Judiciary',
    'Law Enforcement',
    'Law Practice',
    'Legal Services',
    'Legislative Office',
    'Leisure',
    'Libraries',
    'Logistics and Supply Chain',
    'Luxury Goods & Jewelry',
    'Machinery',
    'Management Consulting',
    'Maritime',
    'Market Research',
    'Marketing and Advertising',
    'Mechanical or Industrial Engineering',
    'Media Production',
    'Medical Devices',
    'Medical Practice',
    'Mental Health Care',
    'Military',
    'Mining & Metals',
    'Motion Pictures and Film',
    'Museums and Institutions',
    'Music',
    'Nanotechnology',
    'Newspapers',
    'Non-Profit Organization Management',
    'Oil & Energy',
    'Online Media',
    'Outsourcing/Offshoring',
    'Package/Freight Delivery',
    'Packaging and Containers',
    'Paper & Forest Products',
    'Performing Arts',
    'Pharmaceuticals',
    'Philanthropy',
    'Photography',
    'Plastics',
    'Political Organization',
    'Primary/Secondary Education',
    'Printing',
    'Professional Training & Coaching',
    'Program Development',
    'Public Policy',
    'Public Relations and Communications',
    'Public Safety',
    'Publishing',
    'Railroad Manufacture',
    'Ranching',
    'Real Estate',
    'Recreational Facilities and Services',
    'Religious Institutions',
    'Renewables & Environment',
    'Research',
    'Restaurants',
    'Retail',
    'Security and Investigations',
    'Semiconductors',
    'Shipbuilding',
    'Sporting Goods',
    'Sports',
    'Staffing and Recruiting',
    'Supermarkets',
    'Telecommunications',
    'Textiles',
    'Think Tanks',
    'Tobacco',
    'Translation and Localization',
    'Transportation/Trucking/Railroad',
    'Utilities',
    'Venture Capital & Private Equity',
    'Veterinary',
    'Warehousing',
    'Wholesale',
    'Wine and Spirits',
    'Wireless',
    'Writing and Editing',
    'Mobile Games',
]
# Range labels that are currently unused (employee counts and revenue are
# generated as plain integers); kept for reference.
#employee_ranges = ["1-10", "11-50", "51-200", "201-500", "501-1000", "1001-5000", "5000+"]
#revenue_ranges = ["$0-1M", "$1M-5M", "$5M-10M", "$10M-50M", "$50M-100M", "$100M-500M", "$500M+"]

# Relationship categories sampled for the "Type" column.
c_type = ['Prospect', 'Partner', 'Reseller', 'Vendor', 'Other']

# Number of company records the driver cell generates.
num_records = 1000

In [4]:
def generate_company_data(num_records):
  """Generate sample company data using Faker library.

  Each record is built from independent random draws, so the fields are not
  mutually consistent (for example, the city, state, postal code and time
  zone of one record are unrelated to each other).

  Relies on the notebook-level ``fake`` Faker instance and on the
  ``industries`` and ``c_type`` lists.

  Args:
    num_records: How many company records to create. ``range`` is used, so
      zero or a negative number produces an empty list.

  Returns:
    A list of dicts, one per company, keyed by the column titles used in the
    exported files ("Company Domain Name", "Company Name", ...).
  """
  companies = []
  for _ in range(num_records):
    domain_name = fake.domain_name()
    company_name = fake.company()
    # numerify placeholders (per Faker docs): '%' -> digit 1-9, '#' -> digit 0-9.
    phone_number = fake.numerify(text='+1(%##)###-####')
    company_owner = fake.name()
    industry = random.choice(industries)
    company_type = random.choice(c_type)
    city = fake.city()
    state = fake.state()
    country = fake.current_country()
    postal_code = fake.postcode()
    num_employees = random.randint(20,5000)
    annual_revenue = random.randint(100000,500000000)
    timezone = fake.timezone()
    description = fake.catch_phrase() + ". " + fake.bs().capitalize() + "."
    # Slug: lowercase, spaces become hyphens, commas and periods are dropped.
    # Other punctuation in the company name is left as-is.
    linkedin_slug = company_name.lower().replace(' ', '-').replace(',', '').replace('.', '')
    linkedin_page = f"https://www.linkedin.com/company/{linkedin_slug}"
    street_address = fake.street_address()

    company = {
              "Company Domain Name": domain_name,
              "Company Name": company_name,
              "Phone Number": phone_number,
              "Company Owner": company_owner,
              "Industry Type": industry,
              "Type": company_type,
              "City": city,
              "State/Region": state,
              "Country/Region": country,
              "Postal Code": postal_code,
              "Number of Employees": num_employees,
              "Annual Revenue": annual_revenue,
              "Time Zone": timezone,
              "Description": description,
              "Linkedin company page": linkedin_page,
              "Street Address": street_address
          }

    companies.append(company)
  return companies

In [5]:
def display_companies(companies):
  """Print every company record as a banner followed by aligned field lines.

  Records are numbered from 1. Each field is printed as its key padded to a
  width of 25 characters, a colon, and the value.
  """
  rule = '=' * 60
  for position, record in enumerate(companies, start=1):
    print(f"\n{rule}")
    print(f"COMPANY #{position}")
    print(f"{rule}")
    for field in record:
      print(f"{field:25}: {record[field]}")

def export_to_csv(companies, filename="companies1.csv"):
  """Write the company records to a CSV file and return them as a DataFrame.

  The DataFrame index is not written to the file. A confirmation line
  (preceded by a blank line) is printed after the file is written.
  """
  frame = pd.DataFrame(companies)
  frame.to_csv(filename, index=False)
  confirmation = f"\nData exported to {filename}"
  print(confirmation)
  return frame

def export_to_excel(companies, filename="companies.xlsx"):
  """Write the company records to an Excel workbook and return the DataFrame.

  The data goes to a sheet named 'Companies' without the DataFrame index.
  A confirmation line is printed after the file is written.
  """
  frame = pd.DataFrame(companies)
  frame.to_excel(filename, sheet_name='Companies', index=False)
  confirmation = f"Data exported to {filename}"
  print(confirmation)
  return frame

def export_to_json(companies, filename="companies.json"):
  """Write the company records to a JSON file and return them as a DataFrame.

  The file holds a list with one object per record (orient='records'),
  indented by two spaces. A confirmation line is printed afterwards.
  """
  frame = pd.DataFrame(companies)
  frame.to_json(filename, indent=2, orient='records')
  confirmation = f"Data exported to {filename}"
  print(confirmation)
  return frame

def generate_summary_stats(companies):
  """Print summary statistics for a list of company records using pandas.

  Prints the total record count, the industry distribution, the employee and
  revenue distributions bucketed into ranges, and the five most frequent
  states.

  Args:
    companies: List of company dicts containing at least the keys
      "Industry Type", "Number of Employees", "Annual Revenue" and
      "State/Region". An empty list prints only the header and a zero total.

  Returns:
    None. All results are printed.
  """
  df = pd.DataFrame(companies)

  print("\n" + "="*60)
  print("SUMMARY STATISTICS")
  print("="*60)

  print(f"\nTotal Companies: {len(df)}")
  if df.empty:
    # An empty frame has no columns, so the lookups below would raise KeyError.
    return

  print(f"\nIndustry Distribution:")
  print(df['Industry Type'].value_counts())

  # Employee counts and revenue are raw integers, so counting distinct values
  # gives roughly one row per company. Bucket them into labelled ranges so
  # the output is an actual distribution. Intervals are closed on the right,
  # e.g. 11-50 covers 11..50 inclusive.
  employee_edges = [0, 10, 50, 200, 500, 1000, 5000, float("inf")]
  employee_labels = ["1-10", "11-50", "51-200", "201-500", "501-1000", "1001-5000", "5000+"]
  employee_ranges = pd.cut(df['Number of Employees'], bins=employee_edges, labels=employee_labels)

  print(f"\nEmployee Range Distribution:")
  # sort=False keeps the buckets in ascending range order rather than by count.
  print(employee_ranges.value_counts(sort=False))

  revenue_edges = [0, 1_000_000, 5_000_000, 10_000_000, 50_000_000,
                   100_000_000, 500_000_000, float("inf")]
  revenue_labels = ["$0-1M", "$1M-5M", "$5M-10M", "$10M-50M", "$50M-100M", "$100M-500M", "$500M+"]
  # include_lowest=True so a revenue of exactly 0 lands in the first bucket.
  revenue_ranges = pd.cut(df['Annual Revenue'], bins=revenue_edges,
                          labels=revenue_labels, include_lowest=True)

  print(f"\nRevenue Range Distribution:")
  print(revenue_ranges.value_counts(sort=False))

  print(f"\nTop 5 States:")
  print(df['State/Region'].value_counts().head())

In [6]:
# Build the in-memory list of fake company records.
print(f"Generating {num_records} companies...")
companies = generate_company_data(num_records)

# Optional inspection steps, disabled by default (uncomment as needed):
#   display_companies(companies)        # print every record
#   df = pd.DataFrame(companies)        # DataFrame for ad-hoc analysis
#   generate_summary_stats(companies)   # print distribution summaries

# Write the records to CSV. This is the cell's last expression, so the
# DataFrame that export_to_csv returns is rendered as the cell output.
export_to_csv(companies, "companies.csv")

Generating 1000 companies...

Data exported to companies.csv


Unnamed: 0,Company Domain Name,Company Name,Phone Number,Company Owner,Industry Type,Type,City,State/Region,Country/Region,Postal Code,Number of Employees,Annual Revenue,Time Zone,Description,Linkedin company page,Street Address
0,harrison.com,"Kim, Simpson and Hood",+1(582)985-4184,Brenda Dyer,Alternative Dispute Resolution,Other,New Christine,New York,United States,02844,3154,134313997,America/Lima,Organized dedicated frame. Re-intermediate out...,https://www.linkedin.com/company/kim-simpson-a...,135 Sims Villages Suite 756
1,bell.info,Hampton-Price,+1(815)827-6282,Raymond Miranda,Newspapers,Reseller,Lake Brooke,Maine,United States,99141,290,387572218,Asia/Kabul,Versatile motivating artificial intelligence. ...,https://www.linkedin.com/company/hampton-price,410 Haynes Club Apt. 900
2,hernandez-daniels.net,Miller-Williams,+1(925)646-3051,Christopher Campbell,Judiciary,Vendor,Matthewville,Tennessee,United States,69582,1637,447210186,Europe/Andorra,Future-proofed bandwidth-monitored instruction...,https://www.linkedin.com/company/miller-williams,6689 Bobby Hill Apt. 074
3,turner-carroll.net,Huynh Inc,+1(288)424-2124,Alexander Suarez,Electrical/Electronic Manufacturing,Vendor,Savannahville,Illinois,United States,02999,2112,343246460,Europe/Oslo,Down-sized 4thgeneration service-desk. Reinven...,https://www.linkedin.com/company/huynh-inc,2128 Larsen Estates Apt. 424
4,thompson.com,Shaw-Dixon,+1(982)773-6674,Angel Wilson,Arts and Crafts,Other,New Ronaldborough,Montana,United States,03474,3111,336291252,America/Merida,Compatible exuding complexity. Utilize end-to-...,https://www.linkedin.com/company/shaw-dixon,64411 Adam Bypass
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,ballard.com,Smith PLC,+1(582)207-4512,Robin Farmer,Judiciary,Partner,Lake Thomasland,Massachusetts,United States,05449,2331,397016716,America/Managua,Optional real-time process improvement. Envisi...,https://www.linkedin.com/company/smith-plc,65890 Jonathan Keys
996,hale-pittman.net,"Ortiz, Austin and Zimmerman",+1(626)740-5611,Ronald Cherry,Civic & Social Organization,Other,Lake Eileenburgh,Missouri,United States,94459,3287,284567496,Asia/Beirut,Reduced non-volatile hub. Strategize enterpris...,https://www.linkedin.com/company/ortiz-austin-...,37704 Gibbs Square
997,hubbard.com,"Lamb, Clark and Herman",+1(337)104-5224,Margaret Huffman,Computer Networking,Vendor,Michaelport,Wyoming,United States,71171,3078,85569052,Europe/Oslo,Seamless 6thgeneration infrastructure. Leverag...,https://www.linkedin.com/company/lamb-clark-an...,725 Luke Hills
998,rivera-kim.com,Bradley PLC,+1(379)625-4640,Mariah Cardenas,Food & Beverages,Vendor,North Kelly,Wyoming,United States,07740,754,279442860,Europe/Budapest,Networked national knowledgebase. E-enable sti...,https://www.linkedin.com/company/bradley-plc,380 Mora Knoll


In [None]:
# Scratch check: draw one value from the same 20-5000 range that
# generate_company_data uses for "Number of Employees".
random.randint(20,5000)

461