In [None]:
import pandas as pd
import random
from faker import Faker

In [None]:
# Account mapping for the synthetic manufacturer ledger.
# Each key is a transaction description; its value holds the accounting
# "category" and the list of "subcategories" that generate_data() samples from.
#
# Bound as a fresh dict rather than via `mapping.update(...)`: no visible cell
# defines `mapping` beforehand, so update() raises NameError on a fresh kernel
# and, on a warm kernel, silently merges in entries left over from earlier work.
mapping = {
    "Product Sales": {
        "category": "Revenue",
        "subcategories": [
            "Sales of machinery parts and components",
            "Sales of industrial tools and equipment",
            "Sales of automotive accessories and components",
            "Sales of electronic components and devices",
            "Sales of medical equipment and supplies",
            "Sales of construction machinery and materials",
            "Sales of aerospace components and systems",
            "Sales of agricultural machinery and implements",
            "Sales of renewable energy systems and components",
            "Sales of mining equipment and materials",
            "Sales of marine equipment and parts",
            "Sales of HVAC systems and components"
        ]
    },
    "Service Revenue": {
        "category": "Revenue",
        "subcategories": [
            "Revenue from equipment installation and commissioning services",
            "Revenue from equipment maintenance contracts and services",
            "Revenue from product customization and engineering consulting",
            "Revenue from technical support and troubleshooting services",
            "Revenue from training programs and certification courses",
            "Revenue from aftermarket parts sales and service contracts",
            "Revenue from product design and prototyping services",
            "Revenue from quality control and testing services",
            "Revenue from supply chain optimization consulting"
        ]
    },
    "Digital Sales": {
        "category": "Revenue",
        "subcategories": [
            "Revenue from software licenses and subscriptions",
            "Revenue from digital product downloads and upgrades",
            "Revenue from online training and certification programs",
            "Revenue from digital asset management platforms",
            "Revenue from virtual product simulations and demonstrations",
            "Revenue from e-commerce platforms for parts and components"
        ]
    },
    "Cost of Goods Sold (COGS)": {
        "category": "Expense",
        "subcategories": [
            "Cost of raw materials and commodities",
            "Cost of manufacturing labor and overhead",
            "Cost of machinery and equipment depreciation",
            "Cost of energy consumption and utilities",
            "Cost of packaging materials and containers",
            "Cost of transportation and logistics",
            "Cost of quality assurance and testing"
        ]
    },
    "Operating Expenses": {
        "category": "Expense",
        "subcategories": [
            "Costs of research and development (R&D)",
            "Costs of marketing and advertising campaigns",
            "Costs of sales commissions and incentives",
            "Costs of administrative salaries and wages",
            "Costs of office rent and utilities",
            "Costs of insurance premiums and risk management",
            "Costs of legal fees and compliance consulting",
            "Costs of information technology infrastructure"
        ]
    },
    "Loss and Waste": {
        "category": "Expense",
        "subcategories": [
            "Costs of defective product recalls and replacements",
            "Costs of excess inventory storage and management",
            "Costs of waste disposal and environmental compliance",
            "Costs of rework and scrap materials",
            "Costs of warranty claims and customer returns"
        ]
    },
    "Inventory": {
        "category": "Asset",
        "subcategories": [
            "Raw material inventory levels and valuation",
            "Work-in-progress (WIP) inventory tracking",
            "Finished goods inventory management",
            "Inventory turnover ratios and optimization strategies"
        ]
    },
    "Fixed Assets": {
        "category": "Asset",
        "subcategories": [
            "Machinery and equipment depreciation schedules",
            "Building and facility asset management",
            "Vehicle fleet management and maintenance",
            "Technology infrastructure and software assets"
        ]
    },
    "Intangible Assets": {
        "category": "Asset",
        "subcategories": [
            "Intellectual property portfolio management",
            "Software licenses and development projects",
            "Brand equity and reputation management",
            "Customer relationship management systems"
        ]
    },
    "Accounts Payable": {
        "category": "Liability",
        "subcategories": [
            "Supplier invoices processing and payments",
            "Purchase order management and tracking",
            "Trade credit agreements and terms negotiation",
            "Accrued expenses for goods and services received"
        ]
    },
    "Loans and Financing": {
        "category": "Liability",
        "subcategories": [
            "Bank loans and lines of credit utilization",
            "Equipment financing agreements and repayments",
            "Debt covenants compliance monitoring",
            "Interest payments and loan amortization"
        ]
    },
    "Product Liabilities": {
        "category": "Liability",
        "subcategories": [
            "Warranty reserves and provisions",
            "Product liability insurance coverage",
            "Legal claims and litigation expenses",
            "Product safety and compliance costs"
        ]
    },
    "Owner's Equity": {
        "category": "Equity",
        "subcategories": [
            "Capital contributions from owners and shareholders",
            "Retained earnings allocation and reinvestment",
            "Dividends declaration and distribution"
        ]
    },
    "External Investments": {
        "category": "Equity",
        "subcategories": [
            "Equity investments from venture capital firms",
            "Private equity financing and partnerships",
            "Joint ventures and strategic alliances"
        ]
    },
    "Retained Earnings and Dividends": {
        "category": "Equity",
        "subcategories": [
            "Retained earnings utilization for growth initiatives",
            "Dividend policy development and implementation",
            "Share repurchase programs and equity buybacks"
        ]
    }
}

# Inclusive (low, high) multiplier bounds per accounting category.
# "Expense" is the spelling used by the mapping in this notebook; the plural
# "Expenses" is kept so callers that relied on the old spelling still work.
_AMOUNT_FACTOR_RANGES = {
    "Revenue": (1.1, 1.5),    # 10% to 50% higher
    "Expense": (0.7, 1.2),    # 30% lower to 20% higher
    "Expenses": (0.7, 1.2),   # legacy spelling, same range as "Expense"
    "Asset": (0.8, 1.2),      # 20% lower to 20% higher
    "Liability": (0.8, 1.2),  # 20% lower to 20% higher
    "Equity": (0.9, 1.1),     # 10% lower to 10% higher
}


def adjust_amount(category, subcategory, amount):
    """Scale a raw amount by a random, category-dependent factor.

    Args:
        category: Accounting category name. Recognised values are the keys of
            ``_AMOUNT_FACTOR_RANGES``.
        subcategory: Accepted for call compatibility; not used in the
            calculation.
        amount: Raw numeric amount to adjust.

    Returns:
        ``amount`` multiplied by a factor drawn with ``random.uniform`` from
        the category's range, rounded to 2 decimal places. For an unrecognised
        category the amount is only rounded.
    """
    bounds = _AMOUNT_FACTOR_RANGES.get(category)
    if bounds is not None:
        low, high = bounds
        amount *= random.uniform(low, high)
    return round(amount, 2)

def generate_data(mapping, num_entries=2500):
    """Build random ledger rows plus a matching list of counterparty names.

    Args:
        mapping: Dict of description -> {"category": str,
            "subcategories": list of str}.
        num_entries: Number of rows to generate.

    Returns:
        A ``(data, vendors_consumers)`` tuple. ``data`` is a list of
        ``[description, amount, category, subcategory]`` rows.
        ``vendors_consumers`` is a shuffled list of company names padded to
        ``num_entries`` items.

    Note:
        Relies on the module-level ``fake`` object (presumably a ``Faker``
        instance; it must provide ``company()``) and on ``adjust_amount``.
    """
    # Materialise the items once; the mapping is not modified inside the loop.
    entries = list(mapping.items())
    well_known = ["Apple Inc.", "Microsoft Corporation", "Amazon.com, Inc.", "Google LLC"]
    # Repeated names are only added while the list is shorter than this.
    repeat_threshold = num_entries // 5

    data = []
    vendors_consumers = []
    for _ in range(num_entries):
        desc, info = random.choice(entries)
        category = info["category"]
        subcategory = random.choice(info["subcategories"])
        amount = adjust_amount(category, subcategory, random.uniform(100, 10000))
        data.append([desc, amount, category, subcategory])

        if len(vendors_consumers) < repeat_threshold:
            # 70% generated company name, otherwise one of the fixed well-known names.
            vendor_consumer = fake.company() if random.random() < 0.7 else random.choice(well_known)
            vendors_consumers.extend([vendor_consumer] * random.randint(2, 4))  # Repeat 2-4 times

    # Pad with one-off company names so the list length matches the row count.
    missing = num_entries - len(vendors_consumers)
    vendors_consumers.extend([fake.company() for _ in range(missing)])
    random.shuffle(vendors_consumers)  # Randomize the order
    return data, vendors_consumers

# Seed both random sources so Restart & Run All reproduces the same dataset.
SEED = 42
random.seed(SEED)
Faker.seed(SEED)

# Faker instance used by generate_data() and by the Date column below.
# No visible cell creates it, so it is instantiated here to keep the cell
# runnable on a fresh kernel.
fake = Faker()

# Generate data
data, vendors_consumers = generate_data(mapping)

# Convert to DataFrame
columns = ['Description', 'Amount', 'Category', 'Subcategory']
df = pd.DataFrame(data, columns=columns)
n_rows = len(df)

# Add other columns
df['Date'] = [fake.date_between(start_date='-3y', end_date='today').strftime('%Y-%m-%d') for _ in range(n_rows)]
df['Company ID'] = [random.randint(1000, 9999) for _ in range(n_rows)]
df['Industry'] = 'Manufacturer'

# Assign vendors/consumers from the generated list
df['vendor_or_consumer'] = vendors_consumers[:n_rows]

df['Payment Method'] = [random.choice(['Check', 'Cash', 'Bank Transfer','Card']) for _ in range(n_rows)]
df['Currency'] = 'USD'
df['Geographical Location'] = 'USA'
df['Account Number'] = [random.randint(10000000, 99999999) for _ in range(n_rows)]
df['Reference Number'] = [random.randint(1000000000, 9999999999) for _ in range(n_rows)]
df['Approval Status'] = [random.choice(['Approved', 'Pending', 'Rejected']) for _ in range(n_rows)]

# NOTE(review): none of the descriptions defined in this cell contain "new",
# so every row ends up as 'Existing Customer' — confirm this is intended.
df['Customer Type'] = df['Description'].apply(lambda x: 'New Customer' if 'new' in x.lower() else 'Existing Customer')

# Save to CSV
df.to_csv('manufacturer.csv', index=False)

# Display first few rows (bare last expression gives the notebook's rich table)
df.head()



                         Description   Amount   Category  \
0                      Service Sales  3864.13    Revenue   
1                  Retained Earnings  3040.78     Equity   
2                      Digital Sales  9365.25    Revenue   
3                      Service Sales  1516.31    Revenue   
4  Customer Deposits and Liabilities   819.83  Liability   

                                         Subcategory        Date  Company ID  \
0                  Personal shopping service revenue  2023-03-02        4594   
1             Profits reinvested for business growth  2022-12-19        1130   
2  Revenue from virtual product simulations and d...  2023-01-24        6855   
3                      Installation services revenue  2022-07-24        2162   
4  Refund liabilities for product returns and exc...  2023-10-17        1552   

       Industry          vendor_or_consumer Payment Method Currency  \
0  Manufacturer  Garner, Brown and Martinez          Check      USD   
1  Manufacturer 

In [None]:
# Reload the generated dataset. The relative path matches the
# `df.to_csv('manufacturer.csv', ...)` call in the generation cell, so the
# notebook does not depend on a Colab-specific `/content` directory.
df = pd.read_csv('manufacturer.csv')
df

Unnamed: 0,Description,Amount,Category,Subcategory,Date,Company ID,Industry,vendor_or_consumer,Payment Method,Currency,Geographical Location,Account Number,Reference Number,Approval Status,Customer Type
0,Service Sales,3864.13,Revenue,Personal shopping service revenue,2023-03-02,4594,Manufacturer,"Garner, Brown and Martinez",Check,USD,USA,85942447,7067003327,Approved,Existing Customer
1,Retained Earnings,3040.78,Equity,Profits reinvested for business growth,2022-12-19,1130,Manufacturer,Suarez-White,Card,USD,USA,63188055,7425718862,Rejected,Existing Customer
2,Digital Sales,9365.25,Revenue,Revenue from virtual product simulations and d...,2023-01-24,6855,Manufacturer,Hawkins-Hayden,Check,USD,USA,45587887,9893118056,Approved,Existing Customer
3,Service Sales,1516.31,Revenue,Installation services revenue,2022-07-24,2162,Manufacturer,Hunt-Lewis,Bank Transfer,USD,USA,59618995,5659592699,Rejected,Existing Customer
4,Customer Deposits and Liabilities,819.83,Liability,Refund liabilities for product returns and exc...,2023-10-17,1552,Manufacturer,Coffey LLC,Bank Transfer,USD,USA,66268690,6605440100,Approved,Existing Customer
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2495,Fixed Assets,8969.81,Asset,Building and facility asset management,2023-12-17,7173,Manufacturer,Gonzales-Duran,Check,USD,USA,67120702,6168794991,Pending,Existing Customer
2496,Shareholder Contributions,1055.51,Equity,Dividend payments to shareholders,2021-07-27,8629,Manufacturer,Smith-Le,Bank Transfer,USD,USA,21480598,1464549460,Rejected,Existing Customer
2497,Operating Expenses,5397.88,Expense,Costs of legal fees and compliance consulting,2022-12-06,1072,Manufacturer,Pitts PLC,Card,USD,USA,80539682,8487388176,Rejected,Existing Customer
2498,Operational Costs,4966.96,Expense,Marketing and advertising costs,2021-07-24,7213,Manufacturer,Russell-Grant,Cash,USD,USA,24668775,2559506977,Approved,Existing Customer
