In [1]:
import pandas as pd
import random
from faker import Faker

In [2]:

# Seed both random sources (the stdlib `random` module and Faker) so that a
# fresh "Restart & Run All" regenerates exactly the same synthetic dataset.
SEED = 42
random.seed(SEED)
Faker.seed(SEED)

# Shared Faker instance used to produce company names for the vendor/consumer column.
fake = Faker()
# Lookup table driving the synthetic data: each transaction description maps to
# its accounting category and the list of subcategories that may be drawn for it.
# Insertion order is kept as-is because rows are sampled from list(mapping.items()).
mapping = {
    # --- Revenue ---
    "Hospital Inpatient Service Fee": {"category": "Revenue", "subcategories": ["Cardiology Inpatient Care"]},
    "Outpatient Clinic Visit Fee": {"category": "Revenue", "subcategories": ["Primary Care Consultation"]},
    "Emergency Room Treatment Fee": {"category": "Revenue", "subcategories": ["Trauma Care"]},
    "Medical Imaging Service Fee": {"category": "Revenue", "subcategories": ["CT Scan"]},
    "Ambulance Transport Service Fee": {"category": "Revenue", "subcategories": ["Emergency Medical Transportation"]},
    "Physical Therapy Session Fee": {"category": "Revenue", "subcategories": ["Rehabilitation Therapy"]},
    "Mental Health Counseling Session Fee": {"category": "Revenue", "subcategories": ["Psychotherapy Session"]},
    "Home Health Care Service Fee": {"category": "Revenue", "subcategories": ["Geriatric Care"]},
    "Telemedicine Consultation Fee": {"category": "Revenue", "subcategories": ["Remote Psychiatry Consultation"]},
    "Health and Wellness Program Enrollment": {"category": "Revenue", "subcategories": ["Nutrition Counseling Program"]},
    # --- Expense ---
    "Medical Staff Salaries Payment": {"category": "Expense", "subcategories": ["Nursing Staff"]},
    "Medical Supplies Purchase": {"category": "Expense", "subcategories": ["Surgical Consumables"]},
    "Rent Payment for Medical Office Space": {"category": "Expense", "subcategories": ["Clinic Facility Lease"]},
    "Utilities Payment": {"category": "Expense", "subcategories": ["Water and Gas Utility Bills"]},
    "Medical Equipment Maintenance Service Fee": {"category": "Expense", "subcategories": ["Biomedical Equipment Service"]},
    "Training Expense": {"category": "Expense", "subcategories": ["Clinical Skills Training"]},
    "Marketing and Advertising Expense": {"category": "Expense", "subcategories": ["Digital Marketing Campaign"]},
    "IT Support Service Fee": {"category": "Expense", "subcategories": ["Electronic Health Record (EHR) Maintenance"]},
    "Legal Fees Payment": {"category": "Expense", "subcategories": ["Healthcare Compliance Legal Consultation"]},
    "Facility Renovation Expense": {"category": "Expense", "subcategories": ["Hospital Renovation Project"]},
    # --- Asset ---
    "Purchase of MRI Machine": {"category": "Asset", "subcategories": ["Diagnostic Imaging Equipment"]},
    "Construction for New Facility": {"category": "Asset", "subcategories": ["Hospital Building Construction"]},
    "Investment in Medical Research Equipment": {"category": "Asset", "subcategories": ["Laboratory Research Instruments"]},
    "Acquisition of Ambulance Fleet": {"category": "Asset", "subcategories": ["Emergency Response Vehicles"]},
    "Purchase of Real Estate for Healthcare Campus": {"category": "Asset", "subcategories": ["Medical Center Property Acquisition"]},
    # --- Liability / Equity (first group) ---
    "Loan from Bank for Facility Expansion": {"category": "Liability", "subcategories": ["Long-Term Debt"]},
    "Accounts Payable for Medical Supplies": {"category": "Liability", "subcategories": ["Supplier Invoices"]},
    "Accrued Salaries Payable": {"category": "Liability", "subcategories": ["Employee Wages Owed"]},
    "Investment from Shareholders for Hospital Expansion": {"category": "Equity", "subcategories": ["Common Stock"]},
    "Retained Earnings Allocation": {"category": "Equity", "subcategories": ["Profit Retention"]},
    # --- Liability (second group) ---
    "Accrued Expenses for Medical Malpractice Claims": {"category": "Liability", "subcategories": ["Litigation Settlements"]},
    "Deferred Revenue for Prepaid Health Services": {"category": "Liability", "subcategories": ["Advance Payments for Future Treatments"]},
    "Lease Obligations for Medical Equipment": {"category": "Liability", "subcategories": ["Equipment Lease Payments"]},
    "Deferred Tax Liability": {"category": "Liability", "subcategories": ["Tax Obligations Deferred to Future Periods"]},
    "Accounts Payable for Utility Services": {"category": "Liability", "subcategories": ["Outstanding Utility Invoices"]},
    # --- Equity (second group) ---
    "Issuance of Preferred Stock for Capital Infusion": {"category": "Equity", "subcategories": ["Preferred Stock Offering"]},
    "Dividend Declaration to Shareholders": {"category": "Equity", "subcategories": ["Distribution of Profits"]},
    "Stock Option Grants to Employees": {"category": "Equity", "subcategories": ["Employee Stock Options"]},
    "Capital Contribution from Partners for New Healthcare Facility": {"category": "Equity", "subcategories": ["Capital Injection"]},
    "Treasury Stock Repurchase": {"category": "Equity", "subcategories": ["Buyback of Company Shares"]},
    # --- Insurance (the only entries with several subcategories) ---
    "Health Insurance": {
        "category": "Expense",
        "subcategories": [
            "Medical cost from government program",
            "Insurance company cover medical cost",
            "Losses recovery from government",
            "Salary payments Salaries paid to healthcare providers by insurance companies",
        ],
    },
    "Medical Insurance": {
        "category": "Other Income",
        "subcategories": [
            "Insurance reimbursements",
            "Premium insurance",
            "Capitation Payments",
            "Risk Adjustment Payments",
        ],
    },
}


def adjust_amount(category, subcategory, amount):
    """
    Scales the amount by a random factor whose range depends on the category.

    Parameters:
        category (str): The category of the transaction.
        subcategory (str): The subcategory of the transaction (accepted for
            interface compatibility; it does not currently affect the result).
        amount (float): The original amount of the transaction.

    Returns:
        float: The adjusted amount, rounded to 2 decimal places. Categories
        without a configured range are only rounded, not scaled.
    """
    # (low, high) multiplier bounds per category.
    multiplier_ranges = {
        "Revenue": (1.1, 1.5),        # 10% to 50% higher
        # The mapping table spells this category "Expense"; the previous check
        # only matched "Expenses", so expense rows were never adjusted.
        "Expense": (0.7, 1.2),        # 30% lower to 20% higher
        "Expenses": (0.7, 1.2),       # legacy spelling kept for existing callers
        "Asset": (0.8, 1.2),          # 20% lower to 20% higher
        "Liability": (0.8, 1.2),      # 20% lower to 20% higher
        "Equity": (0.9, 1.1),         # 10% lower to 10% higher
        "Other Income": (0.95, 1.05), # 5% lower to 5% higher (used by "Medical Insurance")
    }

    bounds = multiplier_ranges.get(category)
    if bounds is not None:
        amount *= random.uniform(*bounds)
    return round(amount, 2)

def generate_data(mapping, num_entries=2500):
    """
    Generates random transaction rows and a list of vendor/consumer names.

    Parameters:
        mapping (dict): Maps each description to a dict holding a "category"
            string and a "subcategories" list to draw from.
        num_entries (int): Number of transaction rows to generate.

    Returns:
        tuple: (data, vendors_consumers) where data is a list of
        [description, amount, category, subcategory] rows and
        vendors_consumers is a shuffled list of names padded up to
        num_entries entries.
    """
    data = []
    vendors_consumers = []

    # Loop-invariant values: build them once instead of on every iteration.
    entries = list(mapping.items())
    well_known_companies = ["Apple Inc.", "Microsoft Corporation", "Amazon.com, Inc.", "Google LLC"]
    repeat_quota = num_entries // 5

    for _ in range(num_entries):
        desc, info = random.choice(entries)
        category = info["category"]
        subcategory = random.choice(info["subcategories"])
        amount = round(random.uniform(1000, 1000000), 2)
        amount = adjust_amount(category, subcategory, amount)
        data.append([desc, amount, category, subcategory])

        # Until the quota is reached, add names that repeat 2-4 times so some
        # vendors/consumers occur more than once in the final column.
        if len(vendors_consumers) < repeat_quota:
            if random.random() < 0.7:
                vendor_consumer = fake.company()
            else:
                vendor_consumer = random.choice(well_known_companies)
            vendors_consumers.extend([vendor_consumer] * random.randint(2, 4))  # Repeat 2-4 times

    # Pad with fresh company names so the list matches the number of rows.
    missing = num_entries - len(vendors_consumers)
    vendors_consumers.extend([fake.company() for _ in range(missing)])
    random.shuffle(vendors_consumers)  # Shuffle to randomize the order
    return data, vendors_consumers

# Produce the synthetic transaction rows and the vendor/consumer names
data, vendors_consumers = generate_data(mapping)

# Load the rows into a DataFrame
columns = ['Description', 'Amount', 'Category', 'Subcategory']
df = pd.DataFrame(data, columns=columns)
row_count = len(df)

# Metadata columns: today's date, a random 4-digit company id, fixed industry
df['Date'] = pd.Timestamp.now().strftime('%Y-%m-%d')
df['Company ID'] = [random.randint(1000, 9999) for _ in range(row_count)]
df['Industry'] = 'Healthcare'

# Vendor/consumer names come from the list built alongside the rows
df['Vendor_or_Consumer'] = vendors_consumers[:row_count]

# Payment details: random per-row values plus fixed currency/location
df['Payment_Method'] = [
    random.choice(['Check', 'Cash', 'Bank Transfer']) for _ in range(row_count)
]
df['Currency'] = 'USD'
df['Geographical_Location'] = 'USA'
df['Account_Number'] = [random.randint(10000000, 99999999) for _ in range(row_count)]
df['Reference_Number'] = [random.randint(1000000000, 9999999999) for _ in range(row_count)]
df['Approval_Status'] = [
    random.choice(['Approved', 'Pending', 'Rejected']) for _ in range(row_count)
]

# Flag rows whose description contains "new" (case-insensitive substring match)
df['Customer_Type'] = df['Description'].map(
    lambda text: 'New Patient' if 'new' in text.lower() else 'Other'
)

# Persist the dataset next to the notebook
df.to_csv('healthcare.csv', index=False)

# Preview the first rows
print(df.head())


                             Description      Amount   Category  \
0        Ambulance Transport Service Fee   214222.44    Revenue   
1                       Health Insurance   521934.55    Expense   
2   Dividend Declaration to Shareholders   199618.17     Equity   
3   Mental Health Counseling Session Fee  1471679.21    Revenue   
4  Loan from Bank for Facility Expansion   782170.74  Liability   

                            Subcategory        Date  Company ID    Industry  \
0      Emergency Medical Transportation  2024-05-14        2070  Healthcare   
1  Insurance company cover medical cost  2024-05-14        8917  Healthcare   
2               Distribution of Profits  2024-05-14        2027  Healthcare   
3                 Psychotherapy Session  2024-05-14        4386  Healthcare   
4                        Long-Term Debt  2024-05-14        7031  Healthcare   

         Vendor_or_Consumer Payment_Method Currency Geographical_Location  \
0     Microsoft Corporation  Bank Transfer   

In [None]:
# Reload the dataset from the same relative path it was saved to
# ('healthcare.csv'), rather than a hard-coded absolute path that only
# exists in one hosted environment.
df = pd.read_csv('healthcare.csv')

# How many rows were generated for each description
df['Description'].value_counts()

Description
Acquisition of Ambulance Fleet                                    80
Treasury Stock Repurchase                                         73
Physical Therapy Session Fee                                      72
Training Expense                                                  72
Dividend Declaration to Shareholders                              71
Accounts Payable for Medical Supplies                             70
Accrued Salaries Payable                                          70
Medical Staff Salaries Payment                                    68
Lease Obligations for Medical Equipment                           67
Health and Wellness Program Enrollment                            67
Accounts Payable for Utility Services                             66
Purchase of MRI Machine                                           65
Health Insurance                                                  65
Rent Payment for Medical Office Space                             64
Capital Contribution f

In [None]:
def search_data(keyword, frame=None):
    """
    Searches the 'Description' column for the given keyword.

    The match is a case-insensitive, literal substring match: characters such
    as '(' or '+' in the keyword are not interpreted as a regular expression.
    Rows with a missing description never match.

    Parameters:
        keyword (str): The keyword to search for in the dataset.
        frame (pandas.DataFrame, optional): The DataFrame to search. Defaults
            to the notebook-level `df` when omitted.

    Returns:
        pandas.DataFrame: A DataFrame containing the filtered results.
    """
    if frame is None:
        frame = df

    # Convert the keyword to lowercase for case-insensitive search
    keyword = keyword.lower()

    # regex=False: treat user input literally so e.g. "(EHR" cannot raise re.error.
    # na=False: missing descriptions yield False instead of NaN in the mask.
    matches = frame['Description'].str.lower().str.contains(keyword, regex=False, na=False)

    return frame[matches]


# Ask for a keyword interactively and look it up in the dataset
search_keyword = input("Enter a keyword to search: ")
results = search_data(search_keyword)

# Report the outcome: the matching rows, or a notice when nothing matched
if not results.empty:
    print("Matching records found:")
    print(results)
else:
    print("No matching records found.")

Enter a keyword to search: Medical Insurance
Matching records found:
            Description     Amount      Category               Subcategory  \
104   Medical Insurance  712745.01  Other Income       Capitation Payments   
143   Medical Insurance  351394.35  Other Income  Insurance reimbursements   
156   Medical Insurance  408694.87  Other Income  Risk Adjustment Payments   
222   Medical Insurance  548498.71  Other Income       Capitation Payments   
231   Medical Insurance  950102.87  Other Income  Insurance reimbursements   
312   Medical Insurance  502768.17  Other Income       Capitation Payments   
318   Medical Insurance  268034.07  Other Income         Premium insurance   
331   Medical Insurance  287009.58  Other Income  Insurance reimbursements   
394   Medical Insurance  325275.28  Other Income       Capitation Payments   
590   Medical Insurance  941664.14  Other Income  Risk Adjustment Payments   
620   Medical Insurance   62476.08  Other Income       Capitation Payment