In [8]:
from faker import Faker
faker = Faker('pt_BR')
Faker.seed(0)


# Sales Parts

I'm first going to generate the _Sales_Parts_ table, that contains all the **parts sold directly to the costumer**

- There are 36 possible parts, and some of them much more likely needing repair than others.
- The quantity is at least 1 and at most 4, except for those parts in _single_quantity_parts_, that will never have more than one unity.
- All the _CREATE_DATE_ dates are this year (2024).
- Since we will have 5 000 unique _DEALER_ORDER_ID_ and in each event more than one part can be sold, I'm assuming that **on avarage** two different parts are sold for each deal, we have a total of 10 000 parts sold in this table.

In [10]:
parts = {
    "BP001": "Battery Pack",
    "EM002": "Electric Motor",
    "IN003": "Inverter",
    "CP004": "Charging Port",
    "DC005": "DC-DC Converter",
    "BMS006": "Battery Management System (BMS)",
    "PEC007": "Power Electronics Controller",
    "TMS008": "Thermal Management System",
    "RBS009": "Regenerative Braking System",
    "OC010": "Onboard Charger",
    "HVC011": "High Voltage Cables",
    "EDU012": "Electric Drive Unit",
    "RG013": "Reduction Gear",
    "HP014": "Heat Pump",
    "EAC015": "Electric Air Conditioning Compressor",
    "EWP016": "Electric Water Pump",
    "BP017": "Brake Pads",
    "BD018": "Brake Discs",
    "TI019": "Tires",
    "WB020": "Wheel Bearings",
    "WS021": "Windshield",
    "MI022": "Mirrors",
    "HL023": "Headlights",
    "TL024": "Taillights",
    "WB025": "Wiper Blades",
    "SC026": "Suspension Components (Shocks, Struts)",
    "CA027": "Control Arms",
    "TRE028": "Tie Rod Ends",
    "CVJ029": "CV Joints",
    "AX030": "Axles",
    "CL031": "Coolant (for thermal management)",
    "BF032": "Brake Fluid",
    "PSF033": "Power Steering Fluid (if applicable)",
    "CAF034": "Cabin Air Filter",
    "FB035": "Fuse Box",
    "AB036": "12V Auxiliary Battery"
}

single_quantity_parts = [   
    "BP001",  # Battery Pack
    "EM002",  # Electric Motor
    "IN003",  # Inverter
    "CP004",  # Charging Port
    "DC005",  # DC-DC Converter
    "BMS006", # Battery Management System (BMS)
    "PEC007", # Power Electronics Controller
    "TMS008", # Thermal Management System
    "RBS009", # Regenerative Braking System
    "OC010",  # Onboard Charger
    "EDU012", # Electric Drive Unit
    "RG013",  # Reduction Gear
    "HP014",  # Heat Pump
    "EAC015", # Electric Air Conditioning Compressor
    "EWP016", # Electric Water Pump
    "WS021",  # Windshield
    "FB035",  # Fuse Box
    "AB036"   # 12V Auxiliary Battery
]

In [12]:
parts_with_weights = [
    ("BP001", "Battery Pack", 1),
    ("EM002", "Electric Motor", 1),
    ("IN003", "Inverter", 2),
    ("CP004", "Charging Port", 1),
    ("DC005", "DC-DC Converter", 1),
    ("BMS006", "Battery Management System (BMS)", 1),
    ("PEC007", "Power Electronics Controller", 1),
    ("TMS008", "Thermal Management System", 2),
    ("RBS009", "Regenerative Braking System", 2),
    ("OC010", "Onboard Charger", 1),
    ("HVC011", "High Voltage Cables", 1),
    ("EDU012", "Electric Drive Unit", 1),
    ("RG013", "Reduction Gear", 1),
    ("HP014", "Heat Pump", 1),
    ("EAC015", "Electric Air Conditioning Compressor", 2),
    ("EWP016", "Electric Water Pump", 2),
    ("BP017", "Brake Pads", 5),
    ("BD018", "Brake Discs", 3),
    ("TI019", "Tires", 4),
    ("WB020", "Wheel Bearings", 2),
    ("WS021", "Windshield", 1),
    ("MI022", "Mirrors", 1),
    ("HL023", "Headlights", 2),
    ("TL024", "Taillights", 2),
    ("WB025", "Wiper Blades", 4),
    ("SC026", "Suspension Components (Shocks, Struts)", 3),
    ("CA027", "Control Arms", 2),
    ("TRE028", "Tie Rod Ends", 2),
    ("CVJ029", "CV Joints", 2),
    ("AX030", "Axles", 2),
    ("CL031", "Coolant (for thermal management)", 2),
    ("BF032", "Brake Fluid", 2),
    ("PSF033", "Power Steering Fluid (if applicable)", 1),
    ("CAF034", "Cabin Air Filter", 4),
    ("FB035", "Fuse Box", 1),
    ("AB036", "12V Auxiliary Battery", 1)
]

In [26]:
import pandas as pd
import random
from random import randint

def generate_sales_parts_random(x):
    
    data = pd.DataFrame()   
    
    for i in range(1,x+1):
        part_code = random.choice(list(parts.keys()))
        part_name = parts[part_code]

        create_date = faker.date_this_year()

        data.loc[i,'SALE_PART_ID'] = i
        data.loc[i,'DEALER_ORDER_ID'] = randint(1,5000)
        data.loc[i,'PART_CODE'] = part_code
        data.loc[i,'PART_NAME'] = part_name
        
        if part_code in single_quantity_parts:
            data.loc[i,'PART_QUANTITY'] = 1
        else:
            data.loc[i,'PART_QUANTITY'] = randint(1,4)
            
        data.loc[i,'CREATE_BY'] = '9999999999'
        data.loc[i,'CREATE_DATE'] = create_date
        data.loc[i,'UPDATE_BY'] = '9999999988'
        data.loc[i,'UPDATE_DATE'] = faker.date_between(create_date)
    return data


def generate_sales_parts_weighted(x):

    part_codes,part_names,part_weights = zip(*parts_with_weights)
    
    data = pd.DataFrame()   
    
    for i in range(1,x+1):

        part_index = random.choices(range(len(part_codes)), weights = part_weights, k=1)[0] ##Gets the one and only element of the returned list
        part_code = part_codes[part_index]
        part_name = part_names[part_index]

        create_date = faker.date_this_year()

        data.loc[i,'SALE_PART_ID'] = i
        data.loc[i,'DEALER_ORDER_ID'] = randint(1,5000)
        data.loc[i,'PART_CODE'] = part_code
        data.loc[i,'PART_NAME'] = part_name
        
        if part_code in single_quantity_parts:
            data.loc[i,'PART_QUANTITY'] = 1
        else:
            data.loc[i,'PART_QUANTITY'] = randint(1,4)
            
        data.loc[i,'CREATE_BY'] = '9999999999'
        data.loc[i,'CREATE_DATE'] = create_date
        data.loc[i,'UPDATE_BY'] = '9999999988'
        data.loc[i,'UPDATE_DATE'] = faker.date_between(create_date)
    return data




In [28]:
sales_parts = generate_sales_parts_weighted(10000)

In [29]:
sales_parts

Unnamed: 0,SALE_PART_ID,DEALER_ORDER_ID,PART_CODE,PART_NAME,PART_QUANTITY,CREATE_BY,CREATE_DATE,UPDATE_BY,UPDATE_DATE
1,1.0,3485.0,TMS008,Thermal Management System,1.0,9999999999,2024-05-29,9999999988,2024-07-08
2,2.0,1390.0,WB020,Wheel Bearings,3.0,9999999999,2024-01-16,9999999988,2024-03-06
3,3.0,2542.0,CP004,Charging Port,1.0,9999999999,2024-07-17,9999999988,2024-07-19
4,4.0,2846.0,EWP016,Electric Water Pump,1.0,9999999999,2024-06-06,9999999988,2024-07-20
5,5.0,1083.0,HL023,Headlights,1.0,9999999999,2024-04-27,9999999988,2024-06-12
...,...,...,...,...,...,...,...,...,...
9996,9996.0,2763.0,HL023,Headlights,1.0,9999999999,2024-02-13,9999999988,2024-06-09
9997,9997.0,2168.0,TRE028,Tie Rod Ends,1.0,9999999999,2024-04-25,9999999988,2024-05-13
9998,9998.0,3825.0,EAC015,Electric Air Conditioning Compressor,1.0,9999999999,2024-05-31,9999999988,2024-05-31
9999,9999.0,3192.0,SC026,"Suspension Components (Shocks, Struts)",4.0,9999999999,2024-03-24,9999999988,2024-04-19


In [32]:
sales_parts.to_csv('sales_parts_data_weigth.csv',index=False)

# Parts Data

All the parts used in the repair process, including those whose the need were discovered during the repair.

- It has all the Parts sold in Sales Parts
- Also has some parts _extra_parts_.

In [87]:
extra_parts = {
    "CP004": "Charging Port",
    "DC005": "DC-DC Converter",
    "BMS006": "Battery Management System (BMS)",
    "PEC007": "Power Electronics Controller",
    "RBS009": "Regenerative Braking System",
    "OC010": "Onboard Charger",
    "HP014": "Heat Pump",
    "EAC015": "Electric Air Conditioning Compressor",
    "EWP016": "Electric Water Pump",
    "BP017": "Brake Pads",
    "WB020": "Wheel Bearings",
    "WB025": "Wiper Blades",
    "SC026": "Suspension Components (Shocks, Struts)",
    "TRE028": "Tie Rod Ends",
    "CVJ029": "CV Joints",
    "AX030": "Axles",
    "CL031": "Coolant (for thermal management)",
    "BF032": "Brake Fluid",
    "PSF033": "Power Steering Fluid (if applicable)",
    "CAF034": "Cabin Air Filter",
    "AB036": "12V Auxiliary Battery"
}


In [95]:
def generate_repair_order_parts_from_sales_parts(sales_parts_data):

    data = pd.DataFrame()
    
    for i, row in sales_parts_data.iterrows():   #Adds all the parts sold directly to the costumer in sales_parts table
        
        t = i
    
        create_date = row['CREATE_DATE']

        data.loc[i,'SALE_PART_ID'] = i + 1
        data.loc[i,'DEALER_ORDER_ID'] = row['DEALER_ORDER_ID']
        data.loc[i,'PART_CODE'] = row['PART_CODE']
        data.loc[i,'PART_NAME'] = row['PART_NAME']
        data.loc[i,'PART_QUANTITY'] = row['PART_QUANTITY']
        data.loc[i,'MAIN_PART_FLAG'] = 'N'
        data.loc[i,'DOWN_PART_CODE'] = '000'
        data.loc[i,'UP_PART_CODE'] = '111'
        data.loc[i,'CREATE_BY'] = '9999999999'
        data.loc[i,'CREATE_DATE'] = create_date
        data.loc[i,'UPDATE_BY'] = '9999999988'
        data.loc[i,'UPDATE_DATE'] = faker.date_between(create_date)

    
    for i in range (t + 1, t + t//3):    #Suppose 1/3 of the orders need some extra part

        part_code = random.choice(list(extra_parts.keys()))
        part_name = parts[part_code]

        create_date = faker.date_this_year()
        
        data.loc[i,'SALE_PART_ID'] = i
        data.loc[i,'DEALER_ORDER_ID'] = randint(1,5000)
        data.loc[i,'PART_CODE'] = part_code
        data.loc[i,'PART_NAME'] = part_name
        data.loc[i,'PART_QUANTITY'] = 1
        data.loc[i,'MAIN_PART_FLAG'] = 'N'
        data.loc[i,'DOWN_PART_CODE'] = '000'
        data.loc[i,'UP_PART_CODE'] = '111'
        data.loc[i,'CREATE_BY'] = '9999999999'
        data.loc[i,'CREATE_DATE'] = create_date
        data.loc[i,'UPDATE_BY'] = '9999999988'
        data.loc[i,'UPDATE_DATE'] = faker.date_between(create_date)

    return data
    

In [97]:
parts_data = generate_repair_order_parts_from_sales_parts(sales_parts)
parts_data
parts_data.to_csv('parts_data.csv',index = False )

# Repair Order Labour Data

This table contains all the labours needed in a order.

- There are 20 possible labour activities
- These labours are mapped to the parts, since each labours uses a part.
- This table is based on the Parts table.

In [74]:
labours = {
    "LR001": "Battery Replacement",
    "LR002": "Motor Repair",
    "LR003": "Inverter Replacement",
    "LR004": "Charging Port Repair",
    "LR005": "DC-DC Converter Replacement",
    "LR006": "BMS Calibration",
    "LR007": "Power Electronics Repair",
    "LR008": "Thermal Management Service",
    "LR009": "Regenerative Braking Adjustment",
    "LR010": "Onboard Charger Repair",
    "LR011": "Drive Unit Maintenance",
    "LR012": "Gearbox Service",
    "LR013": "Heat Pump Repair",
    "LR014": "AC Compressor Service",
    "LR015": "Water Pump Replacement",
    "LR016": "Brake System Service",
    "LR017": "Suspension Repair",
    "LR018": "Headlight Alignment",
    "LR019": "Windshield Replacement",
    "LR020": "Fuse Replacement"
}

parts_to_labours_mapping = {
    "BP0001": "LR001",
    "EM0002": "LR002",
    "IN0003": "LR003",
    "CP0004": "LR004",
    "DC0005": "LR005",
    "BMS006": "LR006",
    "PEC007": "LR007",
    "TMS008": "LR008",
    "RBS009": "LR009",
    "OC0100": "LR010",
    "HVC011": "LR010",
    "EDU012": "LR011",
    "RG0130": "LR012",
    "HP0140": "LR013",
    "EAC015": "LR014",
    "EWP016": "LR015",
    "BP0170": "LR016",
    "BD0180": "LR016",
    "TI0190": "LR016",
    "WB0200": "LR016",
    "WS0210": "LR019",
    "MI0220": "LR019",
    "HL0230": "LR018",
    "TL0240": "LR018",
    "WB0250": "LR018",
    "SC0260": "LR017",
    "CA0270": "LR017",
    "TRE028": "LR017",
    "CVJ029": "LR017",
    "AX0300": "LR017",
    "CL0310": "LR013",
    "BF0320": "LR013",
    "PSF033": "LR013",
    "CAF034": "LR014",
    "FB0350": "LR020",
    "AB0360": "LR020"
}

labours_extra = {                       #Labours that are not directly linked to parts
    "LR021": "Software Update",
    "LR022": "Diagnostic Check",
    "LR023": "Annual Maintenance",
    "LR024": "Battery Health Check",
    "LR025": "General Safety Inspection"
}

In [2]:
def generate_labours(parts_data):

    data = pd.DataFrame()
    
    for index, row in parts_data.iterrows():
        
        t = index
        part_code = row['PART_CODE']
        labour_code = f"W{parts_to_labours_mapping[part_code]}"
        labour_name = labours[labour_code]

        create_date = faker.date_between(row['CREATE_DATE']) #Date of the labour (between the selling of parts and now days)

        data.loc[index,'LABOUR_ID'] = index + 1
        data.loc[index,'DEALER_ORDER_ID'] = row['DEALER_ORDER_ID']
        data.loc[index,'LABOUR_CODE'] = labour_code
        data.loc[index,'LABOUR_NAME'] = labour_name
        data.loc[index,'CREATE_BY'] = '9999999999'
        data.loc[index,'CREATE_DATE'] = create_date
        data.loc[index,'UPDATE_BY'] = '9999999988'
        data.loc[index,'UPDATE_DATE'] = faker.date_between(create_date)

    
    for i in range (t, t + t//3):    #Suppose 1/2 of the orders need some of the labours_extra

        labour_code = random.choice(list(labours_extra.keys()))
        labour_name = labours[labour_code]

        create_date = faker.date_this_year()
        
        data.loc[i,'LABOUR_ID'] = i
        data.loc[i,'DEALER_ORDER_ID'] = randint(1,5000)
        data.loc[i,'LABOUR_CODE'] = labour_code
        data.loc[i,'LABOUR_NAME'] = labour_name
        data.loc[i,'CREATE_BY'] = '9999999999'
        data.loc[i,'CREATE_DATE'] = create_date
        data.loc[i,'UPDATE_BY'] = '9999999988'
        data.loc[i,'UPDATE_DATE'] = faker.date_between(create_date)
        
    return data
    
    

In [60]:
repair_order_labours = generate_repair_order_labours(10000) #Let's say each order needs two labours
repair_order_labours
repair_order_labours.to_csv('repair_order_labours.csv',index=False)

Unnamed: 0,LABOUR_ID,DEALER_ORDER_ID,LABOUR_CODE,LABOUR_NAME,CREATE_BY,CREATE_DATE,UPDATE_BY,UPDATE_DATE
1,1.0,3484.0,LR002,Motor Repair,9999999999,2024-01-15,9999999988,2024-06-25
2,2.0,722.0,LR013,Heat Pump Repair,9999999999,2024-03-27,9999999988,2024-04-28
3,3.0,1038.0,LR015,Water Pump Replacement,9999999999,2024-03-10,9999999988,2024-03-16
4,4.0,1327.0,LR016,Brake System Service,9999999999,2024-05-12,9999999988,2024-06-16
5,5.0,4726.0,LR019,Windshield Replacement,9999999999,2024-01-06,9999999988,2024-03-04
...,...,...,...,...,...,...,...,...
9996,9996.0,1923.0,LR005,DC-DC Converter Replacement,9999999999,2024-02-06,9999999988,2024-02-26
9997,9997.0,1418.0,LR007,Power Electronics Repair,9999999999,2024-02-27,9999999988,2024-03-17
9998,9998.0,4905.0,LR010,Onboard Charger Repair,9999999999,2024-02-04,9999999988,2024-05-31
9999,9999.0,2096.0,LR014,AC Compressor Service,9999999999,2024-02-13,9999999988,2024-06-24


In [101]:
repair_order_labours_related = generate_repair_order_labours_from_sales_parts(parts_data) #Let's say each order needs two labours
repair_order_labours_related
repair_order_labours_related.to_csv('labours.csv',index=False)

Unnamed: 0,LABOUR_ID,DEALER_ORDER_ID,LABOUR_CODE,LABOUR_NAME,CREATE_BY,CREATE_DATE,UPDATE_BY,UPDATE_DATE
1,2.0,3485.0,LR008,Thermal Management Service,9999999999,2024-07-15,9999999988,2024-07-20
2,3.0,1390.0,LR016,Brake System Service,9999999999,2024-05-18,9999999988,2024-05-26
3,4.0,2542.0,LR004,Charging Port Repair,9999999999,2024-07-22,9999999988,2024-07-22
4,5.0,2846.0,LR015,Water Pump Replacement,9999999999,2024-07-12,9999999988,2024-07-14
5,6.0,1083.0,LR018,Headlight Alignment,9999999999,2024-05-14,9999999988,2024-06-08
...,...,...,...,...,...,...,...,...
17771,17771.0,1745.0,LR015,Water Pump Replacement,9999999999,2024-02-17,9999999988,2024-04-03
17772,17772.0,1536.0,LR014,AC Compressor Service,9999999999,2024-04-13,9999999988,2024-04-27
17773,17773.0,1684.0,LR018,Headlight Alignment,9999999999,2024-01-12,9999999988,2024-04-04
17774,17774.0,2465.0,LR015,Water Pump Replacement,9999999999,2024-05-30,9999999988,2024-06-21


# Repair Order Labour Info Data

This table contains all the vehicle informations related to each labour

- Each Labour has a main category, wich might be **Eletrical, Mechanical, Software, Safety or Maintenance**.
- Each car has its name and a **weight**, that is based on its **popularity**.


In [152]:
vehicles = {
    "BYD001": ("Dolphin", 10),
    "BYD002": ("Dolphin Mini", 8),
    "BYD003": ("Han", 5),
    "BYD004": ("Qin", 5),
    "BYD005": ("Yuan", 5),
    "BYD006": ("Song", 6),
    "BYD007": ("Tang", 4),
    "BYD008": ("Shark", 3),
    "BYD009": ("Seal", 2),
    "BYD010": ("King", 2)
}


labour_category_mapping = {
    "LR001": ("Electrical", "Battery"),
    "LR002": ("Electrical", "Motor"),
    "LR003": ("Electrical", "Inverter"),
    "LR004": ("Electrical", "Charging Port"),
    "LR005": ("Electrical", "DC-DC Converter"),
    "LR006": ("Electrical", "BMS"),
    "LR007": ("Electrical", "Power Electronics"),
    "LR008": ("Electrical", "Thermal Management"),
    "LR009": ("Electrical", "Regenerative Braking"),
    "LR010": ("Electrical", "Onboard Charger"),
    "LR011": ("Electrical", "Drive Unit"),
    "LR012": ("Mechanical", "Gearbox"),
    "LR013": ("Mechanical", "Heat Pump"),
    "LR014": ("Mechanical", "AC Compressor"),
    "LR015": ("Mechanical", "Water Pump"),
    "LR016": ("Mechanical", "Brakes"),
    "LR017": ("Mechanical", "Suspension"),
    "LR018": ("Mechanical", "Headlight Alignment"),
    "LR019": ("Safety", "Windshield"),
    "LR020": ("Safety", "Fuse"),
    "LR021": ("Software", "Update"),
    "LR022": ("Software", "Diagnostic Check"),
    "LR023": ("Maintenance", "Annual Maintenance"),
    "LR024": ("Electrical", "Battery Health Check"),
    "LR025": ("Safety", "General Safety Inspection")
}

In [154]:
vehicles.items()

dict_items([('BYD001', ('Dolphin', 10)), ('BYD002', ('Dolphin Mini', 8)), ('BYD003', ('Han', 5)), ('BYD004', ('Qin', 5)), ('BYD005', ('Yuan', 5)), ('BYD006', ('Song', 6)), ('BYD007', ('Tang', 4)), ('BYD008', ('Shark', 3)), ('BYD009', ('Seal', 2)), ('BYD010', ('King', 2))])

In [188]:
def generate_labour_info(repair_order_labours):

    data = pd.DataFrame()

    vehicle_codes, weights = zip(*[(k,w[1]) for k, w in vehicles.items()])

    for i, row in repair_order_labours.iterrows():    #Adds Labour info for each labour

        vehicle_code = random.choices(vehicle_codes, weights=weights, k=1)[0]

        labour_code = row['LABOUR_CODE']

        if labour_code in ["LR001","LR006"]:
            battery_pack_repair = 'Y'
        else:
            battery_pack_repair = 'N'
            
        #data.loc[i, 'VEHICLE_CODE'] = random.choice(list(vehicles.keys()))
        data.loc[i, 'VEHICLE_CODE'] = f"{randint(1,100000):6d}"
        data.loc[i, 'VEHICLE_NAME'] = vehicles[vehicle_code][0]
        #data.loc[i, 'LABOUR_ID'] = row['LABOUR_ID']
        data.loc[i, 'LABOUR_CODE'] = labour_code
        data.loc[i, 'LABOUR_NAME'] = labours[labour_code]
        data.loc[i, 'MAIN_CATEGORY'] = labour_category_mapping[labour_code][0]
        data.loc[i, 'SECOND_CATEGORY'] = labour_category_mapping[labour_code][1]
        data.loc[i, 'BATTERY_REPAIR'] = battery_pack_repair

    return data




In [189]:
labours_info = generate_labour_info(repair_order_labours_related) #Let's say each order needs two labours
labours_info.to_csv('labours_info.csv',index=False)

In [190]:
labours_info

Unnamed: 0,VEHICLE_CODE,VEHICLE_NAME,LABOUR_CODE,LABOUR_NAME,MAIN_CATEGORY,SECOND_CATEGORY,BATTERY_REPAIR
1,52970,Shark,LR008,Thermal Management Service,Electrical,Thermal Management,N
2,43956,Han,LR016,Brake System Service,Mechanical,Brakes,N
3,33056,Tang,LR004,Charging Port Repair,Electrical,Charging Port,N
4,88856,Seal,LR015,Water Pump Replacement,Mechanical,Water Pump,N
5,31386,Dolphin,LR018,Headlight Alignment,Mechanical,Headlight Alignment,N
...,...,...,...,...,...,...,...
17771,77737,Dolphin Mini,LR015,Water Pump Replacement,Mechanical,Water Pump,N
17772,95733,Dolphin,LR014,AC Compressor Service,Mechanical,AC Compressor,N
17773,66758,Song,LR018,Headlight Alignment,Mechanical,Headlight Alignment,N
17774,11233,Seal,LR015,Water Pump Replacement,Mechanical,Water Pump,N


# Repair Order Main Data

This table contains all general informations related to the Repair Order


In [237]:
def generate_main_table(num_records):
    data = pd.DataFrame()

    for i in range(num_records):
        create_date = faker.date_this_year()
        update_date = faker.date_between(start_date=create_date)

        first_name = faker.first_name()
        last_name = faker.last_name()
        dominio = random.choice(["@gmail.com","@outlook.com","@yahoo.com.br","uol.com.br"])
        email = f"{first_name}.{last_name}@{dominio}"
        full_name = f"{first_name} {last_name}"

        data.loc[i, 'DEALER_ORDER_ID'] = i + 1
        data.loc[i, 'DEALER_ID'] = randint(1,50)
        data.loc[i, 'DEALER_ORDER_NO'] = randint(20,120)    #Inconsistent for now, it isnt tracking each dealer's order count
        data.loc[i, 'CREATE_BY'] = '9999999999'
        data.loc[i, 'CREATE_DATE'] = create_date
        data.loc[i, 'UPDATE_BY'] = '9999999988'
        data.loc[i, 'UPDATE_DATE'] = update_date
        data.loc[i, 'REPAIR_TYPE'] = random.choice(['Warranty', 'Non-Warranty'])
        data.loc[i, 'ORDER_STATUS'] = random.choice(['Pending', 'In Progress', 'Completed'])
        data.loc[i, 'ORDER_CREATE_DATE'] = create_date
        data.loc[i, 'ORDER_LAST_BALANCE_DATE'] = update_date
        data.loc[i, 'VIN'] = faker.unique.license_plate()
        data.loc[i, 'CUST_NAME'] = full_name
        data.loc[i, 'CUST_EMAIL'] = email
        data.loc[i, 'DELIVERER'] = faker.name()
        data.loc[i, 'DELEVERER_MOBILE'] = faker.phone_number()
        data.loc[i, 'POWER_MILEAGE'] = faker.random_int(min=0, max=50000)
        data.loc[i, 'TOTAL_MILEAGE'] = faker.random_int(min=0, max=100000)
        data.loc[i, 'CREATE_CLAIM_FLAG'] = random.choice(['Y', 'N'])
        data.loc[i, 'CLAIM_NO'] = faker.unique.uuid4() if data.loc[i, 'CREATE_CLAIM_FLAG'] == 'Y' else None
        data.loc[i, 'ACTIVITY_FLAG'] = random.choice(['Y', 'N'])
        data.loc[i, 'ACTIVITY_CODE'] = faker.unique.uuid4() if data.loc[i, 'ACTIVITY_FLAG'] == 'Y' else None
        data.loc[i, 'TROUBLE_DESC'] = None
        data.loc[i, 'DELIVER_PROBLEM'] = None
        data.loc[i, 'CHECK_RESULT'] = None
        data.loc[i, 'IS_SECSTORE'] = random.choice(['Y', 'N'])
        data.loc[i, 'SECSTORE_ID'] = faker.unique.uuid4() if data.loc[i, 'IS_SECSTORE'] == 'Y' else None

    return data

In [239]:
main_table = generate_main_table(5000)
main_table.to_csv('main_table.csv',index=False)

In [240]:
main_table

Unnamed: 0,DEALER_ORDER_ID,DEALER_ID,DEALER_ORDER_NO,CREATE_BY,CREATE_DATE,UPDATE_BY,UPDATE_DATE,REPAIR_TYPE,ORDER_STATUS,ORDER_CREATE_DATE,...,TOTAL_MILEAGE,CREATE_CLAIM_FLAG,CLAIM_NO,ACTIVITY_FLAG,ACTIVITY_CODE,TROUBLE_DESC,DELIVER_PROBLEM,CHECK_RESULT,IS_SECSTORE,SECSTORE_ID
0,1.0,14.0,20.0,9999999999,2024-01-04,9999999988,2024-04-28,Non-Warranty,Pending,2024-01-04,...,13511.0,Y,ff21fd1b-0a07-4084-b1f2-a6b71f61a1a6,N,,,,,N,
1,2.0,2.0,113.0,9999999999,2024-04-21,9999999988,2024-06-23,Warranty,Completed,2024-04-21,...,54686.0,Y,ce57d78f-187f-4e6a-a730-9d681a9f303e,Y,ce8a4761-ca6f-4e2d-b8bb-8e8fefb13117,,,,N,
2,3.0,43.0,94.0,9999999999,2024-01-14,9999999988,2024-04-22,Non-Warranty,Completed,2024-01-14,...,29412.0,Y,22a59014-dd42-4797-8983-4026bea3fe39,Y,d15eedae-726f-485d-8445-d3c3f3520ddd,,,,Y,b7c8535c-95e1-49a6-a7d5-dd6a402a3d1f
3,4.0,10.0,24.0,9999999999,2024-03-06,9999999988,2024-04-16,Non-Warranty,Pending,2024-03-06,...,8177.0,N,,N,,,,,N,
4,5.0,41.0,84.0,9999999999,2024-05-22,9999999988,2024-07-20,Non-Warranty,Completed,2024-05-22,...,41185.0,Y,2663dc31-1ca5-416b-8e3b-8c016b95c1a9,Y,02fbfed9-fae2-49ce-941c-dede6975deb0,,,,Y,e0d0ef45-051b-472a-9f3c-9a7a55a61a94
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4995,4996.0,12.0,91.0,9999999999,2024-07-08,9999999988,2024-07-18,Warranty,Pending,2024-07-08,...,74206.0,Y,d7498ed7-97ee-4b10-ad47-1c833499b60d,Y,b6cf7628-aeb0-4064-bff5-c338fde26695,,,,N,
4996,4997.0,27.0,63.0,9999999999,2024-02-27,9999999988,2024-03-02,Warranty,In Progress,2024-02-27,...,70458.0,N,,N,,,,,Y,1b5a4d25-4000-4053-a3f2-88c3ee2d7826
4997,4998.0,21.0,74.0,9999999999,2024-04-17,9999999988,2024-04-21,Warranty,In Progress,2024-04-17,...,23013.0,N,,N,,,,,N,
4998,4999.0,19.0,89.0,9999999999,2024-05-27,9999999988,2024-06-24,Non-Warranty,In Progress,2024-05-27,...,81031.0,N,,Y,1aa3d606-5afb-4437-ae27-ac9d74657118,,,,N,


# Parts Info Table
This table contains information about the 36 possible parts

In [289]:
def generate_part_info():
    data = pd.DataFrame()
    i = 1
    for part in parts:

        part_code = part
        sap_material_name = parts[part_code]
        
        data.loc[i, 'PART_CODE'] = part_code
        data.loc[i, 'SAP Material name'] = sap_material_name
        data.loc[i, 'Local Material Code'] = f"LOC-{random.randint(1000, 9999)}"
        data.loc[i, 'Unit'] = random.choice(['PCS', 'SET', 'EA'])
        data.loc[i, 'Discount Category'] = random.choice(['A', 'B', 'C'])
        data.loc[i, 'Dangerous goods'] = random.choice(['Y', 'N'])
        data.loc[i, 'Order permission'] = random.choice(['Allowed', 'Restricted'])
        data.loc[i, 'Order type'] = random.choice(['Standard', 'Urgent'])
        data.loc[i, 'Selling price'] = round(random.uniform(10.0, 500.0), 2)
        data.loc[i, 'Suggested retail price'] = round(data.loc[i, 'Selling price'] * 1.1, 2)
        data.loc[i, 'Warranty Parts Price'] = round(data.loc[i, 'Selling price'] * 0.9, 2)
        data.loc[i, 'Single vehicle consumption'] = random.randint(1, 5)
        data.loc[i, 'SAP Material No.'] = f"MAT-{random.randint(100000, 999999)}"
        data.loc[i, 'SAP Material Name'] = sap_material_name
        data.loc[i, 'Manual code'] = f"MAN-{random.randint(1000, 9999)}"
        data.loc[i, 'Customs code'] = f"CUS-{random.randint(100000, 999999)}"
        data.loc[i, 'Vehicle series'] = random.choice(['EV', 'Sedan', 'SUV'])
        data.loc[i, 'Scope of services'] = random.choice(['Full', 'Partial'])
        data.loc[i, 'Minimum packing quantity'] = random.randint(1, 10)
        data.loc[i, 'Volume'] = round(random.uniform(0.1, 10.0), 2)
        data.loc[i, 'Region'] = random.choice(['North America', 'Europe', 'Asia'])
        data.loc[i, 'Whether it is Auxiliary material'] = random.choice(['Y', 'N'])
        data.loc[i, 'Whether it is Included in the ordering target of spare parts and supplies'] = random.choice(['Y', 'N'])
        data.loc[i, 'Whether the allowance for spare parts is included'] = random.choice(['Y', 'N'])
        data.loc[i, 'Whether it is Maintenance part'] = random.choice(['Y', 'N'])
        data.loc[i, 'Whether there is a commission'] = random.choice(['Y', 'N'])
        data.loc[i, 'Model description'] = f"Model-{random.randint(1000, 9999)}"
        data.loc[i, 'Whether there is allowance for sheet metal spraying'] = random.choice(['Y', 'N'])
        data.loc[i, 'Covering parts for sheet metal spraying or not'] = random.choice(['Y', 'N'])
        data.loc[i, 'Spare parts category'] = random.choice(['Category A', 'Category B', 'Category C'])
        data.loc[i, 'Spare parts type'] = random.choice(['Type 1', 'Type 2', 'Type 3'])
        data.loc[i, 'Code of replace'] = f"REP-{random.randint(1000, 9999)}"
        data.loc[i, 'Post-Package weight'] = round(random.uniform(0.1, 20.0), 2)
        data.loc[i, 'Packaging specifications'] = f"{random.randint(10, 100)}x{random.randint(10, 100)}x{random.randint(10, 100)}"
        data.loc[i, 'Whether to trace'] = random.choice(['Y', 'N'])
        data.loc[i, 'Whether it is In short supply'] = random.choice(['Y', 'N'])
        data.loc[i, 'Whether retail'] = random.choice(['Y', 'N'])
        data.loc[i, 'Expediting coefficient'] = round(random.uniform(0.1, 2.0), 2)
        data.loc[i, 'Whether it is A Self-made part'] = random.choice(['Y', 'N'])
        data.loc[i, 'Whether it is Included in turnover assessment'] = random.choice(['Y', 'N'])
        data.loc[i, 'Remarks'] = faker.text(max_nb_chars=100)
        data.loc[i, 'Planner'] = faker.name()
        data.loc[i, 'Whether it is The first maintenance'] = random.choice(['Y', 'N'])
        data.loc[i, 'Pre-approval or not'] = random.choice(['Y', 'N'])
        data.loc[i, 'Maximum number of orders'] = random.randint(1, 100)
        data.loc[i, 'Classification of accessories'] = random.choice(['Class A', 'Class B', 'Class C'])
        data.loc[i, 'Accessories type'] = random.choice(['Type A', 'Type B', 'Type C'])
        data.loc[i, 'Minimum number of applications'] = random.randint(1, 10)
        data.loc[i, 'Scheduled days'] = random.randint(1, 30)
        data.loc[i, 'Department'] = random.choice(['Dept A', 'Dept B', 'Dept C'])
        data.loc[i, 'Production cycle'] = random.randint(1, 90)
        data.loc[i, 'Highest inventory'] = random.randint(1, 500)
        data.loc[i, 'Arrival warning'] = random.choice(['Y', 'N'])
        data.loc[i, 'Minimum purchase quantity'] = random.randint(1, 10)
        data.loc[i, 'Special for after-sales'] = random.choice(['Y', 'N'])
        data.loc[i, 'The sales store cannot order'] = random.choice(['Y', 'N'])
        data.loc[i, 'Whether the repair paint is certified by the original manufacturer'] = random.choice(['Y', 'N'])
        data.loc[i, 'Special for new energy'] = random.choice(['Y', 'N'])
        data.loc[i, 'Online sales price'] = round(random.uniform(10.0, 500.0), 2)
        data.loc[i, 'Employee price'] = round(random.uniform(10.0, 500.0), 2)
        data.loc[i, 'Department price'] = round(random.uniform(10.0, 500.0), 2)
        data.loc[i, 'Marketing script'] = faker.text(max_nb_chars=200)
        data.loc[i, 'Product description'] = faker.text(max_nb_chars=200)
        data.loc[i, 'Warehouse area'] = random.choice(['Area 1', 'Area 2', 'Area 3'])
        data.loc[i, 'Pictures'] = faker.image_url()
        data.loc[i, 'Type of claim'] = random.choice(['Type A', 'Type B', 'Type C'])
        data.loc[i, 'length width height'] = f"{random.randint(10, 100)}x{random.randint(10, 100)}x{random.randint(10, 100)}"
        data.loc[i, 'Warranty period'] = random.randint(1, 60)
        data.loc[i, 'Coefficient of sheet metal and spray parts'] = round(random.uniform(0.1, 2.0), 2)
        data.loc[i, 'Whether it is Publicized'] = random.choice(['Y', 'N'])
        data.loc[i, 'Material abbreviation'] = f"MAT-{random.randint(1000, 9999)}"
        data.loc[i, 'Whether the motor number is filled in'] = random.choice(['Y', 'N'])
        data.loc[i, 'Planner'] = faker.name()
        data.loc[i, 'Is it oil'] = random.choice(['Y', 'N'])
        data.loc[i, 'Oil type'] = random.choice(['Type 1', 'Type 2', 'Type 3'])
        data.loc[i, 'Capacity (l)'] = round(random.uniform(0.1, 20.0), 2)
        data.loc[i, 'Planner Email'] = faker.email()
        data.loc[i, 'Small Amount Order Pricing Ratio'] = round(random.uniform(0.1, 2.0), 2)
        data.loc[i, 'Whether the material is authorised'] = random.choice(['Y', 'N'])
        data.loc[i, 'Country of origin'] = random.choice(['China', 'Germany', 'Sweden'])
        data.loc[i, 'NOM Certification'] = random.choice(['Y', 'N'])
        data.loc[i, 'Creation date'] = faker.date_this_year()
        i += 1
        
    return data

In [295]:
part_info = generate_part_info()
part_info.to_csv('part_info.csv', index = False)

In [296]:
part_info

Unnamed: 0,PART_CODE,SAP Material name,Local Material Code,Unit,Discount Category,Dangerous goods,Order permission,Order type,Selling price,Suggested retail price,...,Whether the motor number is filled in,Is it oil,Oil type,Capacity (l),Planner Email,Small Amount Order Pricing Ratio,Whether the material is authorised,Country of origin,NOM Certification,Creation date
1,BP001,Battery Pack,LOC-1703,PCS,B,N,Allowed,Urgent,437.83,481.61,...,Y,Y,Type 1,8.33,leaojoao-gabriel@example.com,1.98,Y,Sweden,N,2024-01-06
2,EM002,Electric Motor,LOC-1348,SET,C,Y,Restricted,Standard,357.13,392.84,...,N,N,Type 2,14.81,camargoarthur-gabriel@example.org,0.65,Y,Sweden,N,2024-04-04
3,IN003,Inverter,LOC-7361,SET,C,N,Restricted,Standard,266.19,292.81,...,N,N,Type 1,19.18,otavio47@example.net,1.19,Y,China,Y,2024-02-25
4,CP004,Charging Port,LOC-8223,EA,C,N,Allowed,Urgent,131.81,144.99,...,N,N,Type 3,13.03,wcavalcanti@example.com,1.86,Y,China,N,2024-05-28
5,DC005,DC-DC Converter,LOC-7733,PCS,A,Y,Restricted,Standard,395.62,435.18,...,Y,Y,Type 3,7.38,abreubento@example.net,0.23,N,China,Y,2024-05-17
6,BMS006,Battery Management System (BMS),LOC-1328,EA,B,N,Restricted,Urgent,452.89,498.18,...,Y,Y,Type 1,12.5,ydas-neves@example.com,1.59,Y,Sweden,Y,2024-01-28
7,PEC007,Power Electronics Controller,LOC-5050,EA,C,N,Allowed,Standard,267.51,294.26,...,N,N,Type 3,5.29,sfogaca@example.org,1.11,Y,Sweden,N,2024-01-08
8,TMS008,Thermal Management System,LOC-3361,EA,B,N,Allowed,Urgent,428.87,471.76,...,N,N,Type 1,7.81,gustavo-henriquemontenegro@example.com,1.2,N,Germany,N,2024-06-14
9,RBS009,Regenerative Braking System,LOC-9026,EA,B,N,Allowed,Standard,248.54,273.39,...,Y,Y,Type 2,11.95,natalia40@example.org,0.15,N,China,Y,2024-04-07
10,OC010,Onboard Charger,LOC-1082,SET,B,Y,Allowed,Urgent,440.98,485.08,...,N,N,Type 2,3.75,rocharyan@example.net,1.36,N,Germany,N,2024-01-20
