In [2]:
import random
def divide_number_into_parts(number, n):
    # 生成 n-1 个随机分割点
    splits = sorted(random.sample(range(1, number), n - 1))
    
    # 初始化部分列表
    parts = []
    
    # 初始化上一个分割点
    last_split = 0
    
    # 计算每份的大小
    for split in splits:
        parts.append(split - last_split)
        last_split = split
    # 添加最后一份
    parts.append(number - last_split)
    
    return parts

# 示例使用：
number = 100  # 要分割的数
n = 5         # 分割的份数
parts = divide_number_into_parts(number, n)
print(parts)  # 打印分割后的各份大小

[36, 2, 50, 3, 9]


In [3]:
class FeeDimension:
     # 项目类别
    PROJECT_TYPES = ['1', '2', '3', '4', '5', '6']
    # 原始机构
    ORIGINAL_ORGS = ['1', '2', '3', '4', '5', '6']
    # 摊入机构
    TARGET_ORGS = ['1', '2', '3', '4', '5', '6']
    # 业务标识
    BUSINESS_TAGS = ['1', '2', '3', '4', '5', '6']
    # 费用科目
    FEE_ITEMS = ['1', '2', '3', '4', '5', '6']
    # 费用大类
    FEE_TYPES = ['1', '2', '3', '4', '5', '6']
    # 产品编码
    PRODUCT_CODES = ['1', '2', '3', '4', '5', '6']

In [50]:
import json
import pandas as pd

# 费用定义
class FeeDefinition:

    def __init__(self, 
                 project_type=None, 
                 original_org=None, 
                 target_org=None, 
                 business_tag=None, 
                 fee_item=None,
                 fee_type=None, 
                 product_code=None):
        self.project_type = project_type
        self.original_org = original_org
        self.target_org = target_org
        self.business_tag = business_tag
        self.fee_item = fee_item
        self.fee_type = fee_type
        self.product_code = product_code
        
    def __hash__(self):
        return hash((self.project_type, self.original_org, self.target_org, self.business_tag, self.fee_item, self.fee_type, self.product_code))
    
    def __eq__(self, other):
        if isinstance(other, FeeDefinition):
            return (self.project_type == other.project_type 
                    and self.original_org == other.original_org 
                    and self.target_org == other.target_org 
                    and self.fee_item == other.fee_item 
                    and self.fee_type == other.fee_type 
                    and self.product_code == other.product_code)
        return False
    
    def random(self):
        self.project_type = random.choice(FeeDimension.PROJECT_TYPES)
        self.original_org = random.choice(FeeDimension.ORIGINAL_ORGS)
        self.target_org = random.choice(FeeDimension.TARGET_ORGS)
        self.business_tag = random.choice(FeeDimension.BUSINESS_TAGS)
        self.fee_item = random.choice(FeeDimension.FEE_ITEMS)
        self.fee_type = random.choice(FeeDimension.FEE_TYPES)
        self.product_code = random.choice(FeeDimension.PRODUCT_CODES)
        
    def __repr__(self):
        return json.dumps({
            "project_type": self.project_type,
            "original_org": self.original_org,
            "target_org": self.target_org,
            "business_tag": self.business_tag,
            "fee_item": self.fee_item,
            "fee_type": self.fee_type,
            "product_code": self.product_code
        })
    
def serialize_fee_definition(obj):
    if isinstance(obj, FeeDefinition):
        return str(obj.to_json())
    raise TypeError(f'Object of type {obj.__class__.__name__} is not JSON serializable')
    
# 费用类
class Fee:
    def __init__(self, fee_definition, amount=None):
        self.project_type = fee_definition.project_type
        self.original_org = fee_definition.original_org
        self.target_org = fee_definition.target_org
        self.business_tag = fee_definition.business_tag
        self.fee_item = fee_definition.fee_item
        self.fee_type = fee_definition.fee_type
        self.product_code = fee_definition.product_code
        self.amount = round(random.uniform(1, 100000), 2) if amount is None else amount
        
    
    def __repr__(self):
        return json.dumps({
            "project_type": self.project_type,
            "original_org": self.original_org,
            "target_org": self.target_org,
            "business_tag": self.business_tag,
            "fee_item": self.fee_item,
            "fee_type": self.fee_type,
            "product_code": self.product_code,
            "amount": self.amount
        })
    
    def to_training_format(self):
        return f"{self.project_type}-{self.original_org}-{self.target_org}-{self.business_tag}-{self.fee_item}-{self.fee_type}-{self.product_code}-{self.amount}"
        
        
class Driver:
    
    def __init__(self, elements: dict[FeeDefinition, float]):
        self.elements = []
        for k, v in elements.items():
            row = {
                "project_type": k.project_type,
                "original_org": k.original_org,
                "target_org": k.target_org,
                "business_tag": k.business_tag,
                "fee_item": k.fee_item,
                "fee_type": k.fee_type,
                "product_code": k.product_code,
                "ratio" : v
            }
            self.elements.append(row)
        

In [51]:
def random_driver_elements(number_of_elements: int) -> dict[FeeDefinition, float]:
    parts = divide_number_into_parts(100, number_of_elements)
    elements = {}
    for i in range(number_of_elements):
        fee_def = FeeDefinition()
        fee_def.random()
        ratio = parts[i]/100
        elements[fee_def] = ratio
    return elements

In [52]:
source_fee_def = FeeDefinition()
source_fee_def.random()
source_fee = Fee(source_fee_def)
driver = Driver(random_driver_elements(5))

In [53]:
print(source_fee)
print(driver.elements)

{"project_type": "2", "original_org": "2", "target_org": "4", "business_tag": "5", "fee_item": "2", "fee_type": "4", "product_code": "2", "amount": 82764.31}
[{'project_type': '4', 'original_org': '1', 'target_org': '1', 'business_tag': '1', 'fee_item': '4', 'fee_type': '2', 'product_code': '6', 'ratio': 0.05}, {'project_type': '1', 'original_org': '6', 'target_org': '4', 'business_tag': '3', 'fee_item': '3', 'fee_type': '1', 'product_code': '5', 'ratio': 0.61}, {'project_type': '4', 'original_org': '4', 'target_org': '1', 'business_tag': '2', 'fee_item': '1', 'fee_type': '6', 'product_code': '1', 'ratio': 0.03}, {'project_type': '3', 'original_org': '3', 'target_org': '5', 'business_tag': '4', 'fee_item': '5', 'fee_type': '6', 'product_code': '2', 'ratio': 0.08}, {'project_type': '2', 'original_org': '5', 'target_org': '3', 'business_tag': '1', 'fee_item': '6', 'fee_type': '4', 'product_code': '2', 'ratio': 0.23}]


In [54]:
def allocate(source_fee: Fee, driver: Driver) -> list[Fee]:
    dest_fee_list = []
    source_amount = source_fee.amount
    for driver_element in driver.elements:
        dest_fee_def = FeeDefinition(
            driver_element['project_type'],
            driver_element['original_org'],
            driver_element['target_org'],
            driver_element['business_tag'],
            driver_element['fee_item'],
            driver_element['fee_type'],
            driver_element['product_code']
        )
        ratio = driver_element['ratio']
        dest_amount = round(source_amount * ratio, 2)
        dest_fee = Fee(dest_fee_def, dest_amount)
        dest_fee_list.append(dest_fee)
    
    return dest_fee_list

In [55]:
dest_fees = allocate(source_fee, driver)
print(f'source fee:${source_fee}')
print('dest fees:')
for dest_fee in dest_fees:
    print(dest_fee)

source fee:${"project_type": "2", "original_org": "2", "target_org": "4", "business_tag": "5", "fee_item": "2", "fee_type": "4", "product_code": "2", "amount": 82764.31}
dest fees:
{"project_type": "4", "original_org": "1", "target_org": "1", "business_tag": "1", "fee_item": "4", "fee_type": "2", "product_code": "6", "amount": 4138.22}
{"project_type": "1", "original_org": "6", "target_org": "4", "business_tag": "3", "fee_item": "3", "fee_type": "1", "product_code": "5", "amount": 50486.23}
{"project_type": "4", "original_org": "4", "target_org": "1", "business_tag": "2", "fee_item": "1", "fee_type": "6", "product_code": "1", "amount": 2482.93}
{"project_type": "3", "original_org": "3", "target_org": "5", "business_tag": "4", "fee_item": "5", "fee_type": "6", "product_code": "2", "amount": 6621.14}
{"project_type": "2", "original_org": "5", "target_org": "3", "business_tag": "1", "fee_item": "6", "fee_type": "4", "product_code": "2", "amount": 19035.79}


# 随机创建10000笔分摊费用

In [56]:
# 随机100个分摊规则
fee_def_drivers_pair = {}
for i in range(20):
    source_fee_def = FeeDefinition()
    source_fee_def.random()
    
    driver = Driver(random_driver_elements(random.randint(1, 100)))
    fee_def_drivers_pair[source_fee_def] = driver
    

In [57]:
training_data = []
for key, value in fee_def_drivers_pair.items():
    for i in range(500):
        source_fee = Fee(key)
        print(source_fee.amount)
        dest_fees = allocate(source_fee, value)
        training_data_entry = {
            "instruction" : "do allocation for the given fee",
            "input" : source_fee.to_training_format(),
            "output" : ';'.join(dest_fee.to_training_format() for dest_fee in dest_fees)
        }
        training_data.append(training_data_entry)

53974.75
65703.77
31251.98
90999.16
68323.57
8353.49
20283.77
69564.47
58851.1
69328.62
58995.23
15810.94
3787.59
17291.68
61258.39
17248.82
48324.94
97479.81
28898.29
85668.0
64249.78
35161.38
32218.75
49861.62
73364.87
89332.4
71396.4
98883.37
2018.77
8712.31
35655.76
62492.73
46003.49
32438.15
16193.12
43622.21
13205.77
43759.59
32741.61
29211.2
95767.32
72842.93
42477.92
53331.24
48118.96
8859.04
51984.39
2876.97
29508.87
68649.73
61601.99
20437.31
17884.89
85162.87
57378.73
24762.65
83056.84
57042.55
14740.92
52380.04
7270.49
59189.7
71136.72
65864.62
74071.61
83103.27
76638.02
10060.04
56993.4
29199.59
31548.98
50334.11
75313.24
46426.04
21096.22
78715.66
42470.73
45785.51
3855.55
77501.01
76557.73
91660.92
41951.2
11710.06
84008.01
41370.15
47209.12
31714.73
33778.36
22143.84
3703.4
37955.61
57206.56
34357.05
45621.66
6319.32
19035.55
38055.71
34318.18
95322.77
75543.53
50442.62
85294.89
30872.88
29354.57
25664.21
99133.34
40354.22
25337.21
95860.4
9569.15
38488.84
93112.32
9063

In [60]:
print(training_data[4])

{'instruction': 'do allocation for the given fee', 'input': '2-2-6-1-1-6-3-68323.57', 'output': '4-5-1-4-5-5-5-683.24;1-5-5-6-4-1-1-1366.47;1-3-1-3-6-4-5-683.24;3-4-2-4-1-2-3-1366.47;3-3-5-1-3-4-2-683.24;6-3-4-3-1-4-3-683.24;3-6-1-4-6-6-4-683.24;1-2-2-1-4-2-6-683.24;5-5-4-6-1-6-1-683.24;6-6-1-3-1-5-6-683.24;4-1-5-6-5-2-1-683.24;2-6-6-1-6-2-3-1366.47;6-6-6-6-5-6-2-683.24;2-2-6-6-4-6-1-683.24;3-2-4-3-5-4-6-683.24;5-5-2-4-3-3-2-683.24;1-3-3-2-4-4-3-683.24;6-1-3-5-3-5-1-683.24;1-5-4-4-3-1-5-683.24;6-1-4-1-4-5-4-683.24;5-5-1-5-2-5-4-683.24;4-3-3-3-6-1-3-683.24;4-6-5-4-2-6-5-683.24;2-1-6-2-1-5-4-683.24;6-1-4-5-5-4-2-683.24;2-6-6-3-4-5-3-1366.47;4-6-1-1-4-6-3-683.24;2-6-1-1-3-6-1-683.24;5-3-1-2-3-4-6-683.24;4-5-6-3-6-4-4-683.24;1-5-5-3-5-5-1-683.24;3-5-3-5-3-4-1-683.24;4-4-5-1-2-3-5-683.24;1-1-5-1-1-3-6-683.24;5-4-2-2-4-4-5-683.24;1-5-3-6-6-4-3-1366.47;5-4-4-6-1-1-2-1366.47;3-1-1-3-1-5-3-1366.47;4-1-3-5-1-6-3-683.24;1-1-6-1-3-6-1-683.24;5-1-5-2-6-2-5-683.24;6-5-6-1-3-4-6-683.24;1-5-1-3-1-1-4-

In [64]:
with open('data/train.json', 'w') as f:
    json.dump(training_data, f, indent=4)

In [65]:
testing_data = []
for key, value in fee_def_drivers_pair.items():
    for i in range(10):
        source_fee = Fee(key)
        dest_fees = allocate(source_fee, value)
        testing_data_entry = {
            "instruction" : "do allocation for the given fee",
            "input" : source_fee.to_training_format(),
            "output" : ';'.join(dest_fee.to_training_format() for dest_fee in dest_fees)
        }
        testing_data.append(testing_data_entry)

70869.38
21778.55
66105.25
98772.45
90883.69
9140.15
3123.17
35336.2
60652.86
48270.87
5070.85
40564.63
46734.9
35355.15
65007.12
51626.73
3303.76
21204.14
33076.71
86223.28
81888.06
62628.9
2315.67
29267.38
22905.44
65514.5
91497.18
12940.7
8103.29
4404.87
74105.02
19292.92
14469.64
38166.77
19665.01
18209.58
5172.72
22694.14
39212.41
17409.26
41979.49
71448.3
83444.98
48782.88
33778.48
29818.02
11377.45
89713.05
91051.89
91759.3
61257.82
57152.95
21550.51
36001.87
40975.27
46193.46
48003.83
17961.45
4873.13
87579.82
688.79
92058.15
45507.01
39330.22
3632.17
90178.86
1141.24
31640.96
27032.28
25120.1
22485.73
36100.01
30764.13
8771.0
67636.15
91383.93
85340.95
9752.13
76817.16
52272.95
61231.45
39056.54
20086.41
3727.25
38645.3
19368.93
18859.98
79193.45
52500.52
5865.03
60602.3
45511.57
12090.04
13110.93
70892.45
27528.88
238.45
8393.09
20600.81
50966.16
56861.23
66796.13
64848.14
1905.72
81174.45
18529.86
28302.99
47915.17
22160.67
93428.49
54251.51
79072.21
83241.02
50287.84
96553.

In [66]:
with open('data/test.json', 'w') as f:
    json.dump(testing_data, f, indent=4)