In [5]:
using DataFrames
using CSVFiles
using StatsBase
using LinearAlgebra

data = load("processed_data_5f34929a.csv")|>DataFrame
rename!(data, :sales=>:Sales,
    Symbol("Customer Id")=>:ID,
    :GR=>:Growth,
)
data = data[!,["Province","City","ID",
               "Customer Name","Potential","Sales",
               "ALL_CUST","MA_CUST",
               "VR_CUST","VR_CALL_CNT","VEEVA_CNT",
               "SPK_CNT","EMEET_CNT","ADD_WECHAT_CNT",
               "sales_half_yr_pre","sales_half_yr","hospital_segment","Group"]]

data[!,:Growth] = data.sales_half_yr./data.sales_half_yr_pre.-1
data[!,:Growth] = round.(data.Growth,digits=1)
data[!,:rounded_sales] = round.(data.sales_half_yr,sigdigits=1,base=2)
# replace!(data[!,:Growth],NaN=>0,Inf=>1)
sort!(data, [:Province,:hospital_segment,:Group,:Growth,:rounded_sales]);
data = data[!,Not(:rounded_sales)]

Unnamed: 0_level_0,Province,City,ID,Customer Name,Potential,Sales,ALL_CUST,MA_CUST,VR_CUST,VR_CALL_CNT,VEEVA_CNT,SPK_CNT,EMEET_CNT,ADD_WECHAT_CNT,sales_half_yr_pre,sales_half_yr,hospital_segment,Group,Growth
Unnamed: 0_level_1,String,String,Int64,String,Float64,Float64,Float64,Float64,Float64,Int64,Int64,Int64,Int64,Int64,Float64,Float64,String,String,Float64
1,云南省,普洱市,900019156,普洱市中医医院,1.03322e6,5924.3,0.0,0.0,0.0,0,0,0,0,0,335.338,0.0,Next,LH,-1.0
2,云南省,曲靖市,900016428,富源县人民医院,2.02527e6,1289.25,0.0,0.0,0.0,0,0,0,0,0,1289.25,0.0,Next,LH,-1.0
3,云南省,普洱市,900009820,景谷县中医院,727058.0,1.00991e5,0.0,0.0,0.0,0,0,0,0,0,38677.6,17190.0,Next,LH,-0.6
4,云南省,玉溪市,900011471,峨山县中医医院,691566.0,1.47102e5,0.0,0.0,0.0,0,0,0,0,0,51418.5,36887.2,Next,LH,-0.3
5,云南省,楚雄彝族自治州,900013854,楚雄市人民医院,1.3015e6,4727.26,0.0,0.0,0.0,0,0,0,0,0,2578.51,2148.76,Next,LH,-0.2
6,云南省,德宏傣族景颇族自治州,900008000,瑞丽市人民医院,1.51464e6,1.23181e5,0.0,0.0,0.0,0,0,0,0,0,52759.8,45829.5,Next,LH,-0.1
7,云南省,西双版纳傣族自治州,900006207,景洪市人民医院,1.47954e6,195788.0,1.0,1.0,1.0,0,3,1,0,0,81905.7,73774.3,Next,LH,-0.1
8,云南省,玉溪市,900007462,澄江县人民医院,1.3832e6,7134.75,0.0,0.0,0.0,0,0,0,0,0,2707.87,2621.92,Next,LH,-0.0
9,云南省,玉溪市,900006785,云南玉溪市百信医院,2.33709e6,80198.8,1.0,0.0,0.0,0,3,0,0,0,27684.3,28012.1,Next,LH,0.0
10,云南省,楚雄彝族自治州,900008654,姚安县中医医院,833482.0,24066.1,0.0,0.0,0.0,0,0,0,0,0,8595.02,9024.77,Next,LH,0.1


In [6]:
function initiate(data)
    n = nrow(data)
    n_partition = 3
    m = nrow(data) % 3
    partition = repeat(collect(1:3), n ÷ 3)
    append!(partition, collect(1:3)[begin:m]);
    (n,n_partition,partition)
end 

initiate (generic function with 1 method)

In [7]:
function objective(partition, data, n_partition, n=3)
    group_sales = [data.sales_half_yr[partition .== i]|>sum for i ∈ 1:n_partition] |> std
    group_potential = [data.Potential[partition .== i]|>sum for i ∈ 1:n_partition] |> std
    group_cust_counts = [data.ALL_CUST[partition .== i]|>sum for i ∈ 1:n_partition] |> std
    present = [data.sales_half_yr[partition .== i]|>sum for i ∈ 1:n_partition]
    ex = [data.sales_half_yr_pre[partition .== i]|>sum for i ∈ 1:n_partition]
    group_growth = present ./ ex .-1 |> std
    top_plus_std = [(partition.==i) .& (data.hospital_segment.=="Top Plus") |> sum for i ∈ 1:n_partition] |> std
    [group_sales,group_potential,group_cust_counts,group_growth,top_plus_std]
end 

objective (generic function with 2 methods)

In [12]:
function find_partition(data)
    n,n_partition,partition = initiate(data)
    ϵ = objective(partition, data, n_partition)
    ex_ϵ = ϵ
    
    total_no_of_improvment = 0
    for t ∈ 1:10
        no_of_improvment = 0
        for loc ∈ 1:n
            original = partition[loc]
            options = setdiff(1:3,original)
                for option ∈ options
                    partition[loc] = option
                    ϵ′ = objective(partition, data, n_partition)
                    if ϵ′[1] <= ϵ[1] && ϵ′[2] <= ϵ[2] && ϵ′[3] <= ϵ[3] && ϵ′[4] <= ϵ[4]
                        ϵ = ϵ′
                        no_of_improvment = no_of_improvment + 1
                    else
                        partition[loc] = original
                end
            end
        end
        total_no_of_improvment = total_no_of_improvment + no_of_improvment
        println("Number of improvments: $(total_no_of_improvment)")
        println("Best objective value: $(ϵ)")
        println("Ex-objective value: $(ex_ϵ)")
        norm_difference = norm(ex_ϵ.- ϵ)
        println("Norm difference: $(norm_difference)")
        (no_of_improvment == 0 || norm_difference <= 1e-2) && break
        ex_ϵ=ϵ
    end
    println("Total Number of improvments: $(total_no_of_improvment)")
    println("################################################")
    return partition
end

find_partition (generic function with 1 method)

In [13]:
HH = data[data.Group.=="HH",:];
partition=find_partition(HH);

Number of improvments: 3
Best objective value: [451362.0990552265, 3.282949629897355e6, 30.171730698343, 0.05051150329235351, 0.5773502691896258]
Ex-objective value: [545822.7506721411, 8.206513213254876e6, 42.335957923889396, 0.05969242220930142, 0.5773502691896258]
Norm difference: 4.924469633799788e6
Number of improvments: 4
Best objective value: [451362.0990552265, 3.282949629897355e6, 30.171730698343, 0.05051150329235351, 0.5773502691896258]
Ex-objective value: [451362.0990552265, 3.282949629897355e6, 30.171730698343, 0.05051150329235351, 0.5773502691896258]
Norm difference: 0.0
Total Number of improvments: 4
################################################
