In [1]:
using DataFrames
using CSVFiles
using StatsBase
using LinearAlgebra

In [2]:
function min_max_scale(x)
    dt = fit(UnitRangeTransform, x, dims=1)
    StatsBase.transform(dt, x)
end

data = load("processed_data_5f34929a.csv")|>DataFrame
rename!(data, :sales=>:Sales,
    Symbol("Customer Id")=>:ID,
    :GR=>:Growth,
)
data = data[!,["Province","City","ID",
               "Customer Name","Potential","Sales",
               "ALL_CUST","MA_CUST",
               "VR_CUST","VR_CALL_CNT","VEEVA_CNT",
               "SPK_CNT","EMEET_CNT","ADD_WECHAT_CNT",
               "sales_half_yr_pre","sales_half_yr","hospital_segment","Group"]]
data[!,:Growth] = data.sales_half_yr./data.sales_half_yr_pre.-1
sort!(data, :ID);

In [3]:
n = nrow(data)
n_partition = 3
m = nrow(data) % 3
partition = repeat(collect(1:3), n ÷ 3)
append!(partition, collect(1:3)[begin:m]);

In [4]:
function objective(partition, data, n=3)
    group_sales = [data.sales_half_yr[partition .== i]|>sum for i ∈ 1:n] |> std
    group_potential = [data.Potential[partition .== i]|>sum for i ∈ 1:n] |> std
    group_cust_counts = [data.ALL_CUST[partition .== i]|>sum for i ∈ 1:n] |> std
    present = [data.sales_half_yr[partition .== i]|>sum for i ∈ 1:n_partition]
    ex = [data.sales_half_yr_pre[partition .== i]|>sum for i ∈ 1:n_partition]
    group_growth = present ./ ex .-1 |> std
    [group_sales,group_potential,group_cust_counts,group_growth]
end

ex_ϵ = ϵ = objective(partition, data, n_partition)
objective(partition,data)

4-element Vector{Float64}:
 896482.6461717698
      1.4899505417506875e7
     71.00234737903624
      0.06602395133118533

In [5]:
function find_partition(data,ϵ=ϵ,ex_ϵ=ex_ϵ)
    total_no_of_improvment = 0
    for t ∈ 1:10
        no_of_improvment = 0
        for loc1 ∈ 1:n
            for loc2 ∈ loc1:n

                a1 = partition[loc1]; a2 = partition[loc2]
                if a1 ≠ a2
                    partition[loc1] = a2; partition[loc2] = a1

                    ϵ′ = objective(partition, data, n_partition)
                    if ϵ′[1] <= ϵ[1] && ϵ′[2] <= ϵ[2] && ϵ′[3] <= ϵ[3] && ϵ′[4] <= ϵ[4]
                        ϵ = ϵ′
                        no_of_improvment = no_of_improvment + 1
                    else
                        partition[loc1] = a1; partition[loc2] = a2
                    end
                end
            end
        end
        total_no_of_improvment = total_no_of_improvment + no_of_improvment
        println("Number of improvments: $(total_no_of_improvment)")
        println("Best objective value: $(ϵ)")
        println("Ex-objective value: $(ex_ϵ)")
        norm_difference = norm(ex_ϵ.- ϵ)
        println("Norm difference: $(norm_difference)")
        (no_of_improvment == 0 || norm_difference <= 1e-5) && break
        ex_ϵ=ϵ
    end
    println("Total Number of improvments: $(total_no_of_improvment)")
    println("################################################")
    return partition
end

partition = find_partition(data);

Number of improvments: 91
Best objective value: [31497.112152282858, 12.379447379074325, 17.09775813764288, 0.01241713666547906]
Ex-objective value: [896482.6461717698, 1.4899505417506875e7, 71.00234737903624, 0.06602395133118533]
Norm difference: 1.492458015383187e7
Number of improvments: 106
Best objective value: [29820.135873742638, 12.379247451601147, 17.09775813764288, 0.011785882240217092]
Ex-objective value: [31497.112152282858, 12.379447379074325, 17.09775813764288, 0.01241713666547906]
Norm difference: 1676.9762785403507
Number of improvments: 115
Best objective value: [29820.135873742638, 11.168042852107568, 17.09775813764288, 0.011785882240217092]
Ex-objective value: [29820.135873742638, 12.379247451601147, 17.09775813764288, 0.011785882240217092]
Norm difference: 1.2112045994935787
Number of improvments: 122
Best objective value: [29820.135873742638, 11.167976731180378, 17.09775813764288, 0.011785882240217092]
Ex-objective value: [29820.135873742638, 11.168042852107568, 17.

In [5]:
function find_partition(data,ϵ=ϵ,ex_ϵ=ex_ϵ)
    total_no_of_improvment = 0
    for t ∈ 1:10
        no_of_improvment = 0
        for loc1 ∈ 1:n
            for loc2 ∈ loc1:n

                a1 = partition[loc1]; a2 = partition[loc2]
                    partition[loc1] = a2; partition[loc2] = a1

                    ϵ′ = objective(partition, data, n_partition)
                    if ϵ′[1] <= ϵ[1] && ϵ′[2] <= ϵ[2] && ϵ′[3] <= ϵ[3] && ϵ′[4] <= ϵ[4]
                        ϵ = ϵ′
                        no_of_improvment = no_of_improvment + 1
                    else
                        partition[loc1] = a1; partition[loc2] = a2
                end
            end
        end
        total_no_of_improvment = total_no_of_improvment + no_of_improvment
        println("Number of improvments: $(total_no_of_improvment)")
        println("Best objective value: $(ϵ)")
        println("Ex-objective value: $(ex_ϵ)")
        norm_difference = norm(ex_ϵ.- ϵ)
        println("Norm difference: $(norm_difference)")
        (no_of_improvment == 0 || norm_difference <= 1e-5) && break
        ex_ϵ=ϵ
    end
    println("Total Number of improvments: $(total_no_of_improvment)")
    println("################################################")
    return partition
end

partition = find_partition(data);

Number of improvments: 488859
Best objective value: [31497.112152282858, 12.379447379074325, 17.09775813764288, 0.01241713666547906]
Ex-objective value: [896482.6461717698, 1.4899505417506875e7, 71.00234737903624, 0.06602395133118533]
Norm difference: 1.492458015383187e7
Number of improvments: 977664
Best objective value: [29820.135873742638, 12.379247451601147, 17.09775813764288, 0.011785882240217092]
Ex-objective value: [31497.112152282858, 12.379447379074325, 17.09775813764288, 0.01241713666547906]
Norm difference: 1676.9762785403507
Number of improvments: 1466449
Best objective value: [29820.135873742638, 11.168042852107568, 17.09775813764288, 0.011785882240217092]
Ex-objective value: [29820.135873742638, 12.379247451601147, 17.09775813764288, 0.011785882240217092]
Norm difference: 1.2112045994935787
Number of improvments: 1955235
Best objective value: [29820.135873742638, 11.167976731180378, 17.09775813764288, 0.011785882240217092]
Ex-objective value: [29820.135873742638, 11.16804

In [6]:
insertcols!(data,1,(:Partition=>partition));

In [7]:
res = combine(groupby(data,:Partition),AsTable([:sales_half_yr, :sales_half_yr_pre]) => (x -> sum(x.sales_half_yr)/sum(x.sales_half_yr_pre)-1)=>Symbol("Growth"),names(data)[6:end-3].=>sum)

Unnamed: 0_level_0,Partition,Growth,Potential_sum,Sales_sum,ALL_CUST_sum,MA_CUST_sum,VR_CUST_sum,VR_CALL_CNT_sum,VEEVA_CNT_sum,SPK_CNT_sum,EMEET_CNT_sum,ADD_WECHAT_CNT_sum,sales_half_yr_pre_sum,sales_half_yr_sum
Unnamed: 0_level_1,Int64,Float64,Float64,Float64,Float64,Float64,Float64,Int64,Int64,Int64,Int64,Int64,Float64,Float64
1,1,0.0805827,518498000.0,59278000.0,1143.0,966.0,802.0,258,1677,359,131,46,17272900.0,18664800.0
2,2,0.0627734,518498000.0,60510600.0,1115.0,952.0,820.0,228,2010,374,136,43,17610100.0,18715600.0
3,3,0.0583048,518498000.0,61231600.0,1146.0,971.0,797.0,246,2413,405,213,47,17686200.0,18717300.0


In [8]:
unstack(sort(combine(groupby(data,[:Partition,:hospital_segment]),nrow),:Partition),:Partition,:hospital_segment,:nrow)

Unnamed: 0_level_0,Partition,Others,Top,Next,CHC,Top Plus
Unnamed: 0_level_1,Int64,Int64?,Int64?,Int64?,Int64?,Int64?
1,1,360,32,164,14,1
2,2,371,41,141,14,3
3,3,342,37,177,11,3


In [9]:
unstack(sort(combine(groupby(data,[:Partition,:Group]),nrow),:Partition),:Partition,:Group,:nrow)

Unnamed: 0_level_0,Partition,LH,HH,LL,HL
Unnamed: 0_level_1,Int64,Int64?,Int64?,Int64?,Int64?
1,1,277,72,45,177
2,2,266,93,47,164
3,3,276,100,39,155


In [10]:
unstack(sort(combine(groupby(data,[:Partition,:Province]),nrow),:Partition),:Partition,:Province,:nrow)

Unnamed: 0_level_0,Partition,辽宁省,江苏省,湖北省,四川省,青海省,河北省,山西省,内蒙古自治区,安徽省,湖南省,云南省,甘肃省,新疆维吾尔自治区,黑龙江省,陕西省,宁夏回族自治区,吉林省,河南省,贵州省,江西省,广西壮族自治区,西藏自治区
Unnamed: 0_level_1,Int64,Int64?,Int64?,Int64?,Int64?,Int64?,Int64?,Int64?,Int64?,Int64?,Int64?,Int64?,Int64?,Int64?,Int64?,Int64?,Int64?,Int64?,Int64?,Int64?,Int64?,Int64?,Int64?
1,1,39,16,50,38,19,25,58,28,24,27,20,40,16,28,59,15,9,32,15,2,5,6
2,2,33,28,50,44,16,22,57,44,30,17,22,31,14,18,52,16,14,35,14,1,7,5
3,3,32,19,40,51,13,23,56,33,39,26,17,33,16,18,53,15,10,42,23,1,8,2


In [None]:
sort!(data,:Province);
data[data.Partition.==1,:]

In [None]:
ENV["ROWS"]="500"

In [11]:
provincial = sort(combine(groupby(data,[:Partition,:Province]),nrow),[:Province,:Partition])

Unnamed: 0_level_0,Partition,Province,nrow
Unnamed: 0_level_1,Int64,String,Int64
1,1,云南省,20
2,2,云南省,22
3,3,云南省,17
4,1,内蒙古自治区,28
5,2,内蒙古自治区,44
6,3,内蒙古自治区,33
7,1,吉林省,9
8,2,吉林省,14
9,3,吉林省,10
10,1,四川省,38


In [14]:
combine(groupby(provincial,:Province),:nrow=>std)[!,2]|>std

2.326715050083571

In [None]:
import ExcelFiles
ExcelFiles.save("Partitioned.xlsx",data)

In [None]:
combine(groupby(data[data.ALL_CUST .≠ 0,:],:Partition),:Sales=>sum,:Potential=>sum,:ALL_CUST=>sum,:MA_CUST=>sum,:VR_CUST=>sum)