In [165]:
using DataFrames
using CSVFiles
using StatsBase
using LinearAlgebra
import ExcelFiles

In [166]:
function min_max_scale(x)
    dt = fit(UnitRangeTransform, x, dims=1)
    StatsBase.transform(dt, x)
end

data = load("processed_data_5f34929a.csv")|>DataFrame
rename!(data, :sales=>:Sales,
    Symbol("Customer Id")=>:ID,
    :GR=>:Growth,
)

data[!,:Growth] = data.sales_half_yr./data.sales_half_yr_pre.-1
data[!,:Growth] = round.(data.Growth,digits=1)
data[!,:rounded_sales] = round.(data.sales_half_yr,sigdigits=1,base=2);

In [167]:
immutables = load("DBU project 重点医院.csv")|>DataFrame
rename!(immutables,:Parent_Customer_ID=>:ID)
leftjoin!(data, immutables[!,[:ID,Symbol("Upside Tgt HP")]], on=:ID)
replace!(data[!,"Upside Tgt HP"],missing=>"N")
immutables = immutables.ID;
sort!(data, [Symbol("Upside Tgt HP"),:Province,:hospital_segment,:Growth,:rounded_sales,:Group]);

In [169]:
stats = combine(groupby(data,[:Group,Symbol("Upside Tgt HP")]),AsTable([:sales_half_yr, :sales_half_yr_pre]) => (x -> sum(x.sales_half_yr)/sum(x.sales_half_yr_pre)-1)=>Symbol("Growth"),names(res)[6:end-3].=>sum,nrow=>:Hospital_Counts)
stats = sort!(stats,[:Group,Symbol("Upside Tgt HP")])

Unnamed: 0_level_0,Group,Upside Tgt HP,Growth,Potential_sum,Sales_sum,ALL_CUST_sum,MA_CUST_sum,VR_CUST_sum,VR_CALL_CNT_sum,VEEVA_CNT_sum,SPK_CNT_sum,EMEET_CNT_sum,ADD_WECHAT_CNT_sum,sales_half_yr_pre_sum,Hospital_Counts
Unnamed: 0_level_1,String,String?,Float64,Float64,Float64,Float64,Float64,Float64,Int64,Int64,Int64,Int64,Int64,Float64,Int64
1,HH,N,0.051542,192786000.0,17064900.0,271.0,260.0,235.0,61,193,83,14,6,4872170.0,231
2,HH,Y,-0.0336625,98764400.0,13759600.0,288.0,278.0,236.0,71,292,83,16,8,4223860.0,34
3,HL,N,0.135403,259381000.0,46514700.0,907.0,751.0,616.0,434,2025,266,104,53,13114700.0,496
4,LH,N,0.0926026,660893000.0,52163100.0,1068.0,816.0,660.0,61,1637,283,137,19,14969200.0,757
5,LH,Y,-0.0136747,205520000.0,38525800.0,643.0,574.0,506.0,25,1222,331,141,34,11560600.0,62
6,LL,N,0.108583,138150000.0,12992200.0,227.0,210.0,166.0,80,731,92,68,16,3828720.0,131


In [95]:
function initiate(data)
    n = nrow(data[data[!,:"Upside Tgt HP"].=="N",:])
    immutables = data[data[!,:"Upside Tgt HP"].=="Y",:]
    
    n_partition = 3
    m = nrow(data) % 2
    partition = repeat(collect(1:2), n ÷ 2)
    append!(partition, collect(1:3)[begin:m]);
    append!(partition, repeat([3], nrow(immutables)));
    (n,n_partition,partition)
end;

In [144]:
function objective(partition, data, n_partition, n=3)
    group_sales = [data.sales_half_yr[partition .== i]|>sum for i ∈ 1:n_partition] |> std
    group_potential = [data.Potential[partition .== i]|>sum for i ∈ 1:n_partition] |> std
    group_cust_counts = [data.ALL_CUST[partition .== i]|>sum for i ∈ 1:n_partition] |> std
    present = [data.sales_half_yr[partition .== i]|>sum for i ∈ 1:n_partition]
    ex = [data.sales_half_yr_pre[partition .== i]|>sum for i ∈ 1:n_partition]
    group_growth = present ./ ex .-1 |> std
    top_plus_std = [(partition.==i) .& (data.hospital_segment.=="Top Plus") |> sum for i ∈ 1:n_partition] |> std
    [group_sales,group_potential,group_cust_counts,group_growth]
end 

objective (generic function with 2 methods)

In [157]:
function find_partition(data)
    n,n_partition,partition = initiate(data)
    data[!,:Partition]=partition
    
    sort!(data, :Growth)
    partition=data.Partition
    data=data[!,Not(:Partition)]
    
    ϵ = objective(partition, data, n_partition)
    immutables = data.ID[data[!,:"Upside Tgt HP"].=="Y",:]
    ex_ϵ = ϵ
    
    total_no_of_improvment = 0
    for t ∈ 1:10
        no_of_improvment = 0
        for loc1 ∈ 1:n
            for loc2 ∈ loc1:n

                a1 = partition[loc1]; a2 = partition[loc2]
                    partition[loc1] = a2; partition[loc2] = a1

                    ϵ′ = objective(partition, data, n_partition)
                    selected_hospitals = data.ID[partition.==3,:]
                    condition_on_immutables = immutables ⊆ selected_hospitals
                    if ϵ′[4] <= ϵ[4] && condition_on_immutables
                        ϵ = ϵ′
                        no_of_improvment = no_of_improvment + 1
                    else
                        partition[loc1] = a1; partition[loc2] = a2
                end
            end
        end
        total_no_of_improvment = total_no_of_improvment + no_of_improvment
        println("Number of improvments: $(total_no_of_improvment)")
        println("Best objective value: $(ϵ[4])")
        println("Ex-objective value: $(ex_ϵ[4])")
        norm_difference = ex_ϵ[4].- ϵ[4]
        println("growth difference: $(norm_difference)")
        (no_of_improvment == 0 || norm_difference <= 1e-2) && break
        ex_ϵ=ϵ
    end
    println("Total Number of improvments: $(total_no_of_improvment)")
    println("################################################")
    return partition
end

find_partition (generic function with 2 methods)

In [62]:
combine(groupby(data,:Group),nrow)

Unnamed: 0_level_0,Group,nrow
Unnamed: 0_level_1,String,Int64
1,LH,819
2,HL,496
3,LL,131
4,HH,265


In [154]:
selected = data[data.Group.=="LH",:]

Unnamed: 0_level_0,Province,City,ID,Customer Name,Potential,2022 Q1 Target,Sales,ALL_CUST,MA_CUST,VR_CUST,Stegaltro Listing Status,Stegaltro Listing Date,1/1/2021,1/1/2022,10/1/2021,11/1/2021,12/1/2021,2/1/2021,2/1/2022,3/1/2021,3/1/2022,4/1/2021,4/1/2022,5/1/2021,5/1/2022,6/1/2021,7/1/2021,8/1/2021,9/1/2021,standard_tier,hospital_segment,VR_CALL_CNT,VEEVA_CNT,SPK_CNT,EMEET_CNT,ADD_WECHAT_CNT,Group,sales_half_yr_pre,sales_half_yr,Growth,rounded_sales,Upside Tgt HP
Unnamed: 0_level_1,String,String,Int64,String,Float64,Float64,Float64,Float64,Float64,Float64,String,String,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,Float64,String,String,Int64,Int64,Int64,Int64,Int64,String,Float64,Float64,Float64,Float64,String?
1,云南省,普洱市,900019156,普洱市中医医院,1.03322e6,8380.14,5924.3,0.0,0.0,0.0,Not Listed,Not Listed,0.0,0.0,0.0,335.338,5588.97,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Others,Next,0,0,0,0,0,LH,335.338,0.0,-1.0,0.0,N
2,云南省,曲靖市,900016428,富源县人民医院,2.02527e6,22476.0,1289.25,0.0,0.0,0.0,Not Listed,Not Listed,0.0,0.0,0.0,1289.25,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,Tier2,Next,0,0,0,0,0,LH,1289.25,0.0,-1.0,0.0,N
3,云南省,普洱市,900009820,景谷县中医院,727058.0,18522.3,1.00991e5,0.0,0.0,0.0,Not Listed,Not Listed,8595.02,0.0,0.0,17190.0,17190.0,0.0,0.0,4297.51,0.0,4297.51,17190.0,6446.27,0.0,4297.51,12892.5,8595.02,0.0,Tier1,Next,0,0,0,0,0,LH,38677.6,17190.0,-0.6,16384.0,N
4,云南省,玉溪市,900011471,峨山县中医医院,691566.0,21315.6,1.47102e5,0.0,0.0,0.0,Not Listed,Not Listed,8942.34,22355.9,16766.9,11177.9,15202.0,4471.17,0.0,8942.34,6706.76,7824.55,3353.38,4471.17,4471.17,8942.34,12295.7,11177.9,0.0,Tier2,Next,0,0,0,0,0,LH,51418.5,36887.2,-0.3,32768.0,N
5,云南省,楚雄彝族自治州,900013854,楚雄市人民医院,1.3015e6,429.751,4727.26,0.0,0.0,0.0,Not Listed,Not Listed,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2148.76,0.0,0.0,0.0,2578.51,Tier2,Next,0,0,0,0,0,LH,2578.51,2148.76,-0.2,2048.0,N
6,云南省,德宏傣族景颇族自治州,900008000,瑞丽市人民医院,1.51464e6,25914.0,1.23181e5,0.0,0.0,0.0,Not Listed,Not Listed,0.0,22355.9,0.0,19226.0,0.0,11177.9,0.0,5588.97,5588.97,2235.59,11177.9,5588.97,6706.76,0.0,11177.9,0.0,22355.9,Tier2,Next,0,0,0,0,0,LH,52759.8,45829.5,-0.1,32768.0,N
7,云南省,西双版纳傣族自治州,900006207,景洪市人民医院,1.47954e6,30297.4,195788.0,1.0,1.0,1.0,Not Listed,Not Listed,0.0,21461.6,11177.9,12602.5,10730.8,0.0,13413.5,0.0,6706.76,558.896,21461.6,7580.15,10730.8,21238.1,10060.1,20120.3,27944.8,Tier2,Next,0,3,1,0,0,LH,81905.7,73774.3,-0.1,65536.0,N
8,云南省,玉溪市,900007462,澄江县人民医院,1.3832e6,11775.2,7134.75,0.0,0.0,0.0,Not Listed,Not Listed,0.0,0.0,0.0,515.701,429.751,343.801,429.751,429.751,429.751,214.875,223.559,214.875,1538.86,171.9,429.751,429.751,1332.67,Tier2,Next,0,0,0,0,0,LH,2707.87,2621.92,-0.0,2048.0,N
9,云南省,玉溪市,900006785,云南玉溪市百信医院,2.33709e6,27933.8,80198.8,1.0,0.0,0.0,Not Listed,Not Listed,11088.9,6706.76,3353.38,6619.93,0.0,0.0,3524.84,5588.97,12966.4,3353.38,3524.84,0.0,1289.25,4471.17,5760.43,2235.59,9715.01,Tier2,Next,0,3,0,0,0,LH,27684.3,28012.1,0.0,32768.0,N
10,云南省,楚雄彝族自治州,900008654,姚安县中医医院,833482.0,10185.1,24066.1,0.0,0.0,0.0,Not Listed,Not Listed,1719.0,2148.76,2148.76,2148.76,0.0,859.502,2148.76,1289.25,0.0,1289.25,2578.51,859.502,2148.76,429.751,4297.51,0.0,0.0,Tier2,Next,0,0,0,0,0,LH,8595.02,9024.77,0.1,8192.0,N


In [158]:
# groups = data.Group|>sort|>unique
groups = ["HH","LH"]
res = DataFrame()

for group ∈ groups
    selected = data[data.Group.==group,:]
    println("##############################################")
    println("Optimizing for $group")
    partition=find_partition(selected)
    selected = selected[!,Not(:Partition)]
    insertcols!(selected,1,(:Partition=>partition));
    res = vcat(res,selected)
end

##############################################
Optimizing for HH
Number of improvments: 11484
Best objective value: 0.04819604807375781
Ex-objective value: 0.04987367694674456
growth difference: 0.0016776288729867528
Total Number of improvments: 11484
################################################
##############################################
Optimizing for LH
Number of improvments: 139186
Best objective value: 0.05858909535650049
Ex-objective value: 0.0640405703404065
growth difference: 0.00545147498390601
Total Number of improvments: 139186
################################################


In [159]:
res = res[!,Not(:rounded_sales)];
# ExcelFiles.save("Partitioned.xlsx",res)

In [160]:
cols = ["Partition","Province","City","ID",
        "Customer Name","Potential","Sales",
        "ALL_CUST","MA_CUST",
        "VR_CUST","VR_CALL_CNT","VEEVA_CNT",
        "SPK_CNT","EMEET_CNT","ADD_WECHAT_CNT",
        "sales_half_yr_pre","sales_half_yr","hospital_segment","Group"];
res = res[!,cols];

In [161]:
stats = combine(groupby(res,[:Group,:Partition]),AsTable([:sales_half_yr, :sales_half_yr_pre]) => (x -> sum(x.sales_half_yr)/sum(x.sales_half_yr_pre)-1)=>Symbol("Growth"),names(res)[6:end-3].=>sum,nrow=>:Hospital_Counts)
stats = sort!(stats,[:Group,:Partition])

Unnamed: 0_level_0,Group,Partition,Growth,Potential_sum,Sales_sum,ALL_CUST_sum,MA_CUST_sum,VR_CUST_sum,VR_CALL_CNT_sum,VEEVA_CNT_sum,SPK_CNT_sum,EMEET_CNT_sum,ADD_WECHAT_CNT_sum,sales_half_yr_pre_sum,Hospital_Counts
Unnamed: 0_level_1,String,Int64,Float64,Float64,Float64,Float64,Float64,Float64,Int64,Int64,Int64,Int64,Int64,Float64,Int64
1,HH,1,0.0383516,76959700.0,5743310.0,87.0,83.0,76.0,23,59,19,5,1,1573470.0,116
2,HH,2,0.0578338,115827000.0,11321500.0,184.0,177.0,159.0,38,134,64,9,5,3298700.0,115
3,HH,3,-0.0336625,98764400.0,13759600.0,288.0,278.0,236.0,71,292,83,16,8,4223860.0,34
4,LH,1,0.0652118,330667000.0,12917900.0,342.0,227.0,194.0,16,394,63,39,4,3450210.0,379
5,LH,2,0.100807,330226000.0,39245200.0,726.0,589.0,466.0,45,1243,220,98,15,11519000.0,378
6,LH,3,-0.0136747,205520000.0,38525800.0,643.0,574.0,506.0,25,1222,331,141,34,11560600.0,62


In [20]:
stats = combine(groupby(res,[:Group,:Partition]),AsTable([:sales_half_yr, :sales_half_yr_pre]) => (x -> sum(x.sales_half_yr)/sum(x.sales_half_yr_pre)-1)=>Symbol("Growth"),names(res)[6:end-3].=>sum,nrow=>:Hospital_Counts)
stats = sort!(stats,[:Group,:Partition])
# ExcelFiles.save("Overall_Stats.xlsx",stats)

In [24]:
segs = sort!(combine(groupby(res,[:Group,:Partition,:hospital_segment]),nrow),[:Group,:hospital_segment,:Partition])
unstack(segs, [:Group, :Partition], :hospital_segment, :nrow, allowduplicates=true)
ExcelFiles.save("Tier_Counts.xlsx",segs)
# combine(groupby(segs,[:Group,:hospital_segment]),:nrow=>std)

In [None]:
provincial = sort(combine(groupby(res,[:Group,:Partition,:Province]),nrow),[:Province,:Partition])
provincial
#unstack(provincial, [:Group, :Partition], :Province, :nrow, allowduplicates=true)

In [None]:
combine(groupby(provincial,:Province),:nrow=>std)[!,2]|>std
combine(groupby(segs,[:Group,:hospital_segment]),:nrow=>std)