In [2]:
from dask_cuda import LocalCUDACluster
from dask.distributed import Client
import numpy as np
import dask

# Cluster settings gathered in one place so they are easy to find and tune.
GPU_DEVICES = "0,1"
RMM_POOL_SIZE = '24GB'

# Local Dask cluster over the listed GPUs, using the UCX protocol with
# TCP-over-UCX and InfiniBand enabled and RMM managed memory turned on.
cluster = LocalCUDACluster(
    CUDA_VISIBLE_DEVICES=GPU_DEVICES,
    protocol="ucx",
    enable_tcp_over_ucx=True,
    enable_infiniband=True,
    rmm_managed_memory=True,
    rmm_pool_size=RMM_POOL_SIZE,
)
client = Client(cluster)

# Select cuDF as the dask dataframe backend.
dask.config.set({"dataframe.backend": "cudf"})

# NOTE(review): this project import runs after the cluster/config setup, exactly
# as before; confirm whether it can safely move up to the main import block.
from src.process.utils import CallDistanceUtil
driver = CallDistanceUtil()

In [51]:
# Load the user table from the project helper; judging by the rendered output it
# carries one row per client with a `mean_communication_distance` column
# (presumably the per-user mean requested via method='mean' — TODO confirm).
user_info = driver.get_user_info(target='communication', method='mean')
user_info

Unnamed: 0,client_nbr,mean_communication_distance,serv_id,born_area_code,register_district,age,male_flag,tenure,phone_brand,phone_level,...,e6_service_flag,e9_service_premium_flag,8card_service_flag,smart_phone_flag,govern_worker_flag,business_purpose_flag,red_mark_flag,govern_cluster_flag,govern_industry_flag,vpn_support_flag
0,deef2umv,7.562327,9v0m5tk3,510623,中江,51,1,26,金派,超低端,...,0,0,0,0,0,0,0,0,1,0
1,jwr3fnaa,1.437749,aau461ky,510602,罗江,40,1,20,酷派,高端,...,0,0,0,1,1,0,0,0,1,0
2,l6hs42gq,16.136670,9g4k60yn,510681,广汉,31,1,28,SAMSUNG,超高端,...,0,0,0,1,1,0,0,0,1,1
3,ljj72v3n,0.184176,aalr60vt,510123,广汉,36,0,115,广信,低端,...,0,0,0,0,0,0,0,0,1,1
4,ewc32v6b,1.125784,9fqo4bul,510624,广汉,46,1,19,英华,超低端,...,0,1,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
66050,i2zlk0jd,1.994285,a24qmxon,510681,广汉,37,1,81,华为,中端,...,0,0,0,1,0,0,0,0,1,1
66051,gouc2ckw,0.081028,8ru2my81,510683,德阳现业,26,1,70,海信,低端,...,0,0,0,0,0,0,0,0,1,1
66052,k258lxd6,0.644275,9mrymxoo,510403,广汉,55,0,75,广信,低端,...,0,0,0,0,1,0,0,0,1,1
66053,g9n43uik,6.199025,a9uimy5d,510624,广汉,44,0,25,同威,超低端,...,0,0,0,0,0,0,0,0,1,1


# Quantile Of The Communication Distance

In [45]:
# Print the 30th..90th percentiles (in steps of 10) of the mean communication
# distance.
#
# The loop runs over integer percentages rather than np.arange(0.3, 1, 0.1):
# a float step accumulates rounding error, so the value handed to quantile()
# was not exactly 0.9 and the truncating int(i * 100) label was fragile.
# Dividing an exact integer by 100 gives the nearest float to the intended
# quantile and an exact label. The message is also built on one line so the
# indentation of a continued string no longer leaks into the output.
for pct in range(30, 100, 10):
    quantile_value = user_info.mean_communication_distance.quantile(pct / 100)
    print(f"{pct} quantile: {quantile_value}")

30 quantile:           0.0
40 quantile:           0.4819990980897591
50 quantile:           1.1488493777552138
60 quantile:           2.1953152185556766
70 quantile:           3.7885239725842554
80 quantile:           6.930742913349063
90 quantile:           16.179184542317913


In [46]:
# Print the 90th..99th percentiles (in steps of 1) of the mean communication
# distance to inspect the upper tail.
#
# The loop runs over integer percentages rather than np.arange(0.9, 1, 0.01):
# with a float step the number of elements and their exact values depend on
# rounding, and the truncating int(i * 100) label can land one below the
# intended percentile. Dividing an exact integer by 100 avoids both problems.
# The message is built on one line so the indentation of a continued string
# no longer leaks into the output.
for pct in range(90, 100):
    quantile_value = user_info.mean_communication_distance.quantile(pct / 100)
    print(f"{pct} quantile: {quantile_value}")

90 quantile:           16.179184542317884
91 quantile:           18.20233660442249
92 quantile:           20.712635480592176
93 quantile:           23.70523150531494
94 quantile:           28.157685566064867
95 quantile:           33.40493154319496
96 quantile:           44.00659149853148
97 quantile:           56.95269405464032
98 quantile:           76.334099883609
99 quantile:           132.7753831860746


### Helper function

In [55]:
# Lookup table: area code with its last two digits dropped -> display label.
# Labels starting with '已撤銷_' presumably denote codes of administrative
# divisions that no longer exist — TODO confirm against the code book.
_BORN_AREA_LABELS = {
    5106: '德阳市',
    5101: '成都市',
    5107: '绵阳市',
    5110: '内江市',
    5109: '遂宁市',
    5130: '已撤銷_达川地区',
    5102: '已撤銷_乐山市市辖区',
    5108: '广元市',
    5113: '南充市',
    5129: '已撤銷_南充地区',
    5103: '自贡市',
    5111: '乐山市',
    5131: '已撤銷_雅安地區',
    5105: '泸州市',
    5134: '凉山彝族自治州',
    5139: '已撤銷_眉山地区资阳地区',
    5137: '已撤銷_巴中地區',
    5002: '重慶市城口县',
    5138: '已撤銷_眉山地区',
    5132: '阿坝藏族羌族自治州',
    5125: '已撤銷_宜宾市',
    5115: '宜宾市',
    5116: '广安市',
    5122: '已撤銷_万县市',
    5104: '攀枝花市',
    5133: '甘孜藏族自治州',
    3303: '浙江省温州市',
    6328: '青海省海西蒙古族藏族自治州',
}


def encode_born_area_code(x):
    """Translate a numeric born-area code into a readable area label.

    The code is floor-divided by 100 (dropping its last two digits) and the
    result is looked up in ``_BORN_AREA_LABELS``.

    Args:
        x: Numeric area code, e.g. ``510623``.

    Returns:
        The matching label string, or ``'其他'`` when the prefix is not in
        the table.
    """
    return _BORN_AREA_LABELS.get(x // 100, '其他')

# Inspect Born Area

In [84]:
# Derive a readable area label from every row's numeric born_area_code and
# store it as a new `born_area` column on the same frame.
born_area_labels = user_info['born_area_code'].apply(encode_born_area_code)
user_info['born_area'] = born_area_labels

In [88]:
# Summarise users by born area: average of mean_communication_distance and the
# number of rows per area, ordered by the average distance (largest first).
users_by_area = user_info.groupby('born_area')

mean_distance_by_area = (
    users_by_area['mean_communication_distance']
    .mean()
    .sort_values(ascending=False)
    .reset_index()
)
row_count_by_area = (
    users_by_area
    .size()
    .sort_values(ascending=False)
    .reset_index(name='count')
)

# Join the two summaries on the area label; the bare last expression is the
# cell's displayed output.
mean_distance_by_area.merge(row_count_by_area, on='born_area')

Unnamed: 0,born_area,mean_communication_distance,count
0,甘孜藏族自治州,41.653763,81
1,凉山彝族自治州,34.513974,171
2,已撤銷_眉山地区,33.366008,41
3,自贡市,24.052753,205
4,攀枝花市,23.870288,79
5,重慶市城口县,20.803322,93
6,泸州市,20.602304,197
7,南充市,19.897566,196
8,宜宾市,18.812205,61
9,广安市,18.436503,48
