In [0]:
import requests
import json
import pyspark.sql.functions as F
from pyspark.sql.types import StructField, StructType, StringType, ArrayType, LongType, TimestampType, BinaryType, IntegerType, DateType
from datetime import datetime, timedelta
import time
import re
from azure.storage.blob import BlobClient, generate_blob_sas, BlobSasPermissions
import os

In [0]:
# Notebook parameters, exposed as Databricks widgets so a job run can override them.
dbutils.widgets.text("detect_startTime", "2018-01-01T00:00:00Z")
dbutils.widgets.text("detect_endTime", "2020-01-01T00:00:00Z")
dbutils.widgets.text("fab", "D21")
dbutils.widgets.text("period", "D")

detect_startTime = dbutils.widgets.get("detect_startTime")
detect_endTime = dbutils.widgets.get("detect_endTime")
fab = dbutils.widgets.get("fab")
period = dbutils.widgets.get("period")

# The full timestamps are sent to the detection API unchanged; the date-only
# forms are used in the Delta `date` predicates further down.
# strptime raises ValueError here if a widget value does not match the format.
format_data = "%Y-%m-%dT%H:%M:%SZ"
# NOTE: `detect_starDate` (sic) keeps its original spelling because later cells reference it.
detect_starDate = datetime.strptime(detect_startTime, format_data).strftime("%Y-%m-%d")
detect_endDate = datetime.strptime(detect_endTime, format_data).strftime("%Y-%m-%d")
# Manual overrides for ad-hoc runs:
# detect_startTime = datetime(2022, 5, 1, 1, 0, 0).strftime('%Y-%m-%d 00:00:00')
# detect_endTime = datetime(2022, 5, 15, 1, 0, 0).strftime('%Y-%m-%d 00:00:00')
# fab = "D21"
# period = "D"

account_name = 'datalakecpcdev'
account_key = dbutils.secrets.get(scope = 'cpc-keyvault-dev', key = 'datalakegne2-datalakecpcdev-key')
container_name = f'zipfile/{fab}/{period}'

# Each period has its own Anomaly Detector resource and subscription-key secret
# (period-specific resources added 2022-05-03).
# period -> (service host, key-vault secret name)
_ANOMALY_SERVICE_BY_PERIOD = {
    'D': ("anomalycpcoil.cognitiveservices.azure.com", 'cognitiveservice-anomalycpcoil-secret'),
    'W': ("w-anomalyoil.cognitiveservices.azure.com", 'cognitiveservice-wanomalyoil-secret'),
    'M': ("m-anomalyoil.cognitiveservices.azure.com", 'cognitiveservice-manomalyoil-secret'),
}

# Fail fast: without this check an unsupported period would leave ENDPOINT and
# HEADERS undefined and only surface as a NameError in a later cell.
if period not in _ANOMALY_SERVICE_BY_PERIOD:
    raise ValueError(
        f"Unsupported period {period!r}; expected one of {sorted(_ANOMALY_SERVICE_BY_PERIOD)}"
    )

_service_host, _subscription_secret = _ANOMALY_SERVICE_BY_PERIOD[period]
ENDPOINT = f"{_service_host}/anomalydetector/v1.1-preview"
# Only the secret for the selected period is fetched.
HEADERS = {"Ocp-Apim-Subscription-Key": dbutils.secrets.get(scope = 'cpc-keyvault-dev', key = _subscription_secret)}

In [0]:
# URL templates for the multivariate anomaly detection REST API.
# `{endpoint}` is filled with ENDPOINT; the other placeholders are filled per call.

# Model collection (first 300 models).
API_MODEL = "https://{endpoint}/multivariate/models?$top=300"

# Single-model resource. The status and delete templates are the same URL.
API_MODEL_STATUS = "https://{endpoint}/multivariate/models/{model_id}"
API_DELETE = "https://{endpoint}/multivariate/models/{model_id}"

# Actions on a single model.
API_MODEL_INFERENCE = "https://{endpoint}/multivariate/models/{model_id}/detect"
API_EXPORT = "https://{endpoint}/multivariate/models/{model_id}/export"

# Detection result resource.
API_RESULTS = "https://{endpoint}/multivariate/results/{result_id}"

# Placeholder template for the SAS URL of the source data blob.
SOURCE_BLOB_SAS = "{blobsasstring}"

**Get blob SAS URL**

In [0]:
# Storage account that holds the zip archives, and its access key (read from the
# Databricks secret scope). Both are read by get_blob_sasurl when it signs a SAS token.
account_name = 'datalakecpcdev'
account_key = dbutils.secrets.get(
    scope='cpc-keyvault-dev',
    key='datalakegne2-datalakecpcdev-key',
)

In [0]:
def get_blob_sasurl(blob_name, fab, period):
    """Build a read-only SAS URL for one zip archive in blob storage.

    Args:
        blob_name: Archive name without the ``.zip`` extension.
        fab: Fab code; becomes part of the storage path.
        period: Period code; becomes part of the storage path.

    Returns:
        str: ``https://<account>.blob.core.windows.net/zipfile/<fab>/<period>/<blob_name>.zip``
        followed by ``?`` and a SAS token that grants read permission and
        expires one day after the call (UTC clock).
    """
    container_name = f'zipfile/{fab}/{period}'
    blob_name = f'{blob_name}.zip'

    # The token is signed with the module-level storage account name and key.
    one_day_from_now = datetime.utcnow() + timedelta(days=1)
    read_only = BlobSasPermissions(read=True)
    sas_blob = generate_blob_sas(
        account_name=account_name,
        container_name=container_name,
        blob_name=blob_name,
        account_key=account_key,
        permission=read_only,
        expiry=one_day_from_now,
    )

    return f'https://{account_name}.blob.core.windows.net/{container_name}/{blob_name}?{sas_blob}'


# Column-level wrapper around get_blob_sasurl returning a string column.
# NOTE(review): `udf` is not imported anywhere in this notebook; presumably it is the
# PySpark `udf` that the Databricks runtime pre-loads — confirm before running elsewhere.
get_blob_sas_udf = udf(get_blob_sasurl, StringType())

**get the newest models**

In [0]:
from pyspark.sql.window import Window
from pyspark.sql.functions import monotonically_increasing_id, row_number

# Within each (station, fab, period) group, order models from newest to oldest training time.
windowSpec = (
    Window
    .partitionBy("station", "fab", "period")
    .orderBy(F.col("model_training_time").desc())
)

# Row predicate for this run's fab/period, and the per-station SAS URL column.
_is_current_run = (F.col('fab') == fab) & (F.col('period') == period)
_station_zip_url = get_blob_sas_udf(F.col('station'), F.col('fab'), F.col('period'))

# Keep only the newest model (rank 1) per station and attach the SAS URL of
# the station's zip archive.
df_trained_model_list = (
    spark.read
    .format('delta')
    .load('/mnt/deltalake/trained_model_log')
    .select('station', 'fab', 'period', 'model_training_time', 'model_id')
    .filter(_is_current_run)
    .withColumn("id", row_number().over(windowSpec))
    .filter(F.col('id') == 1)
    .select('model_id', 'station', _station_zip_url.alias('blobsasurl'))
)

display(df_trained_model_list)

model_id,station,blobsasurl
999437c8-d270-11ec-9ffb-3298cacac256,01a37bc3b6a23ccdaf5b,https://datalakecpcdev.blob.core.windows.net/zipfile/D21/D/01a37bc3b6a23ccdaf5b.zip?se=2022-05-17T03%3A52%3A32Z&sp=rt&sv=2021-04-10&sr=b&sig=Uji5IbTzfZ/JVepJWHiL5%2BuxLiKVk60nHTBzJj8ELVI%3D
a1457428-d270-11ec-9ffb-3298cacac256,021512a0fa0166f54dd7,https://datalakecpcdev.blob.core.windows.net/zipfile/D21/D/021512a0fa0166f54dd7.zip?se=2022-05-17T03%3A52%3A32Z&sp=rt&sv=2021-04-10&sr=b&sig=NrHuWvxWmwzSz6FgEwojYF1y0wb8Z%2ByMw5713Z0h5I0%3D
a8f4f90a-d270-11ec-889d-3ad8145c4aea,05f098ac2faab7324ae5,https://datalakecpcdev.blob.core.windows.net/zipfile/D21/D/05f098ac2faab7324ae5.zip?se=2022-05-17T03%3A52%3A32Z&sp=rt&sv=2021-04-10&sr=b&sig=kaG/x/PQRqwKdmAVYpfoISjpSH%2BEATHyaihyUzqID/E%3D
b0afb5b8-d270-11ec-889d-3ad8145c4aea,07b8ada8541584b38731,https://datalakecpcdev.blob.core.windows.net/zipfile/D21/D/07b8ada8541584b38731.zip?se=2022-05-17T03%3A52%3A32Z&sp=rt&sv=2021-04-10&sr=b&sig=yCN/h5DeeXIzfSDusK3Z9M/m2VAbmSHpROgw1SCe0uI%3D
b86ae3ea-d270-11ec-889d-3ad8145c4aea,0a0da4158168d1679b8a,https://datalakecpcdev.blob.core.windows.net/zipfile/D21/D/0a0da4158168d1679b8a.zip?se=2022-05-17T03%3A52%3A32Z&sp=rt&sv=2021-04-10&sr=b&sig=fY/irnqcTKQP6ISBYXjAtCYOIB3hc9dw9o0gHfY1NxA%3D
c030d7c4-d270-11ec-8039-462a83312f19,0b08c67ca54995f4451d,https://datalakecpcdev.blob.core.windows.net/zipfile/D21/D/0b08c67ca54995f4451d.zip?se=2022-05-17T03%3A52%3A32Z&sp=rt&sv=2021-04-10&sr=b&sig=/mRAxB1kXZvdcTAhR0eOQFNjFUQdw2Gg5rZass/LoAM%3D
c7dd73a6-d270-11ec-889d-3ad8145c4aea,0fb6ba15172179e2f690,https://datalakecpcdev.blob.core.windows.net/zipfile/D21/D/0fb6ba15172179e2f690.zip?se=2022-05-17T03%3A52%3A32Z&sp=rt&sv=2021-04-10&sr=b&sig=vE/6A6YRGR16QWlJCN9Q1TzZup8aLsRa1TUP8USEmVU%3D
cf922844-d270-11ec-8039-462a83312f19,15787192a2cf526deaf8,https://datalakecpcdev.blob.core.windows.net/zipfile/D21/D/15787192a2cf526deaf8.zip?se=2022-05-17T03%3A52%3A32Z&sp=rt&sv=2021-04-10&sr=b&sig=pUhwcD3wjWJqr6ndD55eQMcluf50BfbxG8rBqtjDaJo%3D
d7417e96-d270-11ec-b7df-462a83312f19,1b06cfe8d00e1293ef0c,https://datalakecpcdev.blob.core.windows.net/zipfile/D21/D/1b06cfe8d00e1293ef0c.zip?se=2022-05-17T03%3A52%3A32Z&sp=rt&sv=2021-04-10&sr=b&sig=1l8f%2BDCqAuvdMpS10JlyS0tGDhywY5fu0SnI/KX7kPk%3D
def3764e-d270-11ec-b7df-462a83312f19,20cc23709cc1ee2e354a,https://datalakecpcdev.blob.core.windows.net/zipfile/D21/D/20cc23709cc1ee2e354a.zip?se=2022-05-17T03%3A52%3A32Z&sp=rt&sv=2021-04-10&sr=b&sig=//bxdCNnfSe1kV4A3XUUV%2BL/Mukfnpmf4z4qdcW8tsk%3D


**Create detect functions**

In [0]:
def check_model_status(model_id):
    """Return the status string the service reports for a trained model.

    Args:
        model_id: Id of the model to look up.

    Returns:
        The value of ``modelInfo.status`` in the service response.

    Raises:
        AssertionError: If the service does not answer with HTTP 200.
    """
    status_url = API_MODEL_STATUS.format(endpoint=ENDPOINT, model_id=model_id)
    res = requests.get(status_url, headers=HEADERS)
    assert res.status_code == 200, f"Error occured. Error message: {res.content}"

    model_info = json.loads(res.content)['modelInfo']
    return model_info['status']

In [0]:
def detect_data(SOURCE_BLOB_SAS,model_id):
    """Submit a detection request for one model and return the new result id.

    The request covers the module-level ``detect_startTime`` .. ``detect_endTime``
    window and points the service at the data behind ``SOURCE_BLOB_SAS``.

    Args:
        SOURCE_BLOB_SAS: SAS URL of the source data blob.
        model_id: Id of the trained model to run.

    Returns:
        The last path segment of the response's ``location`` header.

    Raises:
        AssertionError: If the service does not answer with HTTP 201.
    """
    inference_url = API_MODEL_INFERENCE.format(endpoint=ENDPOINT, model_id=model_id)
    request_body = json.dumps({
        'source': SOURCE_BLOB_SAS,
        'startTime': detect_startTime,
        'endTime': detect_endTime,
    })

    res = requests.post(inference_url, data=request_body, headers=HEADERS)
    assert res.status_code == 201, f"Error occured. Error message: {res.content}"

    # The result id is whatever follows the final "/" of the location header.
    _, _, result_id = res.headers['location'].rpartition("/")
    return result_id

In [0]:
def get_result_to_list(result_id, station, fab, poll_interval=2, max_wait=None):
    """Poll a detection result until it finishes and return its usable rows.

    Args:
        result_id: Id of the detection result to poll.
        station: Station id, copied into the returned dict.
        fab: Fab code, copied into the returned dict.
        poll_interval: Seconds to sleep between polls while the result is
            neither ``READY`` nor ``FAILED``.
        max_wait: Optional upper bound, in seconds, on the total polling time.
            ``None`` (the default) keeps polling until a terminal status arrives.

    Returns:
        dict: ``{'station': station, 'fab': fab, 'content': [...]}``. For a
        ``READY`` result, ``content`` holds every entry of ``results`` that has
        a ``value`` containing ``isAnomaly``. For a ``FAILED`` result,
        ``content`` is an empty list.

    Raises:
        AssertionError: If a poll does not answer with HTTP 200.
        TimeoutError: If ``max_wait`` is set and elapses before a terminal status.
    """
    terminal_statuses = ("READY", "FAILED")
    started = time.monotonic()

    while True:
        res = requests.get(API_RESULTS.format(endpoint=ENDPOINT, result_id=result_id), headers=HEADERS)
        assert res.status_code == 200, f"Error occured. Error message: {res.content}"

        result_json = json.loads(res.content.decode('utf-8'))
        summary = result_json['summary']
        result_status = summary['status']
        print(result_status)

        if result_status in terminal_statuses:
            break
        if max_wait is not None and time.monotonic() - started >= max_wait:
            raise TimeoutError(
                f"Result {result_id} still {result_status} after {max_wait} seconds"
            )
        # Sleep only while the result is still pending; a finished result returns at once.
        time.sleep(poll_interval)

    if result_status == "READY":
        content = [
            item for item in (result_json.get('results') or [])
            if 'value' in item and 'isAnomaly' in item['value']
        ]
    else:
        # A failed run contributes no rows; print the reported errors so the
        # failure is visible in the cell output.
        print(f"Detection failed for station {station}: {summary.get('errors')}")
        content = []

    return {'station': station, 'fab': fab, 'content': content}
   

**Inference data**

In [0]:
# Period-prefixed Delta table names under /mnt/deltalake.
# The result table holds one row per detected timestamp; the contribution table
# holds the per-variable breakdown of anomalous rows.
_detect_result = f'{period}_detect_result'
_detect_contribution = f'{period}_detect_contribution'

In [0]:
# Bring the selected models to the driver; the REST calls below run one model at a time.
rdd_trained_model_list = df_trained_model_list.select('blobsasurl','model_id','station').collect()

trained_model_result_list = []   # one metadata dict per submitted detection
error_list = []                  # stations whose status check or submission raised
for model_row in rdd_trained_model_list:
    try:
        station_id = model_row['station']
        print(station_id)

        # Raises (and is recorded below) when the model cannot be fetched.
        model_status = check_model_status(model_row['model_id'])

        # Timestamp is taken before the submission call.
        submitted_at = datetime.utcnow().strftime('%Y-%m-%d %H:%M:%S')
        result_id = detect_data(model_row['blobsasurl'], model_row['model_id'])

        trained_model_result_list.append({
            'station': station_id,
            'period': period,
            'fab': fab,
            'model_id': model_row['model_id'],
            'detect_time': submitted_at,
            'detect_startTime': detect_startTime,
            # Key spelling (sic) kept: it is the column name written to the detect log.
            'detedct_endTime': detect_endTime,
            'result_id': result_id,
        })

    except Exception as e:
        # Record the failure and continue with the next station.
        error_list.append({
            'station': model_row['station'],
            'errorcode': e,
        })

error_list

In [0]:
# Wait for every submitted detection in turn and gather the parsed results.
result_list = []
for submitted in trained_model_result_list:
    pending_result_id = submitted['result_id']
    print(pending_result_id)
    result_list.append(
        get_result_to_list(pending_result_id, submitted['station'], submitted['fab'])
    )


In [0]:
from delta.tables import DeltaTable

result_deltaTable = DeltaTable.forPath(spark, f'/mnt/deltalake/{_detect_result}')
contribution_deltaTable = DeltaTable.forPath(spark, f'/mnt/deltalake/{_detect_contribution}')

# Earlier variant that parsed "YYYY-MM-DD HH:MM:SS" widget values:
# format_data = "%Y-%m-%d %H:%M:%S"
# detect_starDate = datetime.strptime(detect_startTime, format_data).strftime('%Y-%m-%d')
# detect_endDate = datetime.strptime(detect_endTime, format_data).strftime('%Y-%m-%d')

# Remove this fab's rows for the detection window from both tables, so the
# appends that follow do not duplicate rows from a previous run.
#
# Using {detect_startTime} and {detect_endTime} directly seemed to cause a format
# problem: testing showed the predicate needs "YYYY-MM-DD", not
# "YYYY-MM-DD HH:MM:SS", so the date-only values are used for now.
# For the original error, see the debugging of the daily_incremental_copy run
# on the morning of 5/15.
_window_predicate = f'date >= "{detect_starDate}" and date <= "{detect_endDate}" and fab = "{fab}" '
for _target_table in (result_deltaTable, contribution_deltaTable):
    _target_table.delete(_window_predicate)

# Manual cleanup from a fixed date onward:
# result_deltaTable.delete(f'date >= "2022-03-01"' )
# contribution_deltaTable.delete(f'date >= "2022-03-01"' )

**Convert result list to DataFrame (Delta table)**

In [0]:
# Schemas for the collected detection results. Every leaf field is declared as
# a nullable string, including severity, score and isAnomaly.
error_schema = (
    StructType()
    .add('code', StringType(), True)
    .add('message', StringType(), True)
)
contributors_schema = (
    StructType()
    .add('variable', StringType(), True)
    .add('contributionScore', StringType(), True)
)
value_schema = (
    StructType()
    .add('severity', StringType(), True)
    .add('score', StringType(), True)
    .add('contributors', ArrayType(contributors_schema), True)
    .add('isAnomaly', StringType(), True)
)
raw_schema = (
    StructType()
    .add('timestamp', StringType(), True)
    .add('value', value_schema, True)
    .add('errors', ArrayType(error_schema), True)
)
filter_schema = (
    StructType()
    .add('station', StringType(), True)
    .add('fab', StringType(), True)
    .add('content', ArrayType(raw_schema), True)
)

rdd = spark.sparkContext.parallelize(result_list)

# One row per entry of `content`, with that entry's timestamp / value / errors
# promoted to top-level columns.
_per_timestamp = (
    spark.createDataFrame(rdd, filter_schema)
    .select('station', 'fab', F.explode('content').alias('value'))
    .select('station', 'fab', 'value.*')
)

# Normalise the timestamp text, derive the calendar date from it, then expand
# the `value` struct into severity / score / contributors / isAnomaly.
_timestamp_text = F.to_timestamp('timestamp').cast('string')
_date_text = F.date_format(F.to_timestamp(F.col('timestamp'), 'yyyy-MM-dd HH:mm:ss'), "yyyy-MM-dd")
df_raw = (
    _per_timestamp
    .withColumn('timestamp', _timestamp_text)
    .withColumn("date", _date_text)
    .select('station', 'fab', 'date', 'value.*')
)
display(df_raw)

# Result table: everything except the nested contributors.
(
    df_raw
    .drop('contributors')
    .write
    .format('delta')
    .mode('append')
    # .partitionBy('station')
    .save(f'/mnt/deltalake/{_detect_result}')
)

# Contribution table: anomalous rows only, one output row per contributor.
# isAnomaly is compared with the string "true" because the schema types it as a string.
_anomaly_contributions = (
    df_raw
    .filter(F.col('isAnomaly')== "true")
    .withColumn('contributors', F.explode('contributors'))
    .select(*df_raw.columns, 'contributors.*')
    .drop('contributors')
)
(
    _anomaly_contributions
    .write
    .format('delta')
    .mode('append')
    # .partitionBy('station')
    .save(f'/mnt/deltalake/{_detect_contribution}')
)

station,fab,date,severity,score,contributors,isAnomaly


**Insert Result Metadata**

In [0]:
# Explicit schema for the detect log. Every value stored in
# trained_model_result_list is a string, and the column order matches the
# alphabetical order shown in this cell's previous output.
# Declaring the schema avoids inference from dicts, which raises when the list
# is empty (i.e. when no detection could be submitted).
_detect_log_schema = StructType([
    StructField(column_name, StringType(), True)
    for column_name in (
        'detect_startTime',
        'detect_time',
        'detedct_endTime',  # (sic) spelling kept: it is the key written by the submission loop
        'fab',
        'model_id',
        'period',
        'result_id',
        'station',
    )
])

rdd = spark.sparkContext.parallelize(trained_model_result_list)
df_models = spark.createDataFrame(rdd, _detect_log_schema)

# Append one row per submitted detection to the detect log.
(df_models.write
         .format('delta')
         .mode('append')
         .save('/mnt/deltalake/trained_detect_log')
)

display(df_models)

detect_startTime,detect_time,detedct_endTime,fab,model_id,period,result_id,station
2022-03-01 00:00:00,2022-05-16 03:52:39,2022-05-16 00:00:00,D21,999437c8-d270-11ec-9ffb-3298cacac256,D,a2106ac2-d4cb-11ec-8177-d60320dc67ee,01a37bc3b6a23ccdaf5b
2022-03-01 00:00:00,2022-05-16 03:52:41,2022-05-16 00:00:00,D21,a1457428-d270-11ec-9ffb-3298cacac256,D,a312a1b0-d4cb-11ec-8722-22fd69dbc8e5,021512a0fa0166f54dd7
2022-03-01 00:00:00,2022-05-16 03:52:43,2022-05-16 00:00:00,D21,a8f4f90a-d270-11ec-889d-3ad8145c4aea,D,a414a8ec-d4cb-11ec-8177-d60320dc67ee,05f098ac2faab7324ae5
2022-03-01 00:00:00,2022-05-16 03:52:44,2022-05-16 00:00:00,D21,b0afb5b8-d270-11ec-889d-3ad8145c4aea,D,a51b0c18-d4cb-11ec-afa1-b64641111a4c,07b8ada8541584b38731
2022-03-01 00:00:00,2022-05-16 03:52:46,2022-05-16 00:00:00,D21,b86ae3ea-d270-11ec-889d-3ad8145c4aea,D,a60d2bba-d4cb-11ec-8177-d60320dc67ee,0a0da4158168d1679b8a
2022-03-01 00:00:00,2022-05-16 03:52:48,2022-05-16 00:00:00,D21,c030d7c4-d270-11ec-8039-462a83312f19,D,a712880c-d4cb-11ec-8177-d60320dc67ee,0b08c67ca54995f4451d
2022-03-01 00:00:00,2022-05-16 03:52:49,2022-05-16 00:00:00,D21,c7dd73a6-d270-11ec-889d-3ad8145c4aea,D,a80fc3e6-d4cb-11ec-afa1-b64641111a4c,0fb6ba15172179e2f690
2022-03-01 00:00:00,2022-05-16 03:52:51,2022-05-16 00:00:00,D21,cf922844-d270-11ec-8039-462a83312f19,D,a8ff573a-d4cb-11ec-8177-d60320dc67ee,15787192a2cf526deaf8
2022-03-01 00:00:00,2022-05-16 03:52:52,2022-05-16 00:00:00,D21,d7417e96-d270-11ec-b7df-462a83312f19,D,a9ec1d4a-d4cb-11ec-8177-d60320dc67ee,1b06cfe8d00e1293ef0c
2022-03-01 00:00:00,2022-05-16 03:52:54,2022-05-16 00:00:00,D21,def3764e-d270-11ec-b7df-462a83312f19,D,aadcf72e-d4cb-11ec-afa1-b64641111a4c,20cc23709cc1ee2e354a


**optimize detect result**

In [0]:
# Read the stored result table back (all rows, no filter) to inspect it.
df_raw = spark.read.load(f'/mnt/deltalake/{_detect_result}', format='delta')

display(df_raw)

station,fab,date,severity,score,isAnomaly
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True
a582e74569f9aecc1be3,D21,2021-01-02,0.3753823935985565,0.6450127363204956,True
a582e74569f9aecc1be3,D21,2021-01-03,0.4657585024833679,0.8003044128417969,True
a582e74569f9aecc1be3,D21,2021-01-04,0.3369172215461731,0.5789187550544739,True
a582e74569f9aecc1be3,D21,2021-01-05,0.0,0.4608480930328369,False
a582e74569f9aecc1be3,D21,2021-01-06,0.0,0.518524706363678,False
a582e74569f9aecc1be3,D21,2021-01-07,0.320677787065506,0.5510148406028748,True
a582e74569f9aecc1be3,D21,2021-01-08,0.3661214709281921,0.6290999054908752,True
a582e74569f9aecc1be3,D21,2021-01-09,0.3818665444850921,0.6561543345451355,True
a582e74569f9aecc1be3,D21,2021-01-10,0.5409566164016724,0.9295158982276917,True


%
_detect_result

In [0]:
%sql
-- Maintenance statements for the daily result table, left commented out; uncomment to run by hand.
-- NOTE(review): the DESCRIBE HISTORY path uses a lower-case `d_detect_result` while OPTIMIZE uses `D_detect_result` — confirm which is intended.
-- OPTIMIZE delta.`/mnt/deltalake/D_detect_result`
-- ZORDER BY (station)
-- DESCRIBE HISTORY delta.`/mnt/deltalake/d_detect_result`

path,metrics
dbfs:/mnt/deltalake/D_detect_result,"List(0, 0, List(null, null, 0.0, 0, 0), List(null, null, 0.0, 0, 0), 0, List(minCubeSize(107374182400), List(1, 2327821), List(0, 0), 1, List(0, 0), 0, null), 0, 1, 1, false)"


In [0]:
# Read the stored contribution table back (all rows, no filter) to inspect it.
df_contribution_raw = spark.read.load(f'/mnt/deltalake/{_detect_contribution}', format='delta')

display(df_contribution_raw)

station,fab,date,severity,score,isAnomaly,variable,contributionScore
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True,debitcard,0.2004262059926986
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True,others,0.2004262059926986
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True,selfservice,0.2004262059926986
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True,fleetcard,0.146622285246849
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True,not_vip,0.0735598057508468
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True,diesel,0.059380155056715
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True,vip,0.0285757258534431
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True,gasline,0.0272580627351999
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True,nonselfservice,0.0216491017490625
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True,scooter,0.0137970726937055


In [0]:
%sql
-- Maintenance statement for the daily contribution table, left commented out; uncomment to run by hand.
-- OPTIMIZE delta.`/mnt/deltalake/D_detect_contribution`
-- ZORDER BY (station)


**Export the result to CSV for Power BI**

In [0]:
# Reload both Delta tables in full, so the export covers everything stored
# rather than only the rows produced by this run.
df_raw = spark.read.load(f'/mnt/deltalake/{_detect_result}', format='delta')
display(df_raw)

df_contribution_raw = spark.read.load(f'/mnt/deltalake/{_detect_contribution}', format='delta')
display(df_contribution_raw)

# Both tables are collected to the driver as pandas frames.
df_raw_pandas = df_raw.toPandas()
df_raw_contribution_pandas = df_contribution_raw.toPandas()

# Write the CSVs to the current working directory ...
_result_csv = f'{_detect_result}.csv'
_contribution_csv = f'{_detect_contribution}.csv'
df_raw_pandas.to_csv(_result_csv, index=False)
df_raw_contribution_pandas.to_csv(_contribution_csv, index=False)

# ... then move them to the mounted csv folder. The source paths assume the
# working directory is /databricks/driver.
dbutils.fs.mv(
    f'file:/databricks/driver/{_detect_result}.csv',
    f'/mnt/deltalake/csv/{_detect_result}.csv',
)
dbutils.fs.mv(
    f'file:/databricks/driver/{_detect_contribution}.csv',
    f'/mnt/deltalake/csv/{_detect_contribution}.csv',
)

station,fab,date,severity,score,isAnomaly
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True
a582e74569f9aecc1be3,D21,2021-01-02,0.3753823935985565,0.6450127363204956,True
a582e74569f9aecc1be3,D21,2021-01-03,0.4657585024833679,0.8003044128417969,True
a582e74569f9aecc1be3,D21,2021-01-04,0.3369172215461731,0.5789187550544739,True
a582e74569f9aecc1be3,D21,2021-01-05,0.0,0.4608480930328369,False
a582e74569f9aecc1be3,D21,2021-01-06,0.0,0.518524706363678,False
a582e74569f9aecc1be3,D21,2021-01-07,0.320677787065506,0.5510148406028748,True
a582e74569f9aecc1be3,D21,2021-01-08,0.3661214709281921,0.6290999054908752,True
a582e74569f9aecc1be3,D21,2021-01-09,0.3818665444850921,0.6561543345451355,True
a582e74569f9aecc1be3,D21,2021-01-10,0.5409566164016724,0.9295158982276917,True


station,fab,date,severity,score,isAnomaly,variable,contributionScore
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True,debitcard,0.2004262059926986
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True,others,0.2004262059926986
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True,selfservice,0.2004262059926986
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True,fleetcard,0.146622285246849
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True,not_vip,0.0735598057508468
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True,diesel,0.059380155056715
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True,vip,0.0285757258534431
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True,gasline,0.0272580627351999
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True,nonselfservice,0.0216491017490625
a582e74569f9aecc1be3,D21,2021-01-01,0.356383353471756,0.6123670339584351,True,scooter,0.0137970726937055
