In [1]:
import math
import datetime
import pandas as pd

In [2]:
# Price multiplier per condition grade. adjust_condition() divides a price by
# the multiplier of the grade recorded on the listing and multiplies by the
# multiplier of the grade derived from the car's age; 'good' is the neutral 1.
CAR_CONDITION_COEFFICIENT = dict(
    excellent=1.04,
    good=1,
    fair=0.95,
    bad=0.89,
)

def cal_profit_rate(df, step=0.03, max_steps=10000, min_rate=0.101):
    """Grid-search the smallest candidate price whose value net of a rate covers the sell price.

    Candidates are ``sell + i * step`` for ``i = 0 .. max_steps - 1``. For each
    candidate a rate ``0.34 * candidate ** -0.6`` is computed and floored at
    ``min_rate``; the first candidate with
    ``sell - candidate * (1 - rate) <= 0`` is returned. The caller stores the
    result in the ``adjust_buy_price`` column.

    Args:
        df: Row-like mapping (e.g. the row handed over by
            ``DataFrame.apply(axis=1)``) exposing ``'adjust_sell_price'``.
        step: Increment between successive candidates.
        max_steps: Number of candidates tried before giving up.
        min_rate: Lower bound applied to the computed rate.

    Returns:
        The first candidate that satisfies the condition, or NaN when the
        sell price is missing / not positive or no candidate within
        ``max_steps`` satisfies it.
    """
    sell = df['adjust_sell_price']

    # math.log() raises ValueError for values <= 0, which would abort the
    # whole DataFrame.apply because of a single bad row. NaN also fails this
    # comparison and is passed through as NaN.
    if not sell > 0:
        return float('nan')

    for i in range(max_steps):
        buy = sell + i * step
        # e ** (-0.6 * ln(buy)) is buy ** -0.6; the expression is kept as
        # originally written so results stay identical to earlier runs.
        rate = max(0.34 * math.e ** (-0.6 * math.log(buy, math.e)), min_rate)
        div = sell - buy * (1 - rate)
        if div > 0:
            continue
        return buy

    # Search exhausted: return an explicit NaN instead of falling off the end
    # of the function with an implicit None.
    return float('nan')
        
def adjust_condition(df, coefficients=None):
    """Rescale a row's ``adjust_buy_price`` when its listed condition disagrees with its age.

    The expected condition is derived from ``used_years``: up to 2 years is
    'excellent', over 2 and up to 8 is 'good', over 8 and up to 11 is 'fair',
    anything older is 'bad'. When the listed ``condition`` differs, the price
    is divided by the listed grade's coefficient and multiplied by the
    expected grade's coefficient. The result is rounded to two decimals.

    Args:
        df: Row-like mapping exposing ``'used_years'``, ``'condition'`` and
            ``'adjust_buy_price'``.
        coefficients: Optional mapping from condition grade to multiplier;
            defaults to the module-level ``CAR_CONDITION_COEFFICIENT``.

    Returns:
        The (possibly rescaled) price as a float rounded to two decimals.
        When ``used_years`` is NaN no grade can be derived and the price is
        returned unadjusted.

    Raises:
        KeyError: If the listed condition differs from the expected one and
            is not a key of the coefficient table.
    """
    table = CAR_CONDITION_COEFFICIENT if coefficients is None else coefficients
    used_years = df['used_years']
    price = df['adjust_buy_price']
    listed = df['condition']

    # NaN compares False against every threshold; without this guard no branch
    # below would match and the function would fail with UnboundLocalError.
    if used_years != used_years:
        return float('%.2f' % price)

    # Grade by age: <=2 years excellent, 3-8 good, 9-11 fair, 12+ bad.
    if used_years <= 2:
        expected = 'excellent'
    elif used_years <= 8:
        expected = 'good'
    elif used_years <= 11:
        expected = 'fair'
    else:
        expected = 'bad'

    if expected != listed:
        price = (price / table[listed]) * table[expected]
    return float('%.2f' % price)

# --- Load inputs (read order kept as-is) --------------------------------------
tiantianpai = pd.read_csv('../script/man.csv')
warehouse_k = pd.read_csv('../tmp/train/div_warehouse_k_param.csv')
div_price_bn_k_param = pd.read_csv('../tmp/train/div_price_bn_k_param.csv')

# Trim catalogue, ordered so that within a brand/model/year the smallest
# price_bn comes first; used_years is the age relative to the current calendar
# year, with negative ages set to 0.
car_autohome_all = (
    pd.read_csv('../tmp/train/car_autohome_all.csv')
    .sort_values(by=['brand_slug', 'model_slug', 'online_year', 'price_bn'])
    .reset_index(drop=True)
)
car_autohome_all['used_years'] = datetime.datetime.now().year - car_autohome_all['online_year']
car_autohome_all.loc[car_autohome_all['used_years'] < 0, 'used_years'] = 0

# The lookup table calls the join key 'detail_model_slug'; rename it so it
# matches the 'gpj_detail_slug' column of the listings.
global_model_mean_temp = pd.read_csv('../tmp/train/global_model_mean_temp.csv').rename(
    columns={'detail_model_slug': 'gpj_detail_slug'}
)

# Keep only the listing columns used below, then attach slugs and price_bn.
tiantianpai = tiantianpai.loc[
    :,
    [
        'title', 'year', 'month', 'mile', 'condition', 'city', 'price',
        'gpj_detail_slug', 'brand_name', 'model_name', 'detail_name',
        'online_year', 'new_sell_price', 'new_buy_price',
    ],
]
tiantianpai = tiantianpai.merge(
    global_model_mean_temp.loc[:, ['gpj_detail_slug', 'brand_slug', 'model_slug', 'price_bn', 'detail_slug']],
    how='left',
    on=['gpj_detail_slug'],
)
tiantianpai['used_years'] = datetime.datetime.now().year - tiantianpai['online_year']
tiantianpai.loc[tiantianpai['used_years'] < 0, 'used_years'] = 0

# Adjust for the age gap: scale the price by the difference between the
# listing's 'year' and the model's 'online_year', at rate k per year.
k = 0.028
tiantianpai['warehouse_year'] = tiantianpai['year'] - tiantianpai['online_year']
tiantianpai['adjust_sell_price'] = tiantianpai['price'] / (1 + k * tiantianpai['warehouse_year'])

# Derive the buy price from the adjusted sell price (row by row).
tiantianpai['adjust_buy_price'] = tiantianpai.apply(cal_profit_rate, axis=1)

# Correct the buy price for the car's condition (row by row).
tiantianpai['adjust_buy_price'] = tiantianpai.apply(adjust_condition, axis=1)

# Median adjusted buy price per trim, in the same order as the catalogue.
median_price = (
    tiantianpai
    .groupby(['brand_slug', 'brand_name', 'model_slug', 'model_name', 'detail_slug', 'online_year', 'price_bn'])
    ['adjust_buy_price']
    .median()
    .reset_index()
    .rename(columns={'adjust_buy_price': 'median_price'})
    .sort_values(by=['brand_slug', 'model_slug', 'online_year', 'price_bn'])
    .reset_index(drop=True)
)

In [3]:
# Pick the lowest-price_bn row of every brand/model/year group, then keep one
# row per (model_slug, online_year); only these two keys are used below.
low_config_car = median_price.loc[
    median_price.groupby(['brand_slug', 'model_slug', 'online_year']).price_bn.idxmin(), :
].reset_index(drop=True)
low_config_car = low_config_car.drop_duplicates(['model_slug', 'online_year']).reset_index(drop=True)

# Re-derive every trim's median price from a reference trim so that, within
# one model/year, a higher price_bn always yields a higher price:
#     price = (price_bn - reference_price_bn) * k + reference_price
# where k is looked up by the group's used_years.
adjusted_frames = []   # collected and concatenated once (DataFrame.append is gone in pandas 2.x)
skipped_models = []    # (model_slug, online_year) groups that could not be adjusted
for model_slug, online_year in low_config_car.loc[:, ['model_slug', 'online_year']].values:
    same_model = (car_autohome_all['model_slug'] == model_slug) & (car_autohome_all['online_year'] == online_year)
    car_autohome_temp = car_autohome_all.loc[same_model, :].reset_index(drop=True)
    car_autohome_temp = car_autohome_temp.merge(
        median_price.loc[:, ['detail_slug', 'median_price']], how='left', on=['detail_slug']
    )

    # Reference = first trim (in catalogue order) that has a median price.
    # Without one there is nothing to extrapolate from, so skip the group
    # instead of failing with IndexError.
    priced = car_autohome_temp.loc[car_autohome_temp['median_price'].notnull(), ['median_price', 'price_bn']]
    if priced.empty:
        skipped_models.append((model_slug, online_year))
        continue
    low_config_price, price_bn = priced.values[0]

    used_years = car_autohome_temp.loc[0, 'used_years']
    k_matches = div_price_bn_k_param.loc[div_price_bn_k_param['used_years'] == used_years, 'k']
    if k_matches.empty:
        skipped_models.append((model_slug, online_year))
        continue
    # Scalar slope: a 1-element array here would rely on implicit
    # array-to-float conversion inside the '%.2f' formatting below.
    k = float(k_matches.iloc[0])

    # Same two-decimal string rounding as before, applied to the whole column.
    car_autohome_temp['median_price'] = [
        float('%.2f' % ((trim_price_bn - price_bn) * k + low_config_price))
        for trim_price_bn in car_autohome_temp['price_bn']
    ]
    adjusted_frames.append(car_autohome_temp)

if adjusted_frames:
    part1 = pd.concat(adjusted_frames, sort=False, ignore_index=True)
else:
    # Keep the columns the price update reads so an empty result is still usable.
    part1 = pd.DataFrame(columns=['detail_slug', 'median_price'])

if skipped_models:
    print('%d model/year groups skipped (no priced trim or no k parameter)' % len(skipped_models))

In [4]:
def update_price(df, part):
    """Look up a replacement ``median_price`` for one row.

    Args:
        df: Row-like mapping exposing ``'detail_slug'`` and ``'median_price'``.
        part: DataFrame with ``'detail_slug'`` and ``'median_price'`` columns.

    Returns:
        The ``median_price`` of the first row in ``part`` whose
        ``detail_slug`` equals the row's, or the row's own ``median_price``
        when ``part`` has no such row.
    """
    same_detail = part['detail_slug'] == df['detail_slug']
    replacement = part.loc[same_detail, 'median_price']
    if replacement.empty:
        return df['median_price']
    return replacement.iloc[0]

# Reload the lookup table with its original column names, replace median_price
# row by row with the value from part1 where one exists, and write the table
# back over the same file.
global_model_mean_temp = (
    pd.read_csv('../tmp/train/global_model_mean_temp.csv')
    .assign(median_price=lambda frame: frame.apply(update_price, args=(part1,), axis=1))
)
global_model_mean_temp.to_csv('../tmp/train/global_model_mean_temp.csv', index=False)