In [19]:
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm
import datetime
import warnings
import random
from functools import reduce
from os import walk
warnings.filterwarnings('ignore')

In [20]:
data_sets_sort = pd.read_csv('F:/aliyuun_datas/2021数字汽车大赛/temp_df/datasets_sort.csv')
data_sets_sort.shape

(6880557, 33)

In [21]:
data_sets_sort.columns

Index(['vin', '时间', '车速', '车辆状态', '充电状态', '总电压', '总电流', '累计里程', 'SOC',
       '电池单体电压最高值', '最高电压电池单体代号', '电池单体电压最低值', '最低电压电池单体代号', '最高温度值',
       '最高温度探针单体代号', '最低温度值', '最低温度探针单体代号', '最高报警等级', 'DC_DC状态', '档位',
       '驱动电机状态', '驱动电机转速', '驱动电机转矩', '驱动电机温度', '驱动电机控制器温度', '电机控制器输入电压',
       '电机控制器直流母流电流', '经度', '维度', '加速踏板行程值', '制动踏板状态', 'day', 'second'],
      dtype='object')

# <font color=red>构造时间变化特征，车速变化特征，加速度特征</font>

In [22]:
def init_feas_a(car_dict, data_sets_sort):
    new_sets_list = list()
    for k, v in tqdm(car_dict.items()):
        df = data_sets_sort[data_sets_sort['vin']==k].reset_index(drop=True)
        df['speed'] = df['车速'] / 3.6
        time_a = pd.concat([pd.DataFrame({'second': [0]}), df[['second']]]).reset_index(drop=True)
        time_b = df[['second']]
        time_changed = time_b - time_a
        time_changed.drop([len(time_changed)-1], axis=0, inplace=True)
        df['time_changed'] = time_changed
        speed_a = pd.concat([pd.DataFrame({'speed': [0]}), df[['speed']]]).reset_index(drop=True)
        speed_b = df[['speed']]
        speed_changed = speed_b - speed_a
        speed_changed.drop([len(speed_changed)-1], axis=0, inplace=True)
        df['speed_changed'] = speed_changed
        df['a'] = df['speed_changed'] / df['time_changed']
        df['a'] = df['a'].fillna(0)  # 0/0==NaN
        new_sets_list.append(df)
        
    datasets = pd.concat(new_sets_list).reset_index(drop=True)
    print("拥有速度变化，时间变化，加速度的全量数据集：", datasets.shape)
    return datasets

datasets = init_feas_a(dict(data_sets_sort['vin'].value_counts()), data_sets_sort=data_sets_sort)

100%|██████████| 10/10 [00:07<00:00,  1.37it/s]


拥有速度变化，时间变化，加速度的全量数据集： (6880557, 37)


In [23]:
datasets[['speed', 'speed_changed', 'time_changed', 'a']]

Unnamed: 0,speed,speed_changed,time_changed,a
0,0.000000,0.0,0.0,0.0
1,0.000000,0.0,15.0,0.0
2,0.000000,0.0,15.0,0.0
3,0.000000,0.0,15.0,0.0
4,0.000000,0.0,15.0,0.0
...,...,...,...,...
6880552,3.111111,0.0,15.0,0.0
6880553,3.111111,0.0,15.0,0.0
6880554,3.111111,0.0,15.0,0.0
6880555,3.111111,0.0,15.0,0.0


In [24]:
len(datasets[datasets['a']>1.5])

1994

In [25]:
datasets['a'].value_counts()

 0.000000    2038262
-0.207407     181600
 0.207407     181131
 0.083333       7622
-0.083333       7256
              ...   
-0.029704          1
-0.000161          1
 0.037346          1
-0.067677          1
 0.004531          1
Name: a, Length: 40642, dtype: int64

# <font color=red>构造驱动电机变化的相关特征</font>

## <font color=orange>1. 单位时间内驱动电机转速的变化：engine_speed_changed</font> 每辆车的值都相同
## <font color=orange>2. 单位时间内驱动电机的转矩变化：engine_torque_changed</font> 每辆车的值都相同
## <font color=orange>3. 单位时间内驱动点击的温度变化：engine_temperature_changed</font>
## <font color=orange>4. 单位时间内驱动点击的控制器温度变换：engine_controller_tpt_changed</font>

In [26]:
car_dict = dict(data_sets_sort['vin'].value_counts())

fea_dict = {'驱动电机转速': 'engine_speed_changed',
            '驱动电机转矩': 'engine_torque_changed',
            '驱动电机温度': 'engine_temperature_changed',
            '驱动电机控制器温度': 'engine_controller_tpt_changed'
            }

car_dict

{'LVCB4L4DXHM002840': 884505,
 'LVCB4L4D0HM002829': 862206,
 'LVCB4L4D7HM002830': 850061,
 'LVCB4L4D2HM002833': 841948,
 'LVCB4L4D4HM002803': 817075,
 'LVCB4L4D4HM002834': 557430,
 'LVCB4L4D0HM002832': 537284,
 'LVCB4L4D3HM002839': 531358,
 'LVCB4L4D9HM002862': 523776,
 'LVCB4L4D1HM002841': 474914}

In [27]:
# 构建特征
def init_engine_feas(car_dict, fea_dict):
    datasets_list = list()
    for k, v in tqdm(car_dict.items()):
        temp_data = datasets[datasets['vin']==k].reset_index(drop=True)
        # print("temp_data:", temp_data.shape)
        for old_fea, new_fea in fea_dict.items():
            engine_fea_a = temp_data[[old_fea]]
            engine_fea_b = pd.concat([pd.DataFrame({old_fea: [0]}), temp_data[[old_fea]]]).reset_index(drop=True)   # 在开头增加0值
            engine_fea = engine_fea_b - engine_fea_a
            engine_fea.drop([len(engine_fea)-1], axis=0, inplace=True)
            engine_fea.loc[0] = 0
            temp_data[new_fea] = engine_fea
        datasets_list.append(temp_data)
    datas = pd.concat(datasets_list).reset_index(drop=True)
    print("在排序后的全量数据集基础上构建新的特征：", datas.shape)
    return datas

datas = init_engine_feas(car_dict=car_dict, fea_dict=fea_dict)

100%|██████████| 10/10 [00:11<00:00,  1.15s/it]


在排序后的全量数据集基础上构建新的特征： (6880557, 41)


In [28]:
engine_list = list()
for k, v in fea_dict.items():
    engine_list.append(v)

datas[engine_list]

Unnamed: 0,engine_speed_changed,engine_torque_changed,engine_temperature_changed,engine_controller_tpt_changed
0,0.0,0.0,0.0,0.0
1,0.0,0.0,1.0,-1.0
2,0.0,0.0,-1.0,0.0
3,0.0,0.0,1.0,-1.0
4,0.0,0.0,0.0,0.0
...,...,...,...,...
6880552,0.0,0.0,0.0,0.0
6880553,0.0,0.0,0.0,0.0
6880554,0.0,0.0,0.0,0.0
6880555,0.0,0.0,0.0,0.0


# <font color=red>构造电功率特征</font>

In [29]:
datas[['总电压', '总电流']]

Unnamed: 0,总电压,总电流
0,611.0,7.7
1,610.7,7.7
2,610.5,7.9
3,610.3,8.2
4,610.2,7.0
...,...,...
6880552,622.7,-328.2
6880553,635.8,-346.4
6880554,631.9,-20.4
6880555,629.9,-20.7


In [30]:
datas['w'] = datas['总电压'] * datas['总电流'].abs()

In [31]:
datas[['w']]

Unnamed: 0,w
0,4704.70
1,4702.39
2,4822.95
3,5004.46
4,4271.40
...,...
6880552,204370.14
6880553,220241.12
6880554,12890.76
6880555,13038.93


In [32]:
datas.columns

Index(['vin', '时间', '车速', '车辆状态', '充电状态', '总电压', '总电流', '累计里程', 'SOC',
       '电池单体电压最高值', '最高电压电池单体代号', '电池单体电压最低值', '最低电压电池单体代号', '最高温度值',
       '最高温度探针单体代号', '最低温度值', '最低温度探针单体代号', '最高报警等级', 'DC_DC状态', '档位',
       '驱动电机状态', '驱动电机转速', '驱动电机转矩', '驱动电机温度', '驱动电机控制器温度', '电机控制器输入电压',
       '电机控制器直流母流电流', '经度', '维度', '加速踏板行程值', '制动踏板状态', 'day', 'second',
       'speed', 'time_changed', 'speed_changed', 'a', 'engine_speed_changed',
       'engine_torque_changed', 'engine_temperature_changed',
       'engine_controller_tpt_changed', 'w'],
      dtype='object')

# <font color=red>构造车辆没15秒的移动距离特征 **distance** 和转角特征 **degree** </font>

In [33]:
def getDistance(latA, lonA, latB, lonB):
    ra = 6378140  # 赤道半径
    rb = 6356755  # 极半径
    flatten = (ra - rb) / ra  # Partial rate of the earth
    # change angle to radians
    radLatA = math.radians(latA)
    radLonA = math.radians(lonA)
    radLatB = math.radians(latB)
    radLonB = math.radians(lonB)

    pA = math.atan(rb / ra * math.tan(radLatA))
    pB = math.atan(rb / ra * math.tan(radLatB))
    x = math.acos(math.sin(pA) * math.sin(pB) + math.cos(pA) * math.cos(pB) * math.cos(radLonA - radLonB))
    c1 = (math.sin(x) - x) * (math.sin(pA) + math.sin(pB)) ** 2 / math.cos(x / 2) ** 2
    c2 = (math.sin(x) + x) * (math.sin(pA) - math.sin(pB)) ** 2 / math.sin(x / 2) ** 2
    dr = flatten / 8 * (c1 - c2)
    distance = ra * (x + dr)
    distance = round(distance / 1000, 4)
    return distance*1000

In [34]:
res= datas['经度'].apply(lambda radLat:math.radians(radLat))
res

0          2.030851e+06
1          2.030851e+06
2          2.030851e+06
3          2.030851e+06
4          2.030851e+06
               ...     
6880552    2.030851e+06
6880553    2.030851e+06
6880554    2.030851e+06
6880555    2.030851e+06
6880556    2.030851e+06
Name: 经度, Length: 6880557, dtype: float64

In [None]:
# 计算角度
def getDegree(latA, lonA, latB, lonB):
    radLatA = math.radians(latA)
    radLonA = math.radians(lonA)
    radLatB = math.radians(latB)
    radLonB = math.radians(lonB)
    dLon = radLonB - radLonA
    y = math.sin(dLon) * math.cos(radLatB)
    x = math.cos(radLatA) * math.sin(radLatB) - math.sin(radLatA) * math.cos(radLatB) * math.cos(dLon)
    brng = math.degrees(math.atan2(y, x))
    brng = round((brng + 360) % 360, 4)
    brng = int(brng)
    if (brng == 0.0) or ((brng == 360.0)):
        return '正北方向'
    elif brng == 90.0:
        return '正东方向'
    elif brng == 180.0:
        return '正南方向'
    elif brng == 270.0:
        return '正西方向'
    elif 0 < brng < 90:
        return f'北偏东{brng}'
    elif 90 < brng < 180:
        return f'东偏南{brng - 90}'
    elif 180 < brng < 270:
        return f'西偏南{270 - brng}'
    elif 270 < brng < 360:
        return f'北偏西{brng - 270}'
    else:
        pass

In [15]:
def init_distance(car_dict):
    datas['经度'] = datas['经度'] / 1000000
    datas['维度'] = datas['维度'] / 1000000
    datasets_list = list()
    for k, v in tqdm(car_dict.items()):
        data_temp = datas[datas['vin']==k].reset_index(drop=True)        
        jingdu = pd.concat([pd.DataFrame({'经度': [list(data_temp['经度'])[0]]}), data_temp[['经度']]]).reset_index(drop=True)
        jingdu.drop([len(jingdu)-1], axis=0, inplace=True)
        data_temp['经度_'] = jingdu
        weidu = pd.concat([pd.DataFrame({'维度': [list(data_temp['维度'])[0]]}), data_temp[['维度']]]).reset_index(drop=True)
        weidu.drop([len(weidu)-1], axis=0, inplace=True)
        data_temp['维度_'] = weidu
        datasets_list.append(data_temp)
    data01 = pd.concat(datasets_list).reset_index(drop=True)
    print("拥有 经度_ 维度_ 的全量数据集：", data01.shape)
    return data01
  
data01 = init_distance(car_dict=car_dict)

100%|██████████| 10/10 [00:06<00:00,  1.50it/s]


拥有 经度_ 维度_ 的全量数据集： (6880557, 44)


In [None]:
def getDegree(latA, lonA, latB, lonB):
    radLatA = latA.apply(lambda radlat: math.radians(radlat))
    radLonA = lonB.apply(lambda radlat: math.radians(radlat))
    radLatB = latB.apply(lambda radlat: math.radians(radlat))
    radLonB = lonB.apply(lambda radlat: math.radians(radlat))
    dLon = radLonB - radLonA
    # y = math.sin(dLon) * math.cos(radLatB)
    # x = math.cos(radLatA) * math.sin(radLatB) - math.sin(radLatA) * math.cos(radLatB) * math.cos(dLon)
    # brng = math.degrees(math.atan2(y, x))
    # brng = round((brng + 360) % 360, 4)
    # brng = int(brng)
    y = dLon.apply(lambda dL: math.sin(dL) * radLatB.apply(lambda rltB: math.cos(rltB)))
    print(y)
getDegree(data01['经度_'], data01['维度_'], data01['经度'], data01['维度'])

In [41]:
radLatA = data01['经度_'].apply(lambda radlat: math.radians(radlat))
radLonA = data01['维度_'].apply(lambda radlat: math.radians(radlat))
radLatB = data01['经度'].apply(lambda radlat: math.radians(radlat))
radLonB = data01['维度'].apply(lambda radlat: math.radians(radlat))
dLon = radLonB - radLonA
print(dLon.dtype)
print(dLon)
y1 = dLon.apply(lambda dL: math.sin(dL))
y2 = radLatB.apply(lambda rltB: math.cos(rltB))

print(y1.dtype)
print(y2.dtype)

float64
0          0.0
1          0.0
2          0.0
3          0.0
4          0.0
          ... 
6880552    0.0
6880553    0.0
6880554    0.0
6880555    0.0
6880556    0.0
Length: 6880557, dtype: float64
float64
float64


In [None]:
def getDistance(latA, lonA, latB, lonB):
    ra = 6378140    # 赤道半径
    rb = 6356755    # 极半径
    flatten = (ra - rb) / ra
    # 四列特征弧度转换
    radLatA = latA.apply(lambda radlat: math.radians(radlat))
    radLonA = lonB.apply(lambda radlat: math.radians(radlat))
    radLatB = latB.apply(lambda radlat: math.radians(radlat))
    radLonB = lonB.apply(lambda radlat: math.radians(radlat))
    temp = radLatA - radLonB
    print(radLatA)
    pA = radLatA.apply(lambda rltA: rb/ra*math.tan(rltA))
    pB = radLatB.apply(lambda rltB: rb/ra*math.tan(rltB))
    x = pA.apply(lambda x1: math.sin(x1)) * pB.apply(lambda x2: math.sin(x2)) + pA.apply(lambda x3: math.cos(x3)) * pB.apply(lambda x4: math.cos(x4) * temp.apply(lambda x5: math.cos(x5)))
    x = x.apply(lambda x6: math.acos(x6))
    c1 = x.apply(lambda y1: math.sin(x)) - x
    c1 = c1 * pA.apply(lambda y2: math.sin(y2))
 
getDistance(data01['经度_'], data01['维度_'], data01['经度'], data01['维度'])

In [None]:
data01[['经度', '维度', '经度_', '维度_']]

# <font color=red>构造每15秒内车移动的 距离特征**distance** 角度特征**degree** 转角特征**degree_changed**</font>

In [None]:
class init_distance_degree(object):

    """
    根据经纬度计算单位时间内的移动距离函数
    """
    def getDistance(self, latA, lonA, latB, lonB):
        ra = 6378140  # 赤道半径
        rb = 6356755  # 极半径
        flatten = (ra - rb) / ra  # Partial rate of the earth
        # change angle to radians
        radLatA = math.radians(latA)
        radLonA = math.radians(lonA)
        radLatB = math.radians(latB)
        radLonB = math.radians(lonB)
        pA = math.atan(rb / ra * math.tan(radLatA))
        pB = math.atan(rb / ra * math.tan(radLatB))
        x = math.acos(math.sin(pA) * math.sin(pB) + math.cos(pA) * math.cos(pB) * math.cos(radLonA - radLonB))
        c1 = (math.sin(x) - x) * (math.sin(pA) + math.sin(pB)) ** 2 / math.cos(x / 2) ** 2
        c2 = (math.sin(x) + x) * (math.sin(pA) - math.sin(pB)) ** 2 / math.sin(x / 2) ** 2
        dr = flatten / 8 * (c1 - c2)
        distance = ra * (x + dr)
        distance = round(distance / 1000, 4)
        return distance*1000

    """
    根据经纬度计算单位时间内的转角函数
    """
    def getDegree(self, latA, lonA, latB, lonB):
        radLatA = math.radians(latA)
        radLonA = math.radians(lonA)
        radLatB = math.radians(latB)
        radLonB = math.radians(lonB)
        dLon = radLonB - radLonA
        y = math.sin(dLon) * math.cos(radLatB)
        x = math.cos(radLatA) * math.sin(radLatB) - math.sin(radLatA) * math.cos(radLatB) * math.cos(dLon)
        brng = math.degrees(math.atan2(y, x))
        brng = round((brng + 360) % 360, 4)
        brng = int(brng)
        if (brng == 0.0) or ((brng == 360.0)):
            return 90
        elif brng == 90.0:
            return 0
        elif brng == 180.0:
            return 270
        elif brng == 270.0:
            return 180
        elif 0 < brng < 90:
            return 90-brng
        elif 90 < brng < 180:
            return 360-brng
        elif 180 < brng < 270:
            return 180+brng
        elif 270 < brng < 360:
            return 90+brng
        else:
            pass

    """
    构造latB、lonB
    """
    def init_latB_lonB(self, car_dict):
        datasets['经度'] = datasets['经度'] / 1000000
        datasets['维度'] = datasets['维度'] / 1000000
        datasets_list = list()
        for k, v in tqdm(car_dict.items()):
            data_temp = datasets[datasets['vin']==k].reset_index(drop=True)        
            jingdu = pd.concat([pd.DataFrame({'经度': [list(data_temp['经度'])[0]]}), data_temp[['经度']]]).reset_index(drop=True)
            jingdu.drop([len(jingdu)-1], axis=0, inplace=True)
            data_temp['经度_'] = jingdu
            weidu = pd.concat([pd.DataFrame({'维度': [list(data_temp['维度'])[0]]}), data_temp[['维度']]]).reset_index(drop=True)
            weidu.drop([len(weidu)-1], axis=0, inplace=True)
            data_temp['维度_'] = weidu
            datasets_list.append(data_temp)
        datas = pd.concat(datasets_list).reset_index(drop=True)
        print("构建latB、lonB完毕...")
        return datas
    
    def init_distance(self):
        datasets['distance'] = datasets
        



In [18]:
data01.columns

Index(['vin', '时间', '车速', '车辆状态', '充电状态', '总电压', '总电流', '累计里程', 'SOC',
       '电池单体电压最高值', '最高电压电池单体代号', '电池单体电压最低值', '最低电压电池单体代号', '最高温度值',
       '最高温度探针单体代号', '最低温度值', '最低温度探针单体代号', '最高报警等级', 'DC_DC状态', '档位',
       '驱动电机状态', '驱动电机转速', '驱动电机转矩', '驱动电机温度', '驱动电机控制器温度', '电机控制器输入电压',
       '电机控制器直流母流电流', '经度', '维度', '加速踏板行程值', '制动踏板状态', 'day', 'second',
       'speed', 'time_changed', 'speed_changed', 'a', 'engine_speed_changed',
       'engine_torque_changed', 'engine_temperature_changed',
       'engine_controller_tpt_changed', 'w', '经度_', '维度_'],
      dtype='object')

In [36]:
data01[['']]

14.0    5315820
0.0     1499869
13.0      64868
Name: 档位, dtype: int64

In [17]:
# data01.to_csv('F:/aliyuun_datas/2021数字汽车大赛/temp_df/datasets_aw.csv', index=False)