In [2]:
# ID重置，先断开
import pandas as pd
import numpy as np

def reset_id(df):
    """Split vehicle tracks that appear to contain more than one vehicle.

    Rows are sorted by ``time`` and rows with ``lane == 0`` are discarded.
    Each vehicle's rows are then scanned in time order.  Wherever two
    consecutive rows are more than one frame apart, the track is cut and every
    row from the later one onwards receives a fresh ID when either

    * the gap is 390 frames or longer, or
    * the gap is shorter, but the displacement across it exceeds two thirds of
      the constant-speed estimate, or the sign of the y-step flips around the
      gap (checked against the step before and the step after the gap).

    Fresh IDs are handed out sequentially starting at 100 and are appended to
    the work list, so the newly created tracks are scanned as well.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``id``, ``lane``, ``time``, ``x``, ``y`` and
        ``speed``.  The input frame itself is not modified.

    Returns
    -------
    pandas.DataFrame
        The filtered rows with the re-assigned ``id`` column.  Row order is
        not guaranteed to be chronological.

    Notes
    -----
    Prints the number of cuts that were made.
    NOTE(review): new IDs start at 100, which presumably assumes every
    existing ID is below 100 - confirm against the input data.
    """
    df = df.sort_values(by='time')
    df = df[df['lane'] != 0]  # keep only rows whose lane is not 0
    m = 100  # next fresh ID
    i = 0
    id_list = df['id'].unique()

    while i < len(id_list):
        vehicle_id = id_list[i]
        # Explicit copy: IDs are rewritten in place below, and writing through
        # a chained ``data.iloc[k:].loc[...]`` is silently lost under pandas
        # Copy-on-Write.
        data = df[df['id'] == vehicle_id].copy()
        id_col = data.columns.get_loc('id')
        times = data['time'].to_numpy()
        xs = data['x'].to_numpy()
        ys = data['y'].to_numpy()
        speeds = data['speed'].to_numpy()

        for k in range(1, len(data)):  # compare every row with its predecessor
            dif_time = times[k] - times[k - 1]
            if not dif_time > 1:
                continue

            if dif_time >= 390:
                # Gap too long: treat as two different vehicles.
                split = True
            else:
                # Actual distance travelled across the gap.
                dif_x = xs[k] - xs[k - 1]
                dif_y = ys[k] - ys[k - 1]
                real_dis = np.sqrt(dif_x ** 2 + dif_y ** 2)

                # Distance expected at constant speed.
                # presumably speed is in km/h and there are 13 frames per
                # second - TODO confirm
                cal_dis = speeds[k - 1] * dif_time / (3.6 * 13)

                # y-steps just before and just after the gap.  Reset for every
                # gap: when a neighbouring row does not exist the step is 0,
                # which disables the corresponding direction check instead of
                # reusing a stale value (or hitting an unbound variable).
                test_y1 = 0
                test_y2 = 0
                if k >= 2:
                    test_y1 = ys[k - 1] - ys[k - 2]
                    if k + 1 < len(data):
                        test_y2 = ys[k + 1] - ys[k]

                # Too far for the elapsed time, or the travel direction flips
                # (opposite signs): treat as two different vehicles.
                split = ((real_dis > (2 * cal_dis) / 3)
                         or (test_y1 * dif_y < 0)
                         or (test_y1 * test_y2 < 0))

            if split:
                data.iloc[k:, id_col] = m  # relabel from the cut to the end
                id_list = np.append(id_list, m)  # scan the new track later too
                m = m + 1

        i += 1

        # Swap the vehicle's old rows for the relabelled ones.
        df = df[df['id'] != vehicle_id]
        df = pd.concat([df, data], axis=0)

    print('断开了', m - 100, '次！')
    return df
    
def filtering(df, min_length):
    """Remove every vehicle whose track has at most ``min_length`` rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``id`` and ``time``.  The input frame itself
        is not modified.
    min_length : int
        Tracks with ``min_length`` rows or fewer are removed.

    Returns
    -------
    pandas.DataFrame
        The remaining rows, sorted by ``time``.  Rows with a missing ``id``
        are never removed.

    Notes
    -----
    Prints how many IDs were removed.
    """
    df = df.sort_values(by='time')

    # Count rows per ID once instead of re-filtering the frame for every ID.
    counts = df['id'].value_counts()
    short_ids = counts[counts <= min_length].index
    deleted_count = len(short_ids)

    # Select by ID rather than dropping by index label: label-based drops also
    # remove rows of other vehicles whenever index labels repeat.
    df = df[~df['id'].isin(short_ids)]

    print(f"Filtering completed. Deleted {deleted_count} IDs.")
    return df
    
# Load the timestamp-corrected input and split tracks with reset_id().
df = pd.read_csv('多伦路口1_时间戳修正.csv')
df1 = reset_id(df)

# Filtering: remove noise tracks (original note: shorter than 1 s).
# filtering() removes every ID that has at most min_length rows.
min_length = 13 # minimum track length, in rows
filtered_df = filtering(df1, min_length)
# Save the result without the index column.
filtered_df.to_csv('多伦路口2_断开+滤波.csv', index=False)

断开了 1212 次！
Filtering completed. Deleted 831 IDs.


In [3]:
# ID补全，匀加速
import pandas as pd
import numpy as np

def complete_id1(df):
    """Fill short gaps in vehicle tracks assuming uniform acceleration in y.

    For every vehicle, consecutive rows that are more than 1 and fewer than
    390 frames apart are treated as a gap.  A gap is filled only when the
    displacement across it is at most two thirds of the constant-speed
    estimate.  One row is generated per missing frame: ``x`` advances in equal
    steps, while ``y`` follows ``y0 + vy0*t + acc*t**2/2`` with ``acc`` chosen
    so that the curve reaches the next observed ``y`` at the end of the gap.

    The generated rows of a vehicle are appended only if their ``y`` values,
    ordered by time, never increase; otherwise all of them are discarded for
    that vehicle.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``id``, ``time``, ``x``, ``y``, ``speed`` and ``vy``.
        ``time`` is expected to hold whole frame numbers.  The input frame
        itself is not modified.

    Returns
    -------
    pandas.DataFrame
        The time-sorted input rows followed by the generated rows.  Generated
        rows carry ``id``, ``lane`` (NaN), ``x``, ``y``, ``speed``, ``vx``,
        ``vy`` and ``time``; any other column is left missing.

    Notes
    -----
    Speeds are treated as km/h and converted with the factor 3.6; frames are
    converted to seconds with the factor 13.
    NOTE(review): ``vy0`` is taken as an absolute value while ``dif_y`` keeps
    its sign, so for tracks with decreasing y the fitted curve may first rise;
    such fills are then rejected by the monotonicity check.  Confirm this is
    the intended behaviour.
    """
    df = df.sort_values(by='time')
    id_list = df['id'].unique()

    for vehicle_id in id_list:
        data = df[df['id'] == vehicle_id]  # rows of one vehicle, time-ordered
        times = data['time'].to_numpy()
        xs = data['x'].to_numpy()
        ys = data['y'].to_numpy()
        speeds = data['speed'].to_numpy()
        vys = data['vy'].to_numpy()

        # Collect generated rows as plain dicts and build the frame once;
        # growing a DataFrame row by row is quadratic.
        records = []

        for k in range(1, len(data)):  # compare every row with its predecessor
            dif_time = times[k] - times[k - 1]
            if not (dif_time > 1 and dif_time < 390):
                continue

            dif_x = xs[k] - xs[k - 1]
            dif_y = ys[k] - ys[k - 1]
            real_dis = np.sqrt(dif_x ** 2 + dif_y ** 2)

            # Distance expected at constant speed across the gap.
            cal_dis = speeds[k - 1] * dif_time / (3.6 * 13)
            if not real_dis <= (2 * cal_dis) / 3:
                continue  # implausible distance: leave the gap open

            vy0 = abs(vys[k - 1] / 3.6)
            t0 = dif_time / 13  # gap length in seconds
            acc = 2 * (dif_y - vy0 * t0) / (t0 * t0)

            seg_x = dif_x / dif_time  # x step per frame
            imp_x = xs[k - 1]
            y0 = ys[k - 1]
            vx = seg_x * 13

            # int() so that a float-typed time column does not break range().
            for c in range(int(dif_time) - 1):
                imp_x = imp_x + seg_x
                t = (c + 1) / 13
                imp_y = y0 + vy0 * t + acc * t * t / 2
                vy = vy0 + acc * t
                v0 = np.sqrt(vx * vx + vy * vy)

                records.append({'id': vehicle_id,
                                'lane': np.nan,
                                'x': imp_x,
                                'y': imp_y,
                                'speed': v0 * 3.6,  # back to km/h
                                'vx': vx * 3.6,  # back to km/h
                                'vy': vy * 3.6,  # back to km/h
                                # frame number computed directly; the source
                                # rows are never modified
                                'time': times[k - 1] + c + 1})

        if records:
            missing_data = pd.DataFrame(records).sort_values(by='time')
            # Keep the fill only if y never increases over time.
            if missing_data['y'].is_monotonic_decreasing:
                df = pd.concat([df, missing_data], axis=0)

    return df

# Load the split-and-filtered tracks written by the previous cell, run the
# first gap-filling pass (complete_id1) and save the result without the index.
df = pd.read_csv('多伦路口2_断开+滤波.csv')
df1 = complete_id1(df)
df1.to_csv('多伦路口3_第一次补全.csv', index=False)

In [4]:
# ID补全，匀速
import pandas as pd
import numpy as np

def complete_id2(df):
    """Fill short gaps in vehicle tracks assuming constant velocity.

    For every vehicle, consecutive rows that are more than 1 and fewer than
    390 frames apart are treated as a gap.  A gap is filled only when the
    displacement across it is at most two thirds of the constant-speed
    estimate.  One row is generated per missing frame, with ``x`` and ``y``
    both advancing in equal steps between the two observed rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``id``, ``time``, ``x``, ``y`` and ``speed``.  ``time``
        is expected to hold whole frame numbers.  The input frame itself is
        not modified.

    Returns
    -------
    pandas.DataFrame
        The time-sorted input rows followed by the generated rows.  Generated
        rows carry ``id``, ``lane`` (NaN), ``x``, ``y``, ``speed``, ``vx``,
        ``vy`` and ``time``; any other column is left missing.

    Notes
    -----
    Prints the number of generated rows.  Speeds are treated as km/h and
    converted with the factor 3.6; frames are converted to seconds with the
    factor 13.
    """
    df = df.sort_values(by='time')
    n = 0  # number of generated rows
    id_list = df['id'].unique()

    for vehicle_id in id_list:
        data = df[df['id'] == vehicle_id]  # rows of one vehicle, time-ordered
        times = data['time'].to_numpy()
        xs = data['x'].to_numpy()
        ys = data['y'].to_numpy()
        speeds = data['speed'].to_numpy()

        # Collect generated rows as plain dicts and build the frame once;
        # growing a DataFrame row by row is quadratic.
        records = []

        for k in range(1, len(data)):  # compare every row with its predecessor
            dif_time = times[k] - times[k - 1]
            if not (dif_time > 1 and dif_time < 390):
                continue

            dif_x = xs[k] - xs[k - 1]
            dif_y = ys[k] - ys[k - 1]
            real_dis = np.sqrt(dif_x ** 2 + dif_y ** 2)

            # Distance expected at constant speed across the gap.
            cal_dis = speeds[k - 1] * dif_time / (3.6 * 13)
            if not real_dis <= (2 * cal_dis) / 3:
                continue  # implausible distance: leave the gap open

            # Per-frame steps and the matching per-second velocities.
            seg_x = dif_x / dif_time
            seg_y = dif_y / dif_time
            vx = seg_x * 13
            vy = seg_y * 13
            v0 = np.sqrt(vx * vx + vy * vy)

            imp_x = xs[k - 1]
            imp_y = ys[k - 1]

            # int() so that a float-typed time column does not break range().
            for c in range(int(dif_time) - 1):
                imp_x = imp_x + seg_x
                imp_y = imp_y + seg_y

                records.append({'id': vehicle_id,
                                'lane': np.nan,
                                'x': imp_x,
                                'y': imp_y,
                                'speed': v0 * 3.6,  # back to km/h
                                'vx': vx * 3.6,  # back to km/h
                                'vy': vy * 3.6,  # back to km/h
                                # frame number computed directly; the source
                                # rows are never modified
                                'time': times[k - 1] + c + 1})

        if records:
            n += len(records)
            df = pd.concat([df, pd.DataFrame(records)], axis=0)

    print('补全了', n, '次！')
    return df

def filter_dataframe(df, threshold=3, min_frames=39):
    """Remove vehicles whose track is too short or whose ``y`` ever increases.

    A vehicle is removed when it has ``min_frames`` rows or fewer, or when any
    step of its time-ordered ``y`` values is positive (the original note says
    y is decreasing in this data set, so a rising y marks a bad track).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``id``, ``time`` and ``y``.  The input frame
        itself is not modified.
    threshold : int, optional
        Not used by the filter; accepted only so existing calls keep working.
    min_frames : int, optional
        Tracks with this many rows or fewer are removed.  Defaults to 39.

    Returns
    -------
    pandas.DataFrame
        The remaining rows, sorted by ``time``.  Rows with a missing ``id``
        are never removed.

    Notes
    -----
    Prints how many IDs were removed.
    """
    df = df.sort_values(by='time')

    # One pass over the groups; rows inside a group keep their time order.
    drop_ids = []
    for vehicle_id, track_y in df.groupby('id', sort=False)['y']:
        if len(track_y) <= min_frames or (track_y.diff() > 0).any():
            drop_ids.append(vehicle_id)

    n = len(drop_ids)  # number of removed IDs
    df = df[~df['id'].isin(drop_ids)]

    print(f"filtering completed. Deleted {n} IDs.")
    return df
    
# Load the output of the first gap-filling pass, run the second pass
# (complete_id2) and save that intermediate result without the index.
df = pd.read_csv('多伦路口3_第一次补全.csv')
df1 = complete_id2(df)
df1.to_csv('多伦路口3_第二次补全.csv', index=False)

# Remove unwanted tracks with filter_dataframe() (default arguments) and save.
filtered_df = filter_dataframe(df1)
filtered_df.to_csv('多伦路口3_补全+滤波.csv', index=False)

补全了 10175 次！
filtering completed. Deleted 160 IDs.


In [5]:
# 雷达补帧: 将雷达补全为1s30帧，平均插值补全
import pandas as pd

def interpolate_data(df, num):
    """Generate extra, averaged rows for one track, chunk by chunk.

    The rows of ``df`` are processed in consecutive chunks of ``num`` rows.
    For every chunk with at least two rows:

    1. the midpoint (column-wise mean) of every pair of neighbouring rows is
       generated;
    2. the chunk and its midpoints are merged and ordered by ``time``; if that
       merge has at least six rows, the midpoints of its first six rows
       (five more rows) are generated as well.

    No rows are generated between the last row of one chunk and the first row
    of the next.  With ``num == 13`` a full chunk yields 12 + 5 new rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows of a single track in the order they should be paired.  Every
        column is averaged, so all columns must be numeric; a ``time`` column
        is required.
    num : int
        Chunk size.

    Returns
    -------
    pandas.DataFrame
        Only the generated rows, sorted by ``time``; an empty frame when
        nothing was generated.  The input rows are not included.
    """
    def _midpoints(frame):
        # Column-wise mean of each pair of neighbouring rows.
        values = frame.to_numpy()
        return pd.DataFrame((values[:-1] + values[1:]) / 2, columns=frame.columns)

    generated = []  # collected pieces; concatenated and sorted once at the end

    for start in range(0, len(df), num):
        chunk = df.iloc[start:start + num]
        if len(chunk) < 2:
            continue  # a single row has no neighbour to average with

        first_pass = _midpoints(chunk)
        generated.append(first_pass)

        # Second pass over the first six rows of chunk + midpoints.
        merged = pd.concat([chunk, first_pass]).sort_values(by='time')
        head = merged[:6]
        if len(head) < 6:
            # Too few rows for the second pass: skip it for this chunk only
            # and carry on with the remaining chunks.
            continue
        generated.append(_midpoints(head))

    if not generated:
        return pd.DataFrame()
    return pd.concat(generated).sort_values(by='time')

# Load the gap-filled and filtered tracks written by the previous cell.
df = pd.read_csv('多伦路口3_补全+滤波.csv')
df = df[df['lane'] != 0]  # keep only rows whose lane is not 0
df = df.sort_values(by='time')

id_list = df['id'].unique()

# Generate the extra rows track by track and append them to df.
# NOTE(review): the loop variable `id` shadows the builtin of the same name
# for the rest of the session.
for id in id_list:
    result_df = pd.DataFrame()
    df1 = df[df['id'] ==id]
    result_df = interpolate_data(df1, 13)
    df = pd.concat([df, result_df])
df = df.sort_values(by='time')

# Re-number time: build a dict mapping each old time value to a new one.
time_mapping = {}
current_new_time = 1  # next new time value to hand out
for time_value in df['time']:  # walk the (sorted) old time values
    if time_value not in time_mapping:  # first occurrence of this time value
        time_mapping[time_value] = current_new_time  # map old value to the new one
        current_new_time += 1  # advance the new time value
        
# Keep the original values as 'old_time' and store the new numbering in 'time'.
df.rename(columns={'time': 'old_time'}, inplace=True)

df['time'] = df['old_time'].map(time_mapping)
df.to_csv('多伦路口4_补帧.csv', index=False)

In [6]:
import pandas as pd
import numpy as np

def estimate_lane_width(df):
    """Return the smallest and largest ``x`` observed in lanes 1 to 4.

    Rows whose ``lane`` is missing are ignored.  Despite its name the function
    does not compute a width itself; it returns the per-lane x extents from
    which a width can be derived.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``time``, ``lane`` and ``x``.

    Returns
    -------
    pandas.DataFrame
        Indexed by lane number (1..4) with the columns ``min_x_value`` and
        ``max_x_value``; a lane without rows yields NaN in both columns.
    """
    ordered = df.sort_values(by='time')
    labelled = ordered[ordered['lane'].notna()]  # drop rows without a lane

    # Collect the x extent of each lane from the labelled rows.
    extents = []
    for lane_no in range(1, 5):
        lane_x = labelled.loc[labelled['lane'] == lane_no, 'x']
        extents.append({'min_x_value': lane_x.min(), 'max_x_value': lane_x.max()})

    # Label the rows with the lane numbers 1..4.
    return pd.DataFrame(extents, index=range(1, 5))

if __name__ == "__main__":
    df = pd.read_csv('多伦路口4_补帧.csv')
    lane = estimate_lane_width(df)
    # Width of the second lane: max x minus min x of the second row.  Both
    # positions are given to .iloc directly; indexing the row Series with a
    # bare integer is a deprecated positional lookup.
    print("车道宽度为",lane.iloc[1, 1] - lane.iloc[1, 0],"m")
    print('lane',lane)

    # Lane assignment from x, using fixed 3.7-wide bands.  Rows whose x falls
    # outside [-7.4, 7.4] (or is missing) keep their current lane value.
    x = df['x']
    lane_masks = {
        1: (x > 3.7) & (x <= 7.4),
        2: (x >= 0.0) & (x <= 3.7),
        3: (x >= -3.7) & (x < 0.0),
        4: (x >= -7.4) & (x < -3.7),
    }
    # The bands do not overlap, so the order of assignment does not matter.
    for lane_no, in_lane in lane_masks.items():
        df.loc[in_lane, 'lane'] = lane_no
    df.to_csv('多伦路口5_车道重置.csv', index=False)

车道宽度为 3.6 m
lane    min_x_value  max_x_value
1          3.8          7.4
2          0.0          3.6
3         -3.6          0.0
4         -7.4         -3.8
