In [6]:
import pandas as pd 
import plotly.express as px

In [31]:
import os

# Raw string keeps the backslash literal ('\p' is not a valid escape sequence).
SAVE_PATH = r'..\preprocessed_data_defected'


def _with_axis_prefix(sensor_df, prefix):
    """Return ``sensor_df`` with its x/y/z columns replaced by ``<prefix>x/y/z``.

    The prefixed columns are appended after the existing ones, in x, y, z order.
    """
    prefixed = sensor_df.assign(
        **{f"{prefix}{axis}": sensor_df[axis] for axis in ("x", "y", "z")})
    return prefixed.drop(columns=["x", "y", "z"])


def _find_class(data_path, types_dict):
    """Return the label of the first ``types_dict`` key found in the path, or None.

    The recording's own name (last path component) is checked before the full
    path, so a parent directory that happens to contain a surface keyword
    cannot override the keyword in the recording name.
    """
    full_path = os.fspath(data_path)
    own_name = os.path.basename(os.path.normpath(full_path))
    for candidate in (own_name, full_path):
        for item, label in types_dict.items():
            if item in candidate:
                return label
    return None


def preproccess_data(data_path):
    """Merge gyroscope, accelerometer and location readings into one labelled CSV.

    The readings are joined on ``seconds_elapsed`` with a nearest-match
    ``merge_asof``, using the accelerometer rows as the left side. The
    accelerometer ``time`` column is kept as ``time``. A ``class`` column is
    added from the surface keyword found in the path; it stays empty, with a
    printed warning, when no keyword matches. The result is written to
    ``SAVE_PATH`` as ``<basename of data_path>.csv`` and the directory is
    created if it is missing.

    Parameters
    ----------
    data_path : str
        Path passed to ``pd.read_csv``; also the source of the class keyword
        and of the output file name.

    Returns
    -------
    None
        Errors raised by pandas or the file system propagate to the caller.
    """
    types_dict = {
        "asphalt": 0,  # asphalt
        "tiles": 1,  # paving tiles
        "crumb": 2,  # crumb
        "stones": 3,  # paving stones
    }

    # NOTE(review): all three frames are read from the same `data_path`. If a
    # recording is stored as separate per-sensor files, each read needs its own
    # file path -- confirm against the raw data layout.
    gyro_df = _with_axis_prefix(pd.read_csv(data_path), "g")
    acc_df = _with_axis_prefix(pd.read_csv(data_path), "a")

    loc_df = pd.read_csv(data_path)
    loc_df = loc_df.drop(columns=['bearingAccuracy', 'speedAccuracy',
                         'verticalAccuracy', 'horizontalAccuracy', 'speed', 'bearing', 'altitude',])

    df = pd.merge_asof(acc_df, gyro_df, on="seconds_elapsed",
                       direction="nearest")
    df = pd.merge_asof(df, loc_df, on="seconds_elapsed",
                       direction="nearest")
    # The first merge suffixes the two `time` columns; keep the left
    # (accelerometer) one under the plain name.
    df["time"] = df["time_x"]
    df = df.drop(columns=["time_x", "time_y"])

    cl = _find_class(data_path, types_dict)
    if cl is None:
        print("couldn't find class")
    df["class"] = cl

    # to_csv does not create missing directories.
    os.makedirs(SAVE_PATH, exist_ok=True)
    save_file_path = os.path.join(
        SAVE_PATH, os.path.basename(data_path)+'.csv')
    df.to_csv(save_file_path, index=False)
    print(f"OK – {save_file_path}")

In [32]:
# Preprocess every entry of the raw "defected" directory. A failure on one
# entry is reported and the loop carries on with the next one.
for path in os.listdir(r"..\raw_data\defected"):
    data_path = os.path.join(r"..\raw_data\defected", path)
    print(f"Trying {data_path}")
    try:
        preproccess_data(data_path)
    except Exception as e:
        print(f'Got error with {data_path}:', e)

Trying ..\raw_data\defected\asphalt_bad_bump-2024-06-15_17-01-46
OK – ..\preprocessed_data_defected\asphalt_bad_bump-2024-06-15_17-01-46.csv
Trying ..\raw_data\defected\asphalt_bump-2024-06-15_16-15-46
OK – ..\preprocessed_data_defected\asphalt_bump-2024-06-15_16-15-46.csv
Trying ..\raw_data\defected\asphalt_curb-2024-06-15_16-10-24
OK – ..\preprocessed_data_defected\asphalt_curb-2024-06-15_16-10-24.csv
Trying ..\raw_data\defected\stones_bad-2024-06-15_15-55-09
OK – ..\preprocessed_data_defected\stones_bad-2024-06-15_15-55-09.csv
Trying ..\raw_data\defected\stones_stair-2024-06-15_16-05-33
OK – ..\preprocessed_data_defected\stones_stair-2024-06-15_16-05-33.csv
Trying ..\raw_data\defected\stones_stair-2024-06-15_16-09-28
OK – ..\preprocessed_data_defected\stones_stair-2024-06-15_16-09-28.csv


In [10]:
import json

# Show the path of every file in the preprocessed-data directory.
preprocessed_dir = r"..\preprocessed_data_defected"
[os.path.join(preprocessed_dir, path) for path in os.listdir(preprocessed_dir)]

['..\\preprocessed_data_defected\\asphalt_bad_bump-2024-06-15_17-01-46.csv',
 '..\\preprocessed_data_defected\\asphalt_bump-2024-06-15_16-15-46.csv',
 '..\\preprocessed_data_defected\\asphalt_curb-2024-06-15_16-10-24.csv',
 '..\\preprocessed_data_defected\\stones_bad-2024-06-15_15-55-09.csv',
 '..\\preprocessed_data_defected\\stones_stair-2024-06-15_16-05-33.csv',
 '..\\preprocessed_data_defected\\stones_stair-2024-06-15_16-09-28.csv']

In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Load one preprocessed recording.
file_path = r'..\preprocessed_data_defected\asphalt_curb-2024-06-15_16-10-24.csv'
data = pd.read_csv(file_path)

# Column names of the accelerometer and gyroscope readings.
accelerometer_columns = ['ax', 'ay', 'az']
gyroscope_columns = ['gx', 'gy', 'gz']

# Bucket every sample by its whole second; int() truncates, it does not round.
data['seconds_elapsed_rounded'] = data['seconds_elapsed'].map(int)

# One row per second, each sensor column holding the list of that second's samples.
grouped_data = (
    data
    .groupby('seconds_elapsed_rounded')[[*accelerometer_columns, *gyroscope_columns]]
    .agg(list)
)

# Plot per-row sensor traces, one subplot per row.
def plot_sensor_data(sensor_data, sensor_name, columns):
    """Draw one subplot per row of ``sensor_data`` with a line per column.

    Parameters
    ----------
    sensor_data : pandas.DataFrame
        One row per group; each cell in ``columns`` holds the sequence of
        samples to plot. The row's index value is shown in the subplot title.
    sensor_name : str
        Name used in the subplot titles.
    columns : list of str
        Columns of ``sensor_data`` to draw on each subplot.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``. Nothing is drawn when
        ``sensor_data`` has no rows.
    """
    n_rows = len(sensor_data)
    if n_rows == 0:
        # plt.subplots() rejects a grid with zero rows.
        print(f'No {sensor_name} data to plot')
        return

    # squeeze=False always returns a 2-D array of axes, so a single-row frame
    # needs no special case.
    fig, axs = plt.subplots(n_rows, 1, figsize=(10, n_rows * 2),
                            sharex=True, squeeze=False)

    for (seconds, row), ax in zip(sensor_data.iterrows(), axs[:, 0]):
        for col in columns:
            ax.plot(row[col], label=col)
        ax.set_title(f'{sensor_name} Data for Second {seconds}')
        ax.legend()
        ax.grid(True)

    fig.tight_layout()
    plt.show()

# Draw the accelerometer traces first, then the gyroscope traces.
for sensor_name, sensor_columns in (
    ('Accelerometer', accelerometer_columns),
    ('Gyroscope', gyroscope_columns),
):
    plot_sensor_data(grouped_data, sensor_name, sensor_columns)

In [43]:
import pandas as pd
import matplotlib.pyplot as plt
from ipywidgets import interact, Button, HBox

# Load one preprocessed recording.
file_path = r'..\preprocessed_data_defected\stones_stair-2024-06-15_16-09-28.csv'
data = pd.read_csv(file_path)

# Bucket every sample by its whole second; int() truncates, it does not round.
data['seconds_elapsed_rounded'] = data['seconds_elapsed'].map(int)

# Accelerometer and gyroscope columns: collected into lists per second.
accelerometer_columns = ['ax', 'ay', 'az']
gyroscope_columns = ['gx', 'gy', 'gz']

# Columns kept as a single value per second (the first one seen).
other_columns = ['time', 'longitude', 'latitude', 'class']

# Aggregation per column: sensor readings -> list, everything else -> first value.
aggregations = dict.fromkeys(accelerometer_columns + gyroscope_columns, list)
aggregations.update(dict.fromkeys(other_columns, 'first'))

# One row per second, with a `defected` flag that starts out as False.
grouped_data = (
    data
    .groupby('seconds_elapsed_rounded')
    .agg(aggregations)
    .reset_index()
    .assign(defected=False)
)

# Show one grouped row and offer buttons for labelling it.
def plot_and_label(index):
    """Plot the samples of row ``index`` of ``grouped_data`` and show label buttons.

    The upper subplot shows the accelerometer columns and the lower one the
    gyroscope columns. Below the figure, two buttons call ``set_defected``
    for this ``index`` with True or False.
    """
    row = grouped_data.iloc[index]
    fig, (acc_axis, gyro_axis) = plt.subplots(2, 1, figsize=(10, 6))

    panels = (
        (acc_axis, accelerometer_columns, ('ax', 'ay', 'az'), 'Accelerometer Data'),
        (gyro_axis, gyroscope_columns, ('gx', 'gy', 'gz'), 'Gyroscope Data'),
    )
    for axis, sensor_columns, labels, title in panels:
        for position, label in enumerate(labels):
            axis.plot(row[sensor_columns[position]], label=label)
        axis.set_title(title)
        axis.legend()

    plt.show()

    def make_button(description, value):
        # Each button records `value` for the row that is currently displayed.
        button = Button(description=description)
        button.on_click(lambda b: set_defected(index, value))
        return button

    true_button = make_button("Defected True", True)
    false_button = make_button("Defected False", False)

    display(HBox([true_button, false_button]))

# Record the `defected` flag for one row.
def set_defected(index, value):
    """Write ``value`` into the ``defected`` column of ``grouped_data`` at row label ``index``.

    A confirmation line is printed after the assignment.
    """
    grouped_data.loc[index, 'defected'] = value
    print(f'Set defected {value} for index {index}')

# Interactive slider for browsing the grouped rows; its upper bound of
# len(grouped_data) - 2 leaves the last row unreachable.
last_selectable_index = len(grouped_data) - 2
interact(plot_and_label, index=(0, last_selectable_index))

interactive(children=(IntSlider(value=5, description='index', max=10), Output()), _dom_classes=('widget-intera…

<function __main__.plot_and_label(index)>

Set defected True for index 2


Set defected True for index 3


Set defected True for index 4


Set defected True for index 5


In [44]:
# Drop the trailing row (the labelling slider cannot reach it) and store the
# labelled rows. The output goes to its own directory: writing it back to
# `file_path` would replace the preprocessed recording that the labelling cell
# reads, and that cell would then fail on re-run because the file no longer
# has a `seconds_elapsed` column.
grouped_data = grouped_data.iloc[:-1]

labeled_dir = r'..\labeled_data_defected'
os.makedirs(labeled_dir, exist_ok=True)
labeled_file_path = os.path.join(labeled_dir, os.path.basename(file_path))

# index=False: the row index carries no information after reset_index(), and
# the preprocessed files are written without an index column too.
grouped_data.to_csv(labeled_file_path, index=False)
print(f'Saved labelled data to {labeled_file_path}')