# Importing Necessary Libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings("ignore")

# Mounting Google Drive

In [None]:
# Colab-only: mount Google Drive at /content/drive so files stored there can be read by path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Loading the dataset (combined_data)

In [None]:
# Read the combined workbook from the mounted Drive and keep only the
# timestamp, the six Ax..Gz signal columns and the event label.
wanted_columns = ['time', 'Ax', 'Ay', 'Az', 'Gx', 'Gy', 'Gz', 'harsh_event']
raw_workbook = pd.read_excel('/content/drive/MyDrive/dtw/combined_data.xlsx')
df = raw_workbook[wanted_columns]

# Data Preprocessing

In [None]:
# Parse the timestamp strings (day-month-year, with a colon before the
# fractional seconds) and store the gap between consecutive rows.
parsed_time = pd.to_datetime(df['time'], format='%d-%m-%Y %H:%M:%S:%f')
df = df.assign(**{'time': parsed_time, 'time Difference': parsed_time.diff()})

# Average spacing between consecutive rows.
print(df['time Difference'].mean())

0 days 00:00:00.019032126


In [None]:
# Print column dtypes and non-null counts as a quick sanity check of the frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33554 entries, 0 to 33553
Data columns (total 9 columns):
 #   Column           Non-Null Count  Dtype          
---  ------           --------------  -----          
 0   time             33554 non-null  datetime64[ns] 
 1   Ax               33554 non-null  float64        
 2   Ay               33554 non-null  float64        
 3   Az               33554 non-null  float64        
 4   Gx               33554 non-null  float64        
 5   Gy               33554 non-null  float64        
 6   Gz               33554 non-null  float64        
 7   harsh_event      33554 non-null  object         
 8   time Difference  33553 non-null  timedelta64[ns]
dtypes: datetime64[ns](1), float64(6), object(1), timedelta64[ns](1)
memory usage: 2.3+ MB


In [None]:
# Fold the abbreviated 'sudden-acc' label into 'sudden-acceleration' so both
# spellings end up in the same class.
label_fixes = {'sudden-acc': 'sudden-acceleration'}
df['harsh_event'] = df['harsh_event'].replace(label_fixes)

# Show the distinct labels that remain.
df['harsh_event'].unique()

array(['reverse-safe', 'sudden-acceleration', 'uniform-accel',
       'Safe-no-movement', 'cons-acc-20', 'cons-acc-40', 'sudden-brake',
       'sudden-line-left', 'sudden-left-line-chg', 'cons-20-linechange',
       'sudden-left-turn', 'sudden-line-right', 'sudden-right-line-chg',
       'sudden-right-turn'], dtype=object)

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Min-max scale the six signal columns; every other column is carried over unchanged.
columns_to_scale = ['Ax', 'Ay', 'Az', 'Gx', 'Gy', 'Gz']

# MinMaxScaler with default settings maps each column independently onto [0, 1].
scaler = MinMaxScaler()
scaled_values = scaler.fit_transform(df[columns_to_scale])

# Give the scaled frame df's own index. pd.concat(axis=1) aligns rows by index
# label, so without this the two halves would only line up when df has a
# default 0..n-1 index; a filtered or re-indexed df would yield NaN-padded rows.
df_scaled = pd.DataFrame(scaled_values, columns=columns_to_scale, index=df.index)

# Unscaled columns first, scaled signal columns after them.
df_scaled = pd.concat([df.drop(columns=columns_to_scale), df_scaled], axis=1)

df_scaled.head()

Unnamed: 0,time,harsh_event,time Difference,Ax,Ay,Az,Gx,Gy,Gz
0,2023-03-22 20:00:04.187775,reverse-safe,NaT,0.540944,0.397088,0.58193,0.450125,0.400958,0.424654
1,2023-03-22 20:00:04.584860,reverse-safe,0 days 00:00:00.397085,0.544164,0.381346,0.570576,0.436931,0.398646,0.423825
2,2023-03-22 20:00:04.586927,reverse-safe,0 days 00:00:00.002067,0.556281,0.370205,0.526265,0.417199,0.39812,0.423172
3,2023-03-22 20:00:04.594926,reverse-safe,0 days 00:00:00.007999,0.549937,0.384427,0.53613,0.433531,0.40244,0.421824
4,2023-03-22 20:00:04.602926,reverse-safe,0 days 00:00:00.008000,0.532236,0.412245,0.580169,0.460356,0.40276,0.424362


# Plotting Ay against the row index, coloured by harsh-event label

In [None]:
import plotly.graph_objects as go

# Fixed colour for every harsh_event label, so a label looks the same in each plot.
color_map = {
    'reverse-safe': 'blue',
    'sudden-acceleration': 'red',
    'uniform-accel': 'green',
    'Safe-no-movement': 'orange',
    'cons-acc-20': 'purple',
    'cons-acc-40': 'yellow',
    'sudden-brake': 'cyan',
    'sudden-line-left': 'magenta',
    'sudden-left-line-chg': 'lime',
    'cons-20-linechange': 'indigo',
    'sudden-left-turn': 'pink',
    'sudden-line-right': 'teal',
    'sudden-right-line-chg': 'brown',
    'sudden-right-turn': 'gold'
}

fig = go.Figure()

# One line trace per label: x is the row index of the rows carrying that label,
# y is the scaled Ay value on those rows.
for event_name, trace_color in color_map.items():
    ay_for_event = df_scaled.loc[df_scaled['harsh_event'] == event_name, 'Ay']
    fig.add_trace(
        go.Scatter(
            x=ay_for_event.index,
            y=ay_for_event,
            mode='lines',
            name=event_name,
            line={'color': trace_color},
        )
    )

# Titles, fixed canvas size and a titled legend.
fig.update_layout(
    title='Ay Plot with Labels',
    xaxis_title='Index Values',
    yaxis_title='Ay',
    width=1500,
    height=400,
    legend={'title': 'Labels'},
)

fig.show()

# Extracting Reference sequences of Ay for sudden acceleration

In [None]:
# Hard-coded (first_row, last_row) label bounds of the Ay reference windows.
# .loc slicing is label-based and keeps BOTH endpoints, so (600, 800) yields 201 samples.
ay_reference_bounds = [
    (600, 800),
    (980, 1180),
    (6300, 6500),
    (8880, 9040),
    (10000, 10600),
]

# Slice each window out of the scaled Ay column as a plain numpy array.
(
    ref_sudden_acc01,
    ref_sudden_acc02,
    ref_sudden_acc03,
    ref_sudden_acc04,
    ref_sudden_acc05,
) = (
    df_scaled.loc[first_row:last_row, 'Ay'].to_numpy()
    for first_row, last_row in ay_reference_bounds
)

In [None]:
import plotly.graph_objects as go

# (start, end) row bounds of the Ay reference windows for sudden acceleration.
# The fourth window previously read (8800, 9040) here, while the array that is
# actually extracted as ref_sudden_acc04 uses (8880, 9040); the plot now shows
# the window that is really used.
# NOTE(review): if 8800 was the intended start, change the extraction instead.
# NOTE(review): the name says "brake" but these are acceleration references;
# kept unchanged so any existing references to it keep working.
ref_sequences_brake = {
    'ref_sudden_acc01': (600, 800),
    'ref_sudden_acc02': (980, 1180),
    'ref_sudden_acc03': (6300, 6500),
    'ref_sudden_acc04': (8880, 9040),
    'ref_sudden_acc05': (10000, 10600)
}

# Create a new figure
fig = go.Figure()

# Plot each reference window as its own trace.
for ref_name, (start_index, end_index) in ref_sequences_brake.items():
    # Take x and y from the same label-based slice so they always have the same
    # length, whatever index df_scaled carries.
    ref_segment = df_scaled.loc[start_index:end_index, 'Ay']
    fig.add_trace(go.Scatter(x=ref_segment.index, y=ref_segment.values, mode='lines', name=ref_name))

fig.update_layout(
    title='Reference Sequences (Acceleration)',
    xaxis_title='Index',
    yaxis_title='Ay',
    width=1000,
    height=400)

fig.show()

# Plotting Gz against the row index, coloured by harsh-event label

In [None]:
import plotly.graph_objects as go

# Label -> line colour lookup for the Gz overview plot.
color_map = {
    'reverse-safe': 'blue',
    'sudden-acceleration': 'red',
    'uniform-accel': 'green',
    'Safe-no-movement': 'orange',
    'cons-acc-20': 'purple',
    'cons-acc-40': 'yellow',
    'sudden-brake': 'cyan',
    'sudden-line-left': 'magenta',
    'sudden-left-line-chg': 'lime',
    'cons-20-linechange': 'indigo',
    'sudden-left-turn': 'pink',
    'sudden-line-right': 'teal',
    'sudden-right-line-chg': 'brown',
    'sudden-right-turn': 'gold'
}

fig = go.Figure()

# Draw the scaled Gz signal once per label, restricted to that label's rows.
for event_name in color_map:
    is_event = df_scaled['harsh_event'] == event_name
    rows_for_event = df_scaled[is_event]
    gz_trace = go.Scatter(
        x=rows_for_event.index,
        y=rows_for_event['Gz'],
        mode='lines',
        name=event_name,
        line=dict(color=color_map[event_name]),
    )
    fig.add_trace(gz_trace)

# Axis titles, canvas size and legend heading.
layout_options = dict(
    title='Gz Plot with Labels',
    xaxis_title='Index Values',
    yaxis_title='Gz',
    width=1500,
    height=400,
    legend=dict(title='Labels'),
)
fig.update_layout(**layout_options)

fig.show()

# Extracting Reference sequences of Gz for sudden acceleration

In [None]:
# Reference windows for sudden acceleration, taken from the scaled Gz column.
# These were previously sliced from 'Ay' (copy-paste from the Ay extraction),
# which meant Gz windows were later compared against Ay data.
# .loc slicing is label-based and includes both endpoints.
start_index, end_index = 925, 1125
ref_sudden_acc_Gz_01 = df_scaled.loc[start_index:end_index, 'Gz'].values

start_index, end_index = 1380, 1480
ref_sudden_acc_Gz_02 = df_scaled.loc[start_index:end_index, 'Gz'].values

# Name has no underscore before "03"; kept as-is because later code refers to it.
start_index, end_index = 10634, 10834
ref_sudden_acc_Gz03 = df_scaled.loc[start_index:end_index, 'Gz'].values

start_index, end_index = 8402, 8543
ref_sudden_acc_Gz_04 = df_scaled.loc[start_index:end_index, 'Gz'].values

# NOTE(review): this window (8200-8600) fully contains the previous one (8402-8543) — confirm intended.
start_index, end_index = 8200, 8600
ref_sudden_acc_Gz_05 = df_scaled.loc[start_index:end_index, 'Gz'].values

# Building a DataFrame of DTW distances between each window and the reference sequences extracted from Ay and Gz

In [None]:
import pandas as pd
import numpy as np

# DTW function
def dtw(x, y):
    """Return the dynamic-time-warping cost between two 1-D sequences.

    The local cost is the absolute difference of the paired samples, and each
    step may advance in ``x``, in ``y``, or in both. No warping window or
    length normalisation is applied.

    Parameters
    ----------
    x, y : sequence of numbers
        The two sequences to align; they may differ in length.

    Returns
    -------
    numpy.float64
        Accumulated cost of the cheapest alignment. ``0`` when both inputs are
        empty, ``inf`` when exactly one of them is.
    """
    rows, cols = len(x), len(y)

    # acc[r, c] = cheapest cost of aligning the first r samples of x with the
    # first c samples of y. Everything starts unreachable except the origin.
    acc = np.full((rows + 1, cols + 1), np.inf)
    acc[0, 0] = 0

    for r, x_val in enumerate(x, start=1):
        for c, y_val in enumerate(y, start=1):
            cheapest_predecessor = min(acc[r - 1, c], acc[r, c - 1], acc[r - 1, c - 1])
            acc[r, c] = abs(x_val - y_val) + cheapest_predecessor

    return acc[rows, cols]

# Function to calculate DTW distance for a list of reference sequences
def calculate_dtw_distance(seq, ref_seqs):
    """Return the DTW cost of ``seq`` against every sequence in ``ref_seqs``.

    Parameters
    ----------
    seq : array-like
        The sequence to score.
    ref_seqs : iterable of array-like
        Reference sequences, scored in the order given.

    Returns
    -------
    list
        One entry per reference, in the same order. A reference that is
        element-for-element equal to ``seq`` (same shape and values) is treated
        as a self-comparison and reported as ``inf`` instead of being scored.
    """
    return [
        np.inf if np.array_equal(seq, ref_seq) else dtw(seq, ref_seq)
        for ref_seq in ref_seqs
    ]

# Event categories used to label each window.
# Positive class (1): the manoeuvre being detected.
sudden_acceleration_events = {'sudden-acceleration'}
# Negative class (0): labels treated as non-events.
zero_events = {'reverse-safe', 'uniform-accel', 'Safe-no-movement', 'cons-acc-20', 'cons-acc-40', 'cons-20-linechange'}
# Other harsh manoeuvres, left out of the dataset entirely.
# 'sudden-right-turn' was the only observed label missing from all three sets;
# it is listed here explicitly so every label has a stated category.
skip_events = {
    'sudden-brake',
    'sudden-line-left', 'sudden-left-line-chg', 'sudden-left-turn',
    'sudden-line-right', 'sudden-right-line-chg', 'sudden-right-turn',
}

# Reference sequences grouped per signal; list order fixes the numbering of the
# DTW_* result columns.
ref_sudden_acc = [
    ref_sudden_acc01,
    ref_sudden_acc02,
    ref_sudden_acc03,
    ref_sudden_acc04,
    ref_sudden_acc05,
]
ref_sudden_acc_Gz = [
    ref_sudden_acc_Gz_01,
    ref_sudden_acc_Gz_02,
    ref_sudden_acc_Gz03,
    ref_sudden_acc_Gz_04,
    ref_sudden_acc_Gz_05,
]

# Cut the recording into consecutive, non-overlapping windows and score every
# kept window against each reference sequence.
sequence_length = 75  # samples per window
dtw_results = []

# "+ 1" on the stop value keeps a final window that ends exactly on the last
# row; without it that window is silently dropped whenever the number of rows
# is a multiple of sequence_length.
for i in range(0, len(df_scaled) - sequence_length + 1, sequence_length):
    # A window takes the label of its FIRST row only.
    # NOTE(review): a window that straddles a label change is therefore
    # labelled by its start — confirm this is acceptable.
    harsh_event = df_scaled.loc[i, 'harsh_event']

    # Decide the class before slicing, so skipped windows cost nothing.
    if harsh_event in skip_events:
        continue
    elif harsh_event in sudden_acceleration_events:
        class_value = 1
        class_string = 'sudden-acceleration'
    elif harsh_event in zero_events:
        class_value = 0
        class_string = 'zero-event'
    else:
        # Any label not covered by the three sets is skipped as well.
        continue

    # Label-based slice, both endpoints included -> exactly sequence_length rows
    # (relies on df_scaled having a 0..n-1 integer index).
    window = df_scaled.loc[i:i + sequence_length - 1]
    sequence_ay = window['Ay'].values
    sequence_gyroz = window['Gz'].values

    # One DTW cost per reference, for Ay and Gz separately.
    dtw_ay = calculate_dtw_distance(sequence_ay, ref_sudden_acc)
    dtw_gyroz = calculate_dtw_distance(sequence_gyroz, ref_sudden_acc_Gz)

    # .tolist() stores plain Python floats. list(ndarray) would store numpy
    # scalars, which NumPy >= 2 writes to the CSV as "np.float64(...)".
    dtw_results.append(
        [class_value, class_string, sequence_ay.tolist(), sequence_gyroz.tolist()]
        + dtw_ay
        + dtw_gyroz
    )

# Column names follow the number of references instead of a hard-coded 5, so
# adding or removing a reference cannot desynchronise the header.
columns = (
    ['Class', 'Class_String', 'Sequence_Ay', 'Sequence_GyroZ']
    + [f'DTW_Ay_{k + 1}' for k in range(len(ref_sudden_acc))]
    + [f'DTW_GyroZ_{k + 1}' for k in range(len(ref_sudden_acc_Gz))]
)
df_dtw_results = pd.DataFrame(dtw_results, columns=columns)

# Save to CSV (written to the notebook's working directory).
df_dtw_results.to_csv('dtw_results.csv', index=False)

In [None]:
# Reload the saved results from disk and display them.
# NOTE(review): this rebinds `df`, the name the earlier cells use for the raw
# sensor frame. Re-running those cells afterwards (without re-reading the Excel
# file first) will operate on this results table instead.
# NOTE(review): the Sequence_* columns come back from CSV as strings, not lists.
df=pd.read_csv('dtw_results.csv')
df

Unnamed: 0,Class,Class_String,Sequence_Ay,Sequence_GyroZ,DTW_Ay_1,DTW_Ay_2,DTW_Ay_3,DTW_Ay_4,DTW_Ay_5,DTW_GyroZ_1,DTW_GyroZ_2,DTW_GyroZ_3,DTW_GyroZ_4,DTW_GyroZ_5
0,0,zero-event,"[0.3970875336956283, 0.38134619337728287, 0.37...","[0.42465424137129526, 0.4238252449059724, 0.42...",12.986127,10.243239,3.619330,6.792875,23.794396,21.841908,14.653150,14.329284,8.945111,22.600070
1,0,zero-event,"[0.3896312683832114, 0.40349583016828267, 0.40...","[0.42373770760859325, 0.42325161853362675, 0.4...",13.492777,10.448514,4.170315,6.883662,24.677211,22.016889,14.777375,14.488033,9.236451,22.381119
2,0,zero-event,"[0.3843114706864912, 0.3938731718619613, 0.378...","[0.43969975464518624, 0.4378641182779758, 0.43...",14.044310,10.356561,3.324738,7.914644,24.060919,23.519900,15.862823,15.213612,9.522340,22.776313
3,0,zero-event,"[0.38867042494863807, 0.390336053111909, 0.374...","[0.4360323840252456, 0.43461437541759723, 0.43...",13.140338,10.719822,4.699320,6.123637,25.320159,22.120556,14.878913,14.155016,8.607398,21.488673
4,0,zero-event,"[0.3437062953906295, 0.35132182000313067, 0.36...","[0.42229253012045614, 0.423503578836461, 0.423...",17.081353,10.076356,4.229689,10.324156,26.206283,19.276558,12.832512,12.726435,7.397397,20.136732
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
259,0,zero-event,"[0.3710320419490479, 0.40127523427786227, 0.41...","[0.3495415220250728, 0.34951147089011964, 0.34...",12.997447,10.188893,3.437720,6.787028,23.590253,13.983855,8.231030,13.223402,8.866133,23.450973
260,0,zero-event,"[0.34141580987004133, 0.34437113223061755, 0.3...","[0.40717067294903575, 0.41135929227833434, 0.4...",13.965241,9.212623,3.608237,8.066911,24.232519,20.607881,16.252583,14.788990,11.328859,22.424113
261,0,zero-event,"[0.41852072727384915, 0.40360013054281074, 0.4...","[0.44417052149934755, 0.43046795545316296, 0.4...",12.880276,11.334390,3.722078,6.252502,25.028401,13.285209,7.266961,10.914525,4.582568,13.182751
262,0,zero-event,"[0.3522114665439332, 0.3785135509926191, 0.366...","[0.3979114828230211, 0.39750223821762287, 0.40...",15.708079,10.670547,4.684021,9.837425,26.177426,18.283230,11.682418,12.612426,6.993765,21.307004


# Converting our DataFrame to a CSV file

In [None]:

# Write the reloaded table back to the same file name it was read from.
df.to_csv('dtw_results.csv', index=False)

# Download the file

In [None]:

# Colab-only helper module for transferring files to the browser.
from google.colab import files
# Download the CSV file
files.download('dtw_results.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>