In [1]:
import pandas as pd
import plotly.graph_objects as go

# Load data from the Excel file
file_path = 'data1.xlsx'  # Replace with actual path to the .xlsx file
df = pd.read_excel(file_path)

# Convert timestamp column to datetime format
df['timestamp'] = pd.to_datetime(df['time_stamp'], format='%d/%m/%Y %H:%M:%S')  # Adjust the column name if necessary

# Filter out rows where trip is 0 (vehicle off periods)
df_active = df[df['trip'] > 0]

# Aggregate data by trip
trip_stats = df_active.groupby('trip').agg(
    start_time=('timestamp', 'min'),
    end_time=('timestamp', 'max'),
    avg_speed=('vehicle_speed', 'mean'),
    top_speed=('vehicle_speed', 'max'),
    initial_speed=('vehicle_speed', 'first')  # First recorded speed for each trip
).reset_index()

# Calculate additional metrics
trip_stats['trip_duration'] = (trip_stats['end_time'] - trip_stats['start_time']).dt.total_seconds() / 3600  # Trip duration in hours
trip_stats['distance_covered'] = trip_stats['avg_speed'] * trip_stats['trip_duration']  # Distance = speed * time

# Create a figure for plotting
fig = go.Figure()

# Add line trace for vehicle speed over time
fig.add_trace(go.Scatter(
    x=df['timestamp'],
    y=df['vehicle_speed'],
    mode='lines',
    name='Vehicle Speed',
    line=dict(color='blue', width=1),
    hoverinfo='x+y'
))

# Highlight points where engine_rpm > 4000
high_rpm = df[df['engine_rpm'] > 4000]

fig.add_trace(go.Scatter(
    x=high_rpm['timestamp'],
    y=high_rpm['vehicle_speed'],
    mode='markers',
    name='RPM > 4000',
    marker=dict(color='red', size=8, symbol='circle'),
    hoverinfo='x+y'
))

# Add lines for average speed, top speed, distance covered, and average acceleration for each trip
for _, row in trip_stats.iterrows():
    # Average speed trace
    fig.add_trace(go.Scatter(
        x=[row['start_time'], row['end_time']],
        y=[row['avg_speed'], row['avg_speed']],
        mode='lines',
        name=f'Avg Speed Trip {row["trip"]}',
        line=dict(color='red', width=2, dash='dash')
    ))

    # Top speed trace
    fig.add_trace(go.Scatter(
        x=[row['start_time'], row['end_time']],
        y=[row['top_speed'], row['top_speed']],
        mode='lines',
        name=f'Top Speed Trip {row["trip"]}',
        line=dict(color='green', width=2, dash='dot')
    ))

    # Annotate distance covered for the trip
    fig.add_trace(go.Scatter(
        x=[row['end_time']],
        y=[row['top_speed']],
        mode='markers+text',
        text=f'Distance: {row["distance_covered"]:.2f} km',
        textposition='top center',
        showlegend=False
    ))



# Update layout with interactive features
fig.update_layout(
    title='Vehicle Speed Over Time with Trip Statistics',
    xaxis=dict(
        title='Time',
        type='date',
        rangeslider=dict(visible=True),  # Enable range slider for zoom control
        showspikes=True,
        spikemode='across',  # Spike line when hovering
        spikedash='dash',
    ),
    yaxis=dict(
        title='Speed (km/h)',
        range=[0, df['vehicle_speed'].max() + 10],  # Adjust the y-axis range
        showspikes=True,  # Spike on hover
    ),
    hovermode="x unified",  # Show single hover label across traces
    template="plotly_white"  # Clean and clear look
)

# Display the figure
fig.show()


In [2]:
import pandas as pd
import plotly.graph_objects as go
import numpy as np

def load_and_prepare_data(file_path):
    # Load data
    df = pd.read_excel(file_path)

    # Convert timestamp to datetime
    df['timestamp'] = pd.to_datetime(df['time_stamp'], format='%d/%m/%Y %H:%M:%S')

    # Filter out inactive trips
    df_active = df[df['trip'] > 0]
    return df, df_active

def calculate_acceleration(df_active):
    # Calculate time differences in seconds
    df_active['time_diff'] = df_active['timestamp'].diff().dt.total_seconds()

    # Calculate speed differences (assuming speed is in km/h)
    df_active['speed_diff'] = df_active['vehicle_speed'].diff()

    # Calculate acceleration in km/h² (change in speed divided by time in hours)
    df_active['acceleration'] = df_active['speed_diff'] / df_active['time_diff']

    # Drop any NaN values (first row will have NaN due to diff)
    df_active = df_active.dropna(subset=['acceleration'])

    return df_active

def calculate_rms_acceleration(df_active):
    # Calculate RMS acceleration for each trip
    trip_rms_accel = df_active.groupby('trip').apply(lambda x: np.sqrt(np.mean(x['acceleration']**2)))
    return trip_rms_accel

def aggregate_trip_stats(df_active, trip_rms_accel):
    # Aggregate data by trip
    trip_stats = df_active.groupby('trip').agg(
        start_time=('timestamp', 'min'),
        end_time=('timestamp', 'max'),
        avg_speed=('vehicle_speed', 'mean'),
        top_speed=('vehicle_speed', 'max'),
        initial_speed=('vehicle_speed', 'first'),
        avg_acceleration=('acceleration', 'mean')
    ).reset_index()

    # Add RMS acceleration to trip stats
    trip_stats['rms_acceleration'] = trip_rms_accel.values

    return trip_stats

def classify_aggressive_trips(trip_stats, rms_threshold=2.0):
    # Classify trips as aggressive or non-aggressive based on RMS acceleration
    trip_stats['is_aggressive'] = trip_stats['rms_acceleration'] > rms_threshold

    # Print classification result
    print(trip_stats[['trip', 'rms_acceleration', 'is_aggressive']])

    return trip_stats

def plot_data(df, trip_stats):
    fig = go.Figure()

    # Vehicle speed over time
    fig.add_trace(go.Scatter(
        x=df['timestamp'],
        y=df['vehicle_speed'],
        mode='lines',
        name='Vehicle Speed',
        line=dict(color='blue', width=1),
        hoverinfo='x+y'
    ))

    # Plot acceleration over time
    fig.add_trace(go.Scatter(
        x=df['timestamp'],
        y=df['acceleration'],
        mode='lines',
        name='Acceleration',
        line=dict(color='orange', width=1),
        hoverinfo='x+y'
    ))

    # Add traces for each trip's average acceleration and classify aggressive trips
    for _, row in trip_stats.iterrows():
        color = 'red' if row['is_aggressive'] else 'green'
        fig.add_trace(go.Scatter(
            x=[row['start_time'], row['end_time']],
            y=[row['avg_acceleration'], row['avg_acceleration']],
            mode='lines+markers',
            name=f'Avg Acc Trip {row["trip"]} ({ "Aggressive" if row["is_aggressive"] else "Non-aggressive" })',
            line=dict(color=color, width=2, dash='dash'),
            marker=dict(symbol='circle', size=6),
            hoverinfo='x+y+text',
            text=f'RMS Acceleration: {row["rms_acceleration"]:.2f} km/h/s'
        ))

    # Update layout
    fig.update_layout(
        title='Vehicle Speed and Acceleration Over Time (Aggressive vs Non-Aggressive)',
        xaxis=dict(
            title='Time',
            type='date',
            rangeslider=dict(visible=True),
            showspikes=True,
            spikemode='across',
            spikedash='dash'
        ),
        yaxis=dict(
            title='Speed (km/h) / Acceleration (km/h/s)',
            showspikes=True
        ),
        hovermode="x unified",
        template="plotly_white"
    )

    fig.show()

# File path
file_path = 'data1.xlsx'  # Replace with the actual path

# Execute functions
df, df_active = load_and_prepare_data(file_path)
df_active = calculate_acceleration(df_active)
trip_rms_accel = calculate_rms_acceleration(df_active)
trip_stats = aggregate_trip_stats(df_active, trip_rms_accel)
trip_stats = classify_aggressive_trips(trip_stats, rms_threshold=2.0)
plot_data(df_active, trip_stats)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy





    trip  rms_acceleration  is_aggressive
0      1          2.161206           True
1      2          2.041039           True
2      3          1.704241          False
3      4          2.250450           True
4      5          4.217095           True
..   ...               ...            ...
65    66          0.992157          False
66    67          1.925593          False
67    68          2.255262           True
68    69          2.236650           True
69    70          1.484010          False

[70 rows x 3 columns]


In [3]:
import pandas as pd
import plotly.graph_objects as go

def load_and_prepare_data(file_path):
    # Load data
    df = pd.read_excel(file_path)

    # Convert timestamp to datetime
    df['timestamp'] = pd.to_datetime(df['time_stamp'], format='%d/%m/%Y %H:%M:%S')

    # Filter out inactive trips
    df_active = df[df['trip'] > 0]
    return df, df_active

def calculate_acceleration(df_active):
    # Calculate time differences in seconds
    df_active['time_diff'] = df_active['timestamp'].diff().dt.total_seconds()

    # Calculate speed differences (assuming speed is in km/h)
    df_active['speed_diff'] = df_active['vehicle_speed'].diff()

    # Calculate acceleration in km/h² (change in speed divided by time in hours)
    df_active['acceleration'] = df_active['speed_diff'] / (df_active['time_diff'] / 3600)


    # Drop any NaN values (first row will have NaN due to diff)
    df_active = df_active.dropna(subset=['acceleration'])

    return df_active

def aggregate_trip_stats(df_active):
    # Aggregate data by trip
    trip_stats = df_active.groupby('trip').agg(
        start_time=('timestamp', 'min'),
        end_time=('timestamp', 'max'),
        avg_acceleration=('acceleration', 'mean')
    ).reset_index()

    return trip_stats

def plot_acceleration(df, trip_stats):
    fig = go.Figure()

    # Plot acceleration over time
    fig.add_trace(go.Scatter(
        x=df['timestamp'],
        y=df['acceleration'],
        mode='lines',
        name='Acceleration',
        line=dict(color='orange', width=1),
        hoverinfo='x+y'
    ))

    # Add traces for each trip's average acceleration
    for _, row in trip_stats.iterrows():
        fig.add_trace(go.Scatter(
            x=[row['start_time'], row['end_time']],
            y=[row['avg_acceleration'], row['avg_acceleration']],
            mode='lines',
            name=f'Avg Acceleration Trip {row["trip"]}',
            line=dict(color='red', width=2, dash='dash')
        ))

    # Update layout
    fig.update_layout(
        title='Acceleration Over Time',
        xaxis=dict(
            title='Time',
            type='date',
            rangeslider=dict(visible=True),
            showspikes=True,
            spikemode='across',
            spikedash='dash'
        ),
        yaxis=dict(
            title='Acceleration (km/h²)',
            showspikes=True
        ),
        hovermode="x unified",
        template="plotly_white"
    )

    fig.show()

# File path
file_path = 'data1.xlsx'  # Replace with the actual path

# Execute functions
df, df_active = load_and_prepare_data(file_path)
df_active = calculate_acceleration(df_active)
trip_stats = aggregate_trip_stats(df_active)
plot_acceleration(df_active, trip_stats)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [7]:
import pandas as pd
import plotly.graph_objects as go

# Load data from the Excel file
file_path = 'data1.xlsx'  # Replace with actual path to the .xlsx file
df = pd.read_excel(file_path)

# Convert timestamp column to datetime format
df['timestamp'] = pd.to_datetime(df['time_stamp'], format='%d/%m/%Y %H:%M:%S')  # Adjust the column name if necessary

# Filter out rows where trip is 0 (vehicle off periods)
df_active = df[df['trip'] > 0]

# Adjusted parameters for fuel consumption estimation (empirical values)
fuel_consumption_rate_per_rpm = 0.0005  # Example: liters per RPM, adjust this
fuel_consumption_rate_per_load = 0.02  # Example: liters per engine load unit, adjust this

# Calculate fuel consumption rate (L/h) using RPM and engine load
df_active['fuel_consumption_l_per_h'] = (df_active['engine_rpm'] * fuel_consumption_rate_per_rpm) + (df_active['calculated_engine_load'] * fuel_consumption_rate_per_load)

# Avoid division by zero for speed = 0
df_active['vehicle_speed'] = df_active['vehicle_speed'].replace(0, 1e-5)

# Calculate fuel consumption in kilometers per liter (km/L)
df_active['fuel_consumption_km_per_l'] = df_active['vehicle_speed'] / df_active['fuel_consumption_l_per_h']

# Replace infinite values (result of division by zero) with NaN
df_active['fuel_consumption_km_per_l'].replace([float('inf'), -float('inf')], float('nan'), inplace=True)

# Aggregate data by trip
trip_stats = df_active.groupby('trip').agg(
    start_time=('timestamp', 'min'),
    end_time=('timestamp', 'max'),
    avg_speed=('vehicle_speed', 'mean'),
    top_speed=('vehicle_speed', 'max'),
    initial_speed=('vehicle_speed', 'first'),  # First recorded speed for each trip
    avg_fuel_consumption=('fuel_consumption_km_per_l', 'mean')  # Average fuel consumption per trip
).reset_index()

# Calculate additional metrics
trip_stats['trip_duration'] = (trip_stats['end_time'] - trip_stats['start_time']).dt.total_seconds() / 3600  # Trip duration in hours
trip_stats['distance_covered'] = trip_stats['avg_speed'] * trip_stats['trip_duration']  # Distance = speed * time

# Create a figure for plotting
fig = go.Figure()

# Add line trace for vehicle speed over time
fig.add_trace(go.Scatter(
    x=df_active['timestamp'],
    y=df_active['vehicle_speed'],
    mode='lines',
    name='Vehicle Speed',
    line=dict(color='blue', width=1),
    hoverinfo='x+y'
))

# Highlight points where engine_rpm > 4000
high_rpm = df_active[df_active['engine_rpm'] > 4000]
fig.add_trace(go.Scatter(
    x=high_rpm['timestamp'],
    y=high_rpm['vehicle_speed'],
    mode='markers',
    name='RPM > 4000',
    marker=dict(color='red', size=8, symbol='circle'),
    hoverinfo='x+y'
))

# Add lines for average speed, top speed, distance covered, and average fuel consumption for each trip
for _, row in trip_stats.iterrows():
    # Average speed trace
    fig.add_trace(go.Scatter(
        x=[row['start_time'], row['end_time']],
        y=[row['avg_speed'], row['avg_speed']],
        mode='lines',
        name=f'Avg Speed Trip {row["trip"]}',
        line=dict(color='red', width=2, dash='dash')
    ))

    # Top speed trace
    fig.add_trace(go.Scatter(
        x=[row['start_time'], row['end_time']],
        y=[row['top_speed'], row['top_speed']],
        mode='lines',
        name=f'Top Speed Trip {row["trip"]}',
        line=dict(color='green', width=2, dash='dot')
    ))

    # Annotate distance covered for the trip
    fig.add_trace(go.Scatter(
        x=[row['end_time']],
        y=[row['top_speed']],
        mode='markers+text',
        text=f'Distance: {row["distance_covered"]:.2f} km',
        textposition='top center',
        showlegend=False
    ))

    # Annotate average fuel consumption for the trip (without plotting fuel consumption line)
    fig.add_trace(go.Scatter(
        x=[row['end_time']],
        y=[row['avg_speed']],
        mode='markers+text',
        text=f'Avg Fuel: {row["avg_fuel_consumption"]:.2f} km/L',
        textposition='bottom center',
        showlegend=False
    ))


# Update layout with interactive features
fig.update_layout(
    title='Vehicle Speed Over Time with Trip Statistics',
    xaxis=dict(
        title='Time',
        type='date',
        rangeslider=dict(visible=True),  # Enable range slider for zoom control
        showspikes=True,
        spikemode='across',  # Spike line when hovering
        spikedash='dash',
    ),
    yaxis=dict(
        title='Speed (km/h)',
        range=[0, df_active['vehicle_speed'].max() + 10],  # Adjust the y-axis range
        showspikes=True,  # Spike on hover
    ),
    hovermode="x unified",  # Show single hover label across traces
    template="plotly_white"  # Clean and clear look
)


# Display the faigure
fig.show()




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


A value is trying to be set on a copy of a DataFrame or Series through chained assignment using an inplace method.
The behavior will change in pandas 3.0. This inplace method will never work because th

In [1]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go

# Load data from the Excel file
file_path = 'data1.xlsx'  
df = pd.read_excel(file_path)

# Convert timestamp column to datetime format
df['timestamp'] = pd.to_datetime(df['time_stamp'], format='%d/%m/%Y %H:%M:%S')

# Filter out rows where trip is 0 (vehicle off periods)
df_active = df[df['trip'] > 0].copy()

# Adjusted parameters for fuel consumption estimation
fuel_consumption_rate_per_rpm = 0.0005  # liters per RPM
fuel_consumption_rate_per_load = 0.02    # liters per engine load unit

# Calculate time differences and speed differences
df_active['time_diff'] = df_active['timestamp'].diff().dt.total_seconds()
df_active['speed_diff'] = df_active['vehicle_speed'].diff()

# Calculate acceleration in km/h² and drop NaN values
df_active['acceleration'] = df_active['speed_diff'] / df_active['time_diff'].replace(0, np.nan)
df_active.dropna(subset=['acceleration'], inplace=True)

# Calculate fuel consumption
df_active['fuel_consumption_l_per_h'] = (df_active['engine_rpm'] * fuel_consumption_rate_per_rpm) + \
                                          (df_active['calculated_engine_load'] * fuel_consumption_rate_per_load)
df_active['vehicle_speed'].replace(0, 1e-5, inplace=True)  # Avoid division by zero
df_active['fuel_consumption_km_per_l'] = df_active['vehicle_speed'] / df_active['fuel_consumption_l_per_h']
df_active['fuel_consumption_km_per_l'].replace([float('inf'), -float('inf')], np.nan, inplace=True)

# Aggregate data by trip
trip_stats = df_active.groupby('trip').agg(
    start_time=('timestamp', 'min'),
    end_time=('timestamp', 'max'),
    avg_speed=('vehicle_speed', 'mean'),
    top_speed=('vehicle_speed', 'max'),
    avg_acceleration=('acceleration', 'mean'),
    initial_speed=('vehicle_speed', 'first'),
    avg_fuel_consumption=('fuel_consumption_km_per_l', 'mean')  # Add this line
).reset_index()

# Calculate trip duration and distance covered
trip_stats['trip_duration'] = (trip_stats['end_time'] - trip_stats['start_time']).dt.total_seconds() / 3600  # in hours
trip_stats['distance_covered'] = trip_stats['avg_speed'] * trip_stats['trip_duration']

# Add RMS acceleration to trip stats
trip_rms_accel = df_active.groupby('trip')['acceleration'].apply(lambda x: np.sqrt(np.mean(x**2)))
trip_stats['rms_acceleration'] = trip_rms_accel.values

# Classify trips as aggressive or non-aggressive
trip_stats['is_aggressive'] = trip_stats['rms_acceleration'] > 2.0

# Print classification result
print(trip_stats[['trip', 'rms_acceleration', 'is_aggressive']])

# Create a figure for plotting
fig = go.Figure()

# Add vehicle speed and acceleration traces
fig.add_trace(go.Scatter(x=df_active['timestamp'], y=df_active['vehicle_speed'], mode='lines', name='Vehicle Speed', line=dict(color='blue')))
fig.add_trace(go.Scatter(x=df_active['timestamp'], y=df_active['acceleration'], mode='lines', name='Acceleration', line=dict(color='orange')))

# Highlight points where engine_rpm > 4000
high_rpm = df_active[df_active['engine_rpm'] > 4000]
fig.add_trace(go.Scatter(x=high_rpm['timestamp'], y=high_rpm['vehicle_speed'], mode='markers', name='RPM > 4000', marker=dict(color='red', size=8)))

# Add trip statistics traces
for _, row in trip_stats.iterrows():
    fig.add_trace(go.Scatter(x=[row['start_time'], row['end_time']], y=[row['avg_speed']] * 2, mode='lines', name=f'Avg Speed Trip {row["trip"]}', line=dict(color='red', dash='dash')))
    fig.add_trace(go.Scatter(x=[row['start_time'], row['end_time']], y=[row['top_speed']] * 2, mode='lines', name=f'Top Speed Trip {row["trip"]}', line=dict(color='green', dash='dot')))
    
    # Annotations for distance and fuel consumption
    fig.add_trace(go.Scatter(x=[row['end_time']], y=[row['top_speed']], mode='markers+text', text=f'Distance: {row["distance_covered"]:.2f} km', textposition='top center', showlegend=False))
    fig.add_trace(go.Scatter(x=[row['end_time']], y=[row['avg_speed']], mode='markers+text', text=f'Avg Fuel: {row["avg_fuel_consumption"]:.2f} km/L', textposition='bottom center', showlegend=False))
    
    # Add average acceleration trace
    color = 'red' if row['is_aggressive'] else 'green'
    fig.add_trace(go.Scatter(x=[row['start_time'], row['end_time']], y=[row['avg_acceleration']] * 2, mode='lines+markers', name=f'Avg Acc Trip {row["trip"]} ({ "Aggressive" if row["is_aggressive"] else "Non-aggressive" })', line=dict(color=color, dash='dash')))

# Update layout with interactive features
fig.update_layout(
    title='Vehicle Speed and Acceleration Over Time with Trip Statistics (Aggressive vs Non-Aggressive)',
    xaxis=dict(title='Time', type='date', rangeslider=dict(visible=True), showspikes=True, spikemode='across', spikedash='dash'),
    yaxis=dict(title='Speed (km/h) / Acceleration (km/h/s)', range=[0, df_active['vehicle_speed'].max() + 10], showspikes=True),
    hovermode="x unified",
    template="plotly_white"
)

# Display the figure
fig.show()


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_active['vehicle_speed'].replace(0, 1e-5, inplace=True)  # Avoid division by zero
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_active['fuel_consumption_km_per_l'].replace([float('inf'), -float('inf')], np.nan, inplace=True)


    trip  rms_acceleration  is_aggressive
0      1          2.161206           True
1      2          2.041039           True
2      3          1.704241          False
3      4          2.250450           True
4      5          4.217095           True
..   ...               ...            ...
65    66          0.992157          False
66    67          1.925593          False
67    68          2.255262           True
68    69          2.236650           True
69    70          1.484010          False

[70 rows x 3 columns]


In [2]:
trip_stats

Unnamed: 0,trip,start_time,end_time,avg_speed,top_speed,avg_acceleration,initial_speed,avg_fuel_consumption,trip_duration,distance_covered,rms_acceleration,is_aggressive
0,1,2024-08-01 11:46:03,2024-08-01 11:58:56,25.081153,62.0,0.003927,0.00001,14.053039,0.214722,5.385481,2.161206,True
1,2,2024-08-01 22:03:44,2024-08-01 22:07:04,28.227723,54.0,-0.193059,42.00000,15.052888,0.055556,1.568207,2.041039,True
2,3,2024-08-01 22:26:47,2024-08-01 22:37:46,13.834864,45.0,-0.068699,45.00000,8.929524,0.183056,2.532549,1.704241,False
3,4,2024-08-02 08:46:51,2024-08-02 17:04:29,20.718312,59.0,0.003873,0.00001,12.105640,8.293889,171.835375,2.250450,True
4,5,2024-08-02 17:29:19,2024-08-02 17:29:57,9.777780,32.0,0.049888,0.00001,8.300566,0.010556,0.103210,4.217095,True
...,...,...,...,...,...,...,...,...,...,...,...,...
65,66,2024-08-29 18:27:24,2024-08-29 18:27:54,5.000000,9.0,-0.031248,4.00000,3.709450,0.008333,0.041667,0.992157,False
66,67,2024-08-29 20:55:55,2024-08-29 21:18:29,18.212928,50.0,0.017871,4.00000,11.246424,0.376111,6.850085,1.925593,False
67,68,2024-08-30 09:39:41,2024-08-30 09:41:35,25.224138,53.0,-0.051722,9.00000,13.867949,0.031667,0.798764,2.255262,True
68,69,2024-08-30 17:46:34,2024-08-30 17:49:44,29.104167,59.0,-0.151031,32.00000,16.303195,0.052778,1.536053,2.236650,True


In [3]:
trip_stats.avg_fuel_consumption.min()

3.709450184549267

In [4]:
trip_stats.avg_fuel_consumption.max()

20.0464435584781

In [5]:
trip_stats.avg_fuel_consumption.mean()

11.810206232433856

In [6]:
trip_stats['distance_covered'].sum()

464.7116423751581

In [19]:
trip_stats['distance_covered'].mean()

6.638737748216544

In [15]:
from datetime import datetime

# Convert time columns to datetime format
trip_stats['end_time'] = pd.to_datetime(trip_stats['end_time'])

# Total distance covered in all trips
total_distance = trip_stats['distance_covered'].sum()

# Count how often RPM exceeded 4000
high_rpm_events = df[df['engine_rpm'] > 4000].shape[0]

# Check if time difference between the current date and last trip's end_time is greater than 3 months
last_trip_date = trip_stats['end_time'].max()  # Get the date of the last trip
current_date = datetime.now()
time_difference = (current_date - last_trip_date).days / 30  # Convert days to months

# Oil change recommendation logic
if total_distance > 500 or high_rpm_events > 100 or time_difference > 3:
    print("Recommendation: It's time for an oil change.")
else:
    print("No oil change needed yet.")

No oil change needed yet.


In [14]:


# Convert time columns to datetime format
trip_stats['end_time'] = pd.to_datetime(trip_stats['end_time'])

# Total distance covered in all trips
total_distance = trip_stats['distance_covered'].sum()

# Count how often the coolant temperature exceeds 100°C
high_temp_events = df[df['engine_coolant_temperature'] > 100].shape[0]

# Get the date of the last trip
last_trip_date = trip_stats['end_time'].max()

# Calculate the time difference in months
current_date = datetime.now()
time_difference = (current_date - last_trip_date).days / 30  # Convert days to months

# Coolant change recommendation logic
if high_temp_events > 1000 or total_distance > 7000 or time_difference > 6:
    print("Recommendation: It's time for a coolant change.")
else:
    print("No coolant change needed yet.")


No coolant change needed yet.
