In [None]:
# Cell 1: Import Libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px

%matplotlib inline
sns.set_style("whitegrid")
sns.set_palette("Blues")

print("Libraries imported.")

In [None]:
# Cell 2: Load and Clean Data
# Read the engine CSV; the path is relative to the notebooks/ directory
df = pd.read_csv('../data/engine_data.csv')

# Replace the header positionally with snake_case names
# (intended to match the names used in src/data/loader.py)
df.columns = [
    'engine_rpm',
    'lub_oil_pressure',
    'fuel_pressure',
    'coolant_pressure',
    'lub_oil_temp',
    'coolant_temp',
    'engine_condition',
]

# The trailing expression renders the first rows as the cell output
print("Dataset Preview:")
df.head()

In [None]:
# Cell 3: Data Summary
# Each result is shown with display() rather than print() so the notebook
# renders it as a rich table instead of a plain-text dump. display() is
# available without an import inside an IPython/Jupyter kernel.

# Descriptive statistics for the numeric columns
print("Dataset Statistics:")
display(df.describe())

# Number of missing values in each column
print("\nMissing Values:")
display(df.isnull().sum().to_frame(name='missing_count'))

# Row count per value of the target column
print("\nEngine Condition Counts (1=Healthy, 0=Unhealthy):")
display(df['engine_condition'].value_counts().to_frame(name='row_count'))

In [None]:
# Cell 4: Pairplot
# Grid of pairwise plots colored by engine_condition, histograms on the diagonal
print("Generating Pairplot...")
sns.pairplot(
    data=df,
    hue='engine_condition',
    diag_kind='hist',
)
# y slightly above 1 lifts the figure title clear of the top row of panels
plt.suptitle(t="Pairplot of Engine Sensors by Condition", y=1.02)
plt.show()

In [None]:
# Cell 5: Violinplot
# Shape of the coolant_temp distribution for each engine_condition value
print("Generating Violinplot...")
plt.figure(figsize=(8, 6))
sns.violinplot(
    data=df,
    x='engine_condition',
    y='coolant_temp',
)
# Labels are set after plotting so they replace seaborn's default axis labels
plt.title(label="Coolant Temperature by Engine Condition")
plt.xlabel(xlabel="Engine Condition (1=Healthy, 0=Unhealthy)")
plt.ylabel(ylabel="Coolant Temperature (°C)")
plt.show()

In [None]:
# Cell 6: Heatmap
# Pairwise correlation between the sensor columns; the target column
# engine_condition is dropped before computing the matrix.
print("Generating Heatmap...")
plt.figure(figsize=(10, 8))
corr = df.drop(columns='engine_condition').corr()
# Correlation coefficients are signed and the color range is pinned to
# [-1, 1], so a diverging colormap is used: 0 lands on the neutral midpoint
# and the sign of each coefficient is readable from the hue. A sequential
# map such as 'Blues' would draw zero correlation as mid-blue and strong
# negative correlation as near-white.
sns.heatmap(corr, annot=True, cmap='coolwarm', vmin=-1, vmax=1)
plt.title("Correlation Heatmap of Engine Sensors")
plt.show()

In [None]:
# Cell 7: Boxplot
# Spread and outliers of engine_rpm for each engine_condition value
print("Generating Boxplot...")
plt.figure(figsize=(8, 6))
sns.boxplot(
    data=df,
    x='engine_condition',
    y='engine_rpm',
)
# Labels are set after plotting so they replace seaborn's default axis labels
plt.title(label="Engine RPM by Engine Condition")
plt.xlabel(xlabel="Engine Condition (1=Healthy, 0=Unhealthy)")
plt.ylabel(ylabel="Engine RPM")
plt.show()

In [None]:
# Cell 8: Scatter Plot
# engine_rpm against lub_oil_pressure, points colored by engine_condition
print("Generating Scatter Plot...")
plt.figure(figsize=(8, 6))
sns.scatterplot(
    data=df,
    x='engine_rpm',
    y='lub_oil_pressure',
    hue='engine_condition',
)
# Labels are set after plotting so they replace seaborn's default axis labels
plt.title(label="Engine RPM vs. Lubrication Oil Pressure")
plt.xlabel(xlabel="Engine RPM")
plt.ylabel(ylabel="Lubrication Oil Pressure (bar)")
plt.show()

In [None]:
# Cell 9: Plotly Boxplot
# Interactive box plot of engine_rpm, one box per engine_condition value
print("Generating Plotly Boxplot...")
fig = px.box(
    data_frame=df,
    x='engine_condition',
    y='engine_rpm',
    color='engine_condition',
    title="Engine RPM by Engine Condition",
)
# Replace the default column-name axis titles with readable ones
fig.update_layout(
    xaxis_title="Engine Condition (1=Healthy, 0=Unhealthy)",
    yaxis_title="Engine RPM",
)
fig.show()

In [None]:
# Cell 10: Plotly Scatter
# Interactive scatter of engine_rpm against lub_oil_pressure, colored by class.
print("Generating Plotly Scatter...")
# engine_condition is a 0/1 class label. Plotly Express treats a numeric
# color column as continuous (gradient plus colorbar), which misrepresents a
# binary class. Plot from a copy whose label column is cast to string so each
# class gets its own discrete color and legend entry; df itself is untouched.
scatter_df = df.assign(engine_condition=df['engine_condition'].astype(str))
fig = px.scatter(
    scatter_df,
    x='engine_rpm',
    y='lub_oil_pressure',
    color='engine_condition',
    # Sort the class labels so the legend order does not depend on row order
    category_orders={
        'engine_condition': sorted(scatter_df['engine_condition'].unique())
    },
    title="Engine RPM vs. Lubrication Oil Pressure",
)
fig.update_layout(xaxis_title="Engine RPM", yaxis_title="Lubrication Oil Pressure (bar)")
fig.show()