<a href="https://colab.research.google.com/github/innovate-data/PDM/blob/main/Python_Code_Basic_Sensor_Data_Analysis.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import io # Used to simulate a CSV file for demonstration

# --- 1. Simulate or Load Data ---
# In a real scenario, you would replace this simulated CSV
# with the actual path to your file, like:
# csv_filepath = 'path/to/your/sensor_data.csv'
# df = pd.read_csv(csv_filepath, parse_dates=['timestamp'])

# For demonstration, let's create a sample CSV string
# This assumes your CSV has columns: timestamp, vibration, temperature, pressure, acoustics
csv_data = """timestamp,vibration,temperature,pressure,acoustics
2025-04-20 19:00:00,0.5,70.1,14.7,60.2
2025-04-20 19:01:00,0.6,70.3,14.7,61.5
2025-04-20 19:02:00,0.5,70.2,14.8,60.8
2025-04-20 19:03:00,1.5,75.5,14.7,85.3
2025-04-20 19:04:00,0.4,70.5,14.6,62.1
2025-04-20 19:05:00,0.5,70.4,14.7,60.5
"""

# Build the DataFrame in a single chained expression:
#   1. io.StringIO wraps the CSV text so pandas can read it like a file.
#   2. parse_dates=['timestamp'] converts the timestamp column to datetimes.
#   3. set_index('timestamp') makes the timestamps the row index, which is
#      the usual layout for time-series analysis.
df = pd.read_csv(
    io.StringIO(csv_data),
    parse_dates=['timestamp'],
).set_index('timestamp')

# Emit the banner, a preview of the first few rows, and a blank spacer.
# Joining the pieces with sep="\n" prints each on its own line, exactly as
# three consecutive print() calls would.
print(
    "--- Successfully Loaded Data ---",
    df.head(),
    "\n",
    sep="\n",
)


# --- 2. Basic Data Exploration ---
# Print a banner, then the per-column summary table produced by
# DataFrame.describe() (count, mean, std, min, quartiles, max), then a
# blank spacer. sep="\n" places each piece on its own line.
print(
    "--- Basic Statistics ---",
    df.describe(),
    "\n",
    sep="\n",
)


# --- 3. Simple Anomaly Detection (Example: Threshold Check) ---
# Define some arbitrary thresholds for demonstration
# In a real system, these would be determined by equipment specifications,
# historical data analysis, or machine learning models.
# Upper limit per sensor column; a reading strictly above its limit is
# reported as a potential anomaly. These values are arbitrary demo numbers.
thresholds = {
    'vibration': 1.0,
    'temperature': 75.0,
    'pressure': 14.9,  # Example: High pressure threshold
    'acoustics': 80.0,
}

print("--- Checking for Threshold Breaches (Simple Anomalies) ---")
# Walk through the sensors one at a time and report any breaching rows.
for sensor, threshold_value in thresholds.items():
    # Keep only the rows whose reading is strictly greater than the limit.
    anomalies = df.loc[df[sensor].gt(threshold_value)]

    # Nothing over the limit: say so and move on to the next sensor.
    if anomalies.empty:
        print(f"No values exceeded the threshold for '{sensor}'.\n")
        continue

    # At least one breach: show the full offending rows (all sensor columns).
    print(f"Potential anomalies detected for '{sensor}' (Threshold > {threshold_value}):")
    print(anomalies)
    print("\n")

# --- 4. Further Steps (Conceptual) ---
# - Visualization: Plot sensor readings over time to spot trends.
# - Feature Engineering: Create new features (e.g., rolling averages, rate of change).
# - Machine Learning: Train models (e.g., Isolation Forest, LSTM, ARIMA)
#   on historical data to detect more complex anomalies or predict failures.
# - Integration: Feed detected anomalies or predictions into an alerting system
#   or a maintenance management system (CMMS).

# Closing banner plus a three-line disclaimer, written with one print call;
# sep="\n" puts each string on its own line.
print(
    "--- Analysis Complete ---",
    "Note: This is a basic example. Real-world predictive maintenance",
    "often involves more sophisticated data cleaning, feature engineering,",
    "and machine learning models.",
    sep="\n",
)

--- Successfully Loaded Data ---
                     vibration  temperature  pressure  acoustics
timestamp                                                       
2025-04-20 19:00:00        0.5         70.1      14.7       60.2
2025-04-20 19:01:00        0.6         70.3      14.7       61.5
2025-04-20 19:02:00        0.5         70.2      14.8       60.8
2025-04-20 19:03:00        1.5         75.5      14.7       85.3
2025-04-20 19:04:00        0.4         70.5      14.6       62.1


--- Basic Statistics ---
       vibration  temperature   pressure  acoustics
count   6.000000     6.000000   6.000000   6.000000
mean    0.666667    71.166667  14.700000  65.066667
std     0.413118     2.127596   0.063246   9.936331
min     0.400000    70.100000  14.600000  60.200000
25%     0.500000    70.225000  14.700000  60.575000
50%     0.500000    70.350000  14.700000  61.150000
75%     0.575000    70.475000  14.700000  61.950000
max     1.500000    75.500000  14.800000  85.300000


--- Checking fo