In [None]:
Title: etl_pipeline_temperature_logs.py
Purpose: Loads raw temperature logs, cleans noise, flags anomalies, and outputs the processed dataset.
Relevance: HVAC and cooling systems rely heavily on temperature data

A simple ETL workflow that simulates temperature-sensor data cleaning, similar to the pipelines used for industrial cooling systems.

In [2]:
# Import pandas library for data handling and analysis
import pandas as pd

# Extract step: Load raw data from a CSV file into a DataFrame
def extract(file_path):
    """Load a CSV file into a pandas DataFrame.

    Args:
        file_path: Location of the CSV input; handed unchanged to
            ``pd.read_csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    raw_frame = pd.read_csv(file_path)
    return raw_frame

# Transform step: Clean and process the data
def transform(df: pd.DataFrame) -> pd.DataFrame:
    """Clean temperature readings and flag anomalous ones.

    The input frame is not modified. The 'temperature' column is converted
    to float, rows outside the open interval (-40, 80) are dropped, and an
    integer 'anomaly' column is added.

    Args:
        df: Frame containing a 'temperature' column whose values can be
            converted to float.

    Returns:
        A new DataFrame holding only rows with -40 < temperature < 80
        (NaN readings fail both comparisons and are dropped as well), plus
        an 'anomaly' column that is 1 when temperature > 60 or
        temperature < 0, and 0 otherwise.

    Raises:
        KeyError: If 'temperature' is not a column of ``df``.
        ValueError: If a temperature value cannot be converted to float.
    """
    df = df.copy()  # Work on a copy to avoid modifying the original DataFrame

    # Ensure the 'temperature' column is numeric (float) for calculations
    df['temperature'] = df['temperature'].astype(float)

    # Keep only plausible readings; both bounds are exclusive.
    in_range = (df['temperature'] > -40) & (df['temperature'] < 80)
    # Take an explicit copy of the filtered rows: adding a column to the bare
    # result of a boolean-mask selection triggers SettingWithCopyWarning on
    # pandas versions without copy-on-write.
    df = df.loc[in_range].copy()

    # Vectorised anomaly flag (1 = too hot or below freezing). Casting the
    # boolean mask keeps the column integer-typed even for an empty frame.
    temps = df['temperature']
    df['anomaly'] = ((temps > 60) | (temps < 0)).astype('int64')

    return df

# Load step: Save the processed data back to a CSV file
def load(df, output_file="processed_temperature_logs.csv"):
    """Write the DataFrame out as CSV and print a completion message.

    Args:
        df: DataFrame to persist.
        output_file: Destination handed to ``DataFrame.to_csv``; defaults
            to "processed_temperature_logs.csv".

    Returns:
        None.
    """
    # index=False keeps the row index out of the written file.
    df.to_csv(path_or_buf=output_file, index=False)

    # Confirmation message for whoever is running the pipeline.
    print("ETL completed successfully.")

# Main execution block
# Runs the three pipeline stages in order when the file is executed directly.
if __name__ == "__main__":
    # Step 1: Extract raw data from the input CSV file.
    # The path is relative, so it is resolved against the current working
    # directory of the process.
    data = extract("raw_temperature_logs.csv")
    
    # Step 2: Transform the data; the result is bound to 'cleaned' and
    # 'data' itself is not reassigned.
    cleaned = transform(data)
    
    # Step 3: Load the processed data; no output path is passed, so load()
    # uses its default output file name.
    load(cleaned)


File not found.
