# In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Step 1: Ingest Data from CSVs
def ingest_data(csv_files):
    """Read one or more CSV sources and stack them into a single DataFrame.

    Args:
        csv_files: An iterable of anything ``pd.read_csv`` accepts (paths,
            URLs, open file-like objects). A single path given as ``str`` or
            ``os.PathLike`` is treated as a one-element list.

    Returns:
        A DataFrame holding the rows of every source in input order, with a
        fresh 0..n-1 integer index.

    Raises:
        ValueError: If ``csv_files`` is empty.
    """
    import os

    # A lone path would otherwise be iterated character by character (str)
    # or fail as non-iterable (Path), so wrap it.
    if isinstance(csv_files, (str, os.PathLike)):
        csv_files = [csv_files]

    frames = [pd.read_csv(source) for source in csv_files]

    # pd.concat raises an opaque "No objects to concatenate" for an empty
    # list; fail with a message that points at the actual problem instead.
    if not frames:
        raise ValueError(
            "ingest_data() needs at least one CSV file; got an empty collection"
        )

    return pd.concat(frames, ignore_index=True)

# Step 2: Handle Missing Values
def handle_missing_values(df, method='interpolate'):
    """Fill gaps in the sensor columns, then drop rows that are still incomplete.

    The caller's DataFrame is not modified; a new frame is returned.

    Args:
        df: Frame with a 'Timestamp' column that ``pd.to_datetime`` can parse
            and the sensor columns 'Vibration (m/s²)' and 'Temperature (°C)'.
        method: How to fill missing sensor readings:
            'interpolate' (default) - linear interpolation, also filling
            leading and trailing gaps;
            'ffill' - carry the last valid reading forward;
            'bfill' - carry the next valid reading backward.

    Returns:
        A new DataFrame with 'Timestamp' as the first column (datetime
        dtype), a default integer index, and no NaN in any column.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
        KeyError: If 'Timestamp' or a sensor column is missing.
    """
    fillers = {
        # 'linear' treats rows as equally spaced; it does not use the
        # Timestamp index for weighting.
        'interpolate': lambda cols: cols.interpolate(
            method='linear', limit_direction='both'
        ),
        'ffill': lambda cols: cols.ffill(),
        'bfill': lambda cols: cols.bfill(),
    }
    if method not in fillers:
        raise ValueError(
            f"Unknown method {method!r}; expected one of {sorted(fillers)}"
        )

    # assign() builds a new frame, so the caller's 'Timestamp' column is not
    # converted in place.
    indexed = df.assign(Timestamp=pd.to_datetime(df['Timestamp']))
    indexed = indexed.set_index('Timestamp')

    sensor_cols = ['Vibration (m/s²)', 'Temperature (°C)']
    indexed[sensor_cols] = fillers[method](indexed[sensor_cols])

    # Drops every row that still has a NaN in ANY column, not only the sensor
    # columns, so gaps in unrelated columns also remove rows.
    indexed = indexed.dropna()

    return indexed.reset_index()

# Main Function to Orchestrate the Process
def transform_sensor_logs(csv_files, output_file='modeling_dataset.csv'):
    """Run the ingest and missing-value steps and return the cleaned dataset.

    Args:
        csv_files: CSV sources passed straight to ``ingest_data``.
        output_file: Path the cleaned dataset is written to as CSV (without
            the index). Pass ``None`` or an empty string to skip writing.

    Returns:
        The cleaned DataFrame produced by ``handle_missing_values``.
    """
    # Ingest
    df = ingest_data(csv_files)
    print(f"Step 1: Ingested data shape: {df.shape}")

    # Handle missing values
    df = handle_missing_values(df)
    print(f"Step 2: After handling missing values: {df.shape}")

    # Persist the result; previously output_file was accepted but never used.
    if output_file:
        df.to_csv(output_file, index=False)

    # Without this return the caller always received None.
    return df
    
# Example Usage
# Runs when this file is executed directly (including as a notebook cell),
# not when it is imported as a module.
if __name__ == "__main__":
    # Relative path: resolved against the current working directory.
    csv_files = ['sensor_maintenance_data.csv']  
    transformed_df = transform_sensor_logs(csv_files)
    


# Output:
# Step 1: Ingested data shape: (500, 27)
# Step 2: After handling missing values: (200, 27)
