# Test Bench Data Logger
This project merges multiple logs, cleans data, transforms fields, and prepares an exportable summary.

In [1]:
# Import Required Libraries
import pandas as pd
import os

## Merge Multiple Logs

In [4]:
# Specify the directory containing log files
log_dir = 'G:\\DIY Guru\\Data-Science-and-Engineering-Analytics\\sample_logs'

# Read and merge all CSV files
all_logs = []
for file in os.listdir(log_dir):
    if file.endswith('.csv'):
        file_path = os.path.join(log_dir, file)
        log = pd.read_csv(file_path)
        all_logs.append(log)

# Combine all logs into a single DataFrame
merged_logs = pd.concat(all_logs, ignore_index=True)
print(merged_logs.head())

                    timestamp  sensor_id      value
0  2025-01-01 08:53:51.756968          4  10.806067
1  2025-01-10 16:52:01.575376          3  42.337052
2  2025-01-04 09:19:31.948991          3  51.626520
3  2025-01-10 01:13:29.128424          1  49.903567
4  2025-01-01 23:49:55.384271          5  73.334637


## Clean Data

In [5]:
# Drop duplicate rows
merged_logs.drop_duplicates(inplace=True)

# Handle missing values
merged_logs.fillna(method='ffill', inplace=True)
print(merged_logs.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 808 entries, 0 to 807
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   timestamp  808 non-null    object 
 1   sensor_id  808 non-null    int64  
 2   value      808 non-null    float64
dtypes: float64(1), int64(1), object(1)
memory usage: 19.1+ KB
None


  merged_logs.fillna(method='ffill', inplace=True)


## Transform Fields

In [6]:
# Example: Convert timestamp to datetime
merged_logs['timestamp'] = pd.to_datetime(merged_logs['timestamp'])

# Example: Normalize a numeric field
merged_logs['value'] = (merged_logs['value'] - merged_logs['value'].mean()) / merged_logs['value'].std()
print(merged_logs.head())

                   timestamp  sensor_id     value
0 2025-01-01 08:53:51.756968          4 -1.674792
1 2025-01-10 16:52:01.575376          3 -0.488350
2 2025-01-04 09:19:31.948991          3 -0.138807
3 2025-01-10 01:13:29.128424          1 -0.203638
4 2025-01-01 23:49:55.384271          5  0.678021


## Prepare Exportable Summary

In [8]:
#Exportable Summary
summary = merged_logs.describe()
summary.to_csv('summary_statistics.csv', index=False)
print(summary)
print("Summary statistics saved to summary_statistics.csv")

                           timestamp   sensor_id         value
count                            808  808.000000  8.080000e+02
mean   2025-01-05 20:10:26.220852480    4.949257 -1.099231e-17
min       2025-01-01 00:13:43.843506    1.000000 -1.698531e+00
25%    2025-01-03 09:43:50.470497792    3.000000 -8.674178e-01
50%    2025-01-05 15:57:28.988634880    5.000000 -1.346758e-02
75%    2025-01-08 07:43:55.770214400    7.000000  8.944221e-01
max       2025-01-10 23:51:10.179814    9.000000  1.681137e+00
std                              NaN    2.577887  1.000000e+00
Summary statistics saved to summary_statistics.csv
