# Test Bench Data Logger
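This notebook merges multiple CSV sensor logs from a directory, cleans the data (drops duplicate rows and forward-fills missing values), transforms fields (parses timestamps and normalizes the `value` column), and exports summary statistics to a CSV file.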

In [1]:
# Import Required Libraries
import pandas as pd
import os

## Merge Multiple Logs

In [3]:
# Directory containing the CSV log files to merge.
# NOTE(review): this is an absolute, machine-specific path; point it at your
# own copy of the sample logs before running the notebook.
log_dir = r"G:\DIYguru\Data-Science-and-Engineering-Analytics\Projects\Mini_Projects\sample_logs"

# Collect the CSV file names. os.listdir() returns entries in arbitrary order,
# so sort them to make the merged row order reproducible across runs/machines.
csv_files = sorted(file for file in os.listdir(log_dir) if file.endswith('.csv'))

# pd.concat() on an empty list fails with an opaque "No objects to concatenate";
# fail early with a message that names the directory instead.
if not csv_files:
    raise ValueError(f"No .csv log files found in {log_dir!r}")

# Read each log into its own DataFrame
all_logs = [pd.read_csv(os.path.join(log_dir, file)) for file in csv_files]

# Combine all logs into a single DataFrame with a fresh 0..n-1 index
merged_logs = pd.concat(all_logs, ignore_index=True)
print(merged_logs.head())

                    timestamp  sensor_id      value
0  2025-01-09 19:01:51.680019          1  81.502427
1  2025-01-06 15:40:01.428892          5  76.684361
2  2025-01-03 19:46:38.810853          2  70.723500
3  2025-01-08 08:04:37.900929          1  28.359448
4  2025-01-02 16:34:57.811176          3  10.296229


## Clean Data

In [4]:
# Drop exact duplicate rows. Rebinding (instead of inplace=True) keeps the
# cell safe to re-run and avoids mutating the frame behind earlier outputs.
merged_logs = merged_logs.drop_duplicates()

# Forward-fill missing values from the previous row.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill').
# Note: NaNs in the very first row(s) have no predecessor and stay NaN.
merged_logs = merged_logs.ffill()

# info() prints its report itself and returns None, so do not wrap it in
# print() (that would emit a stray "None" after the report).
merged_logs.info()

  merged_logs.fillna(method='ffill', inplace=True)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 823 entries, 0 to 822
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   timestamp  823 non-null    object 
 1   sensor_id  823 non-null    int64  
 2   value      823 non-null    float64
dtypes: float64(1), int64(1), object(1)
memory usage: 19.4+ KB
None


## Transform Fields

In [5]:
# Parse the 'timestamp' column into pandas datetime values
parsed_timestamps = pd.to_datetime(merged_logs['timestamp'])
merged_logs['timestamp'] = parsed_timestamps

# Standardize 'value' (z-score): subtract the column mean, divide by its std
value_mean = merged_logs['value'].mean()
value_std = merged_logs['value'].std()
centered_values = merged_logs['value'] - value_mean
merged_logs['value'] = centered_values / value_std

# Preview the transformed frame
print(merged_logs.head())

                   timestamp  sensor_id     value
0 2025-01-09 19:01:51.680019          1  1.001285
1 2025-01-06 15:40:01.428892          5  0.817754
2 2025-01-03 19:46:38.810853          2  0.590692
3 2025-01-08 08:04:37.900929          1 -1.023045
4 2025-01-02 16:34:57.811176          3 -1.711111


## Prepare Exportable Summary

In [6]:
# Exportable summary
# describe() returns one row per statistic (count, mean, min, ...) with the
# statistic name stored in the index.
# NOTE(review): 'value' was standardized in the transform cell above, so its
# statistics here describe the normalized values, not the raw readings.
summary = merged_logs.describe()

# Keep the index when exporting: with index=False the statistic names are
# dropped and the CSV rows become unlabelled. index_label names that column.
summary.to_csv('summary_statistics.csv', index_label='statistic')
print(summary)
print("Summary statistics saved to summary_statistics.csv")

                           timestamp   sensor_id         value
count                            823  823.000000  8.230000e+02
mean   2025-01-06 03:13:14.253918208    5.036452 -8.849408e-17
min       2025-01-01 00:01:34.123960    1.000000 -1.721807e+00
25%    2025-01-03 14:05:57.874546944    3.000000 -8.557974e-01
50%    2025-01-06 04:46:21.899525120    5.000000  4.045858e-02
75%    2025-01-08 13:34:39.058206464    7.000000  8.889155e-01
max       2025-01-10 23:29:43.020769    9.000000  1.705544e+00
std                              NaN    2.621942  1.000000e+00
Summary statistics saved to summary_statistics.csv
