Import Required Libraries

In [28]:
import pandas as pd  # data loading, manipulation, and analysis
import numpy as np  # numeric dtypes (np.number) and array utilities
import sklearn as sk  # machine learning metrics
# Alias the pyplot submodule, not the top-level package: `matplotlib` itself
# has no plot()/show() functions, so `import matplotlib as plt` would make
# every later `plt.plot(...)` call fail with AttributeError.
import matplotlib.pyplot as plt  # plotting

Load Your Dataset

In [9]:
# Read the sensor log CSV into the raw DataFrame that every later cell
# derives from. Nothing is parsed specially here, so the column dtypes are
# whatever pandas infers from the file.
df = pd.read_csv(filepath_or_buffer='/content/sensor_log.csv')

Viewing dataset

In [10]:
# Preview the first five rows (the default for head) to sanity-check the load.
df.head(n=5)

Unnamed: 0,timestamp,temperature_c,humidity_pct,voltage_v
0,2025-10-01 08:00:00,24.5,55.2,3.7
1,2025-10-01 08:00:10,24.7,55.0,3.69
2,2025-10-01 08:00:20,24.6,55.1,
3,2025-10-01 08:00:30,,54.9,3.68
4,2025-10-01 08:01:00,24.9,54.8,3.68


Basic information about the dataset

In [23]:
# Build a structural overview of the raw frame: its dimensions, column
# names, the dtype name of each column, and how many values are missing
# (per column and in total). The per-column NaN counts are computed once and
# reused for both missing-value entries.
missing_per_column = df.isna().sum()
info = dict(
    shape=df.shape,
    columns=list(df.columns),
    dtypes={column: dtype.name for column, dtype in df.dtypes.items()},
    missing_counts=missing_per_column.to_dict(),
    total_missing=int(missing_per_column.sum()),
)
print("Basic info:", info)

Basic info: {'shape': (10, 4), 'columns': ['timestamp', 'temperature_c', 'humidity_pct', 'voltage_v'], 'dtypes': {'timestamp': 'object', 'temperature_c': 'float64', 'humidity_pct': 'float64', 'voltage_v': 'float64'}, 'missing_counts': {'timestamp': 0, 'temperature_c': 2, 'humidity_pct': 1, 'voltage_v': 1}, 'total_missing': 4}


Find duplicates (all columns identical)

In [24]:
# Flag rows that are identical across all columns. keep=False marks every
# member of a duplicate group, not just the second and later occurrences.
duplicates_mask = df.duplicated(keep=False)
# Take an independent copy so later edits to `duplicates` never touch `df`.
duplicates = df.loc[duplicates_mask].copy()
duplicates_count = len(duplicates)
print(f"Found {duplicates_count} duplicate rows (keep=False).")

Found 0 duplicate rows (keep=False).


Numeric columns for imputation evaluation

In [29]:
# Names of the columns whose dtype is numeric; non-numeric columns are
# excluded by select_dtypes.
numeric_frame = df.select_dtypes(include=[np.number])
numeric_cols = list(numeric_frame.columns)
print("Numeric columns:", numeric_cols)

Numeric columns: ['temperature_c', 'humidity_pct', 'voltage_v']


Handle Missing Values - Interpolation

In [31]:
# Fill missing readings by linear interpolation, applied to the numeric
# columns only. Calling interpolate() on the whole frame also hits the
# object-dtype `timestamp` column, which triggers a pandas FutureWarning and
# is slated to become an error; restricting to numeric columns yields the
# same filled values without that warning.
#
# NOTE: the default method ("linear") treats rows as equally spaced and
# ignores the timestamps, so each gap is filled with the midpoint of its
# neighbours regardless of how far apart in time they are.
df_interpolated = df.copy()
interpolation_cols = df_interpolated.select_dtypes(include="number").columns
df_interpolated[interpolation_cols] = df_interpolated[interpolation_cols].interpolate()
df_interpolated

  df_interpolated = df.interpolate()


Unnamed: 0,timestamp,temperature_c,humidity_pct,voltage_v
0,2025-10-01 08:00:00,24.5,55.2,3.7
1,2025-10-01 08:00:10,24.7,55.0,3.69
2,2025-10-01 08:00:20,24.6,55.1,3.685
3,2025-10-01 08:00:30,24.75,54.9,3.68
4,2025-10-01 08:01:00,24.9,54.8,3.68
5,2025-10-01 08:02:15,25.1,54.75,3.67
6,2025-10-01 08:03:00,25.3,54.7,3.67
7,2025-10-01 08:05:30,25.5,54.9,3.65
8,2025-10-01 08:08:00,25.75,55.0,3.64
9,2025-10-01 08:10:00,26.0,55.1,3.63


Rechecking for missing values

In [32]:
# Count the remaining NaNs in each column of the interpolated frame.
df_interpolated.isna().sum(axis=0)

Unnamed: 0,0
timestamp,0
temperature_c,0
humidity_pct,0
voltage_v,0


Handle Missing Values - Forward Fill

In [33]:
# Forward fill: replace each missing value with the last valid value above
# it in the same column. DataFrame.ffill() is the supported spelling;
# fillna(method='ffill') is deprecated and emits a FutureWarning.
# A NaN in the very first row would stay NaN, since nothing precedes it.
df_ffill = df.ffill()
df_ffill

  df_ffill = df.fillna(method='ffill')


Unnamed: 0,timestamp,temperature_c,humidity_pct,voltage_v
0,2025-10-01 08:00:00,24.5,55.2,3.7
1,2025-10-01 08:00:10,24.7,55.0,3.69
2,2025-10-01 08:00:20,24.6,55.1,3.69
3,2025-10-01 08:00:30,24.6,54.9,3.68
4,2025-10-01 08:01:00,24.9,54.8,3.68
5,2025-10-01 08:02:15,25.1,54.8,3.67
6,2025-10-01 08:03:00,25.3,54.7,3.67
7,2025-10-01 08:05:30,25.5,54.9,3.65
8,2025-10-01 08:08:00,25.5,55.0,3.64
9,2025-10-01 08:10:00,26.0,55.1,3.63


Rechecking for missing values

In [34]:
# Count the remaining NaNs in each column of the forward-filled frame.
df_ffill.isna().sum(axis=0)

Unnamed: 0,0
timestamp,0
temperature_c,0
humidity_pct,0
voltage_v,0


Handle Missing Values - Backward Fill

In [35]:
# Backward fill: replace each missing value with the next valid value below
# it in the same column. DataFrame.bfill() is the supported spelling;
# fillna(method='bfill') is deprecated and emits a FutureWarning.
# A NaN in the very last row would stay NaN, since nothing follows it.
df_bfill = df.bfill()
df_bfill

  df_bfill = df.fillna(method='bfill')


Unnamed: 0,timestamp,temperature_c,humidity_pct,voltage_v
0,2025-10-01 08:00:00,24.5,55.2,3.7
1,2025-10-01 08:00:10,24.7,55.0,3.69
2,2025-10-01 08:00:20,24.6,55.1,3.68
3,2025-10-01 08:00:30,24.9,54.9,3.68
4,2025-10-01 08:01:00,24.9,54.8,3.68
5,2025-10-01 08:02:15,25.1,54.7,3.67
6,2025-10-01 08:03:00,25.3,54.7,3.67
7,2025-10-01 08:05:30,25.5,54.9,3.65
8,2025-10-01 08:08:00,26.0,55.0,3.64
9,2025-10-01 08:10:00,26.0,55.1,3.63


Rechecking for missing values

In [36]:
# Count the remaining NaNs in each column of the backward-filled frame.
df_bfill.isna().sum(axis=0)

Unnamed: 0,0
timestamp,0
temperature_c,0
humidity_pct,0
voltage_v,0


Summary Statistics

In [37]:
# Descriptive statistics for the raw (un-imputed) frame. The quartiles are
# spelled out explicitly; they are the same ones describe() uses by default.
# Counts exclude missing values, so they can differ between columns.
summary_stats = df.describe(percentiles=[0.25, 0.5, 0.75])
summary_stats

Unnamed: 0,temperature_c,humidity_pct,voltage_v
count,8.0,9.0,9.0
mean,25.075,54.966667,3.667778
std,0.509201,0.158114,0.023333
min,24.5,54.7,3.63
25%,24.675,54.9,3.65
50%,25.0,55.0,3.67
75%,25.35,55.1,3.68
max,26.0,55.2,3.7
