In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

## Step 1: Data ingestion and transformation

### Loading Dataset

In [None]:
# Read the raw sensor CSV into the working DataFrame used by every later cell.
# NOTE(review): absolute Colab path - adjust when running outside /content.
df = pd.read_csv(filepath_or_buffer="/content/sensor_data.csv")

In [None]:
# Peek at three randomly chosen rows (unseeded, so the rows differ between runs).
df.sample(n=3)

In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

## Creating a new column

- A new column, combined_value, is derived by summing the two original columns.

In [None]:
# Element-wise sum of the two source columns, stored as a new column on df.
df['combined_value'] = df['sensor_reading'].add(df['control_value'])

In [None]:
# Spot-check two random rows to confirm the new column is present.
df.sample(n=2)

In [None]:
# Count fully duplicated rows; with pandas' default keep='first', the first
# occurrence of each repeated row is not counted.
df.duplicated().sum()

In [None]:
# Number of missing values in each column.
df.isna().sum()

In [None]:
# Descriptive statistics per column (by default pandas summarises the numeric columns).
df.describe()

## Step 2: Analytical queries for the required statistics


In [None]:
# Per-column central-tendency statistics, one Series per statistic.
# numeric_only=True is passed to all three so that:
#   * mean()/median() do not raise TypeError on pandas >= 2.0 when the CSV
#     contains a non-numeric column, and
#   * the three Series share the same index (the numeric columns, in their
#     original order), so the combined frame has no re-sorted or NaN-padded rows.
result = {
    'Mean': df.mean(numeric_only=True),
    'Median': df.median(numeric_only=True),
    'Mode': df.mode(numeric_only=True).iloc[0]  # Taking the first mode value in case of multiple modes
}

# Rows are the source columns of df; columns are the statistics named above.
summary_df = pd.DataFrame(result)

In [None]:
# Keep the summary rows at positions 1, 2 and last (all statistic columns retained).
# NOTE(review): presumably sensor_reading, control_value and combined_value -
# the positions depend on the CSV column order; confirm.
temp = summary_df.iloc[[1, 2, -1]]

In [None]:
# Display the selected summary rows.
temp

## Step 3: Visualizations:

In [None]:
# Columns to visualise, and one matplotlib colour code per column (paired by position).
cols = [
    'sensor_reading',
    'control_value',
    'combined_value',
]
c = [
    'r',
    'g',
    'b',
]

In [None]:
# One KDE panel per entry in `cols`, drawn side by side.
# The panel count follows len(cols) instead of a hard-coded 3. squeeze=False
# plus ravel() keeps `axes` a flat 1-D array of Axes even for a single column.
fig, axes = plt.subplots(nrows=1, ncols=len(cols), figsize=(15, 5), squeeze=False)
axes = axes.ravel()

# enumerate gives the panel position directly; the previous cols.index(i)
# lookups were repeated three times per iteration and would pick the wrong
# panel if a column name appeared twice in `cols`.
for idx, col in enumerate(cols):
    ax = axes[idx]
    sns.kdeplot(data=df, x=col, ax=ax, color=c[idx])  # c must hold one colour per column
    ax.set_title(f'Distribution of {col}')

plt.tight_layout()
plt.show()

In [None]:
# Heatmap of the pairwise correlation between all numeric columns.
# numeric_only=True: on pandas >= 2.0, corr() raises on non-numeric columns
# instead of silently skipping them.

plt.figure(figsize=(12,5))
sns.heatmap(df.corr(numeric_only=True), annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()

Strongest correlations (from the heatmap above):
1. adjusted_sensor and combined_value
2. adjusted_sensor and performance_metric
3. performance_metric and sensor_reading

In [None]:
# Overlay the three histograms (each with a KDE curve) on one set of axes.
# Each entry: (column in df, bar colour, legend label).
# NOTE(review): these colours differ from the r/g/b mapping used for the
# per-column KDE panels (control_value is blue here, green there) - confirm intent.
series_styles = [
    ("sensor_reading", 'red', "Sensor Reading"),
    ("control_value", 'blue', "Control Value"),
    ("combined_value", 'green', "Combined Value"),
]

plt.figure(figsize=(10,5))
for column, colour, legend_label in series_styles:
    # Semi-transparent bars so overlapping distributions stay visible.
    sns.histplot(df[column], bins=30, kde=True, color=colour, label=legend_label, alpha=0.5)
plt.xlabel("Value")
plt.ylabel("Frequency")
plt.title("Distribution of Sensor Readings & Control Values")
plt.legend()
plt.show()