In [None]:

import os, time, pandas as pd,random
from datetime import datetime

# Output directory for the CSV files; created up front if it does not exist
output_dir = "sensor_data/csv"
os.makedirs(output_dir, exist_ok=True)

# Column headers for the sensor-readings CSV file.
# NOTE(review): 'motor_temperature' does not match the 'temperature' /
# 'temperature_f' keys produced by generate_temperature_humidity_data, and
# this list is not referenced by the cells visible here - confirm whether it
# is still needed or should be aligned with the generated columns.
header = ['sensor_id', 'timestamp', 'motor_temperature', 'humidity']


In [42]:

def generate_temperature_humidity_data(sensor_id=1):
    """Simulate a single temperature/humidity sensor reading.

    Args:
        sensor_id: Identifier stored in the reading (defaults to 1).

    Returns:
        dict with the keys ``sensor_id``, ``timestamp`` (ISO-8601 string of
        the current local time), ``temperature`` (uniform in [18, 30],
        rounded to 2 decimals), ``temperature_f`` (the same value converted
        with ``t * 9 / 5 + 32``, rounded to 2 decimals) and ``humidity``
        (uniform in [40, 70], rounded to 2 decimals, or ``None`` for roughly
        one reading in five to simulate a missing value).
    """
    celsius = round(random.uniform(18, 30), 2)
    fahrenheit = round(celsius * 9 / 5 + 32, 2)

    reading = {
        "sensor_id": sensor_id,
        "timestamp": datetime.now().isoformat(),
        "temperature": celsius,
        "temperature_f": fahrenheit,
    }

    # Roll for "value present" first; a humidity value is only drawn when the
    # roll succeeds, so a dropped reading consumes a single random number.
    if random.random() > 0.2:
        reading["humidity"] = round(random.uniform(40, 70), 2)
    else:
        reading["humidity"] = None
    return reading


In [43]:
# Save data as CSV
def save_temperature_humidity_csv(filename, data):
    """Write sensor readings to a CSV file inside ``output_dir``.

    Args:
        filename: Name of the CSV file to create (joined onto ``output_dir``).
        data: Anything accepted by ``pd.DataFrame`` (the notebook passes both
            a DataFrame and a list of reading dicts).

    The file is overwritten if it exists, written with a header row and
    without the index column; the destination path is printed afterwards.
    """
    filepath = os.path.join(output_dir, filename)
    pd.DataFrame(data).to_csv(
        filepath,
        index=False,
        header=True,
        mode='w',
    )
    print(f"Saved Temperature & Humidity CSV: {filepath}")

In [46]:
# impute only humidity
from sklearn.linear_model import LinearRegression


def impute_missing_values(df, strategy='mean'):
    """Fill missing values in the ``humidity`` column of ``df``.

    The frame is modified in place and also returned, so both
    ``impute_missing_values(df)`` and ``df = impute_missing_values(df)`` work.

    Args:
        df: DataFrame with a ``humidity`` column; the ``'regression'``
            strategy additionally needs a ``temperature`` column.
        strategy: One of
            ``'mean'``       - fill with the column mean,
            ``'median'``     - fill with the column median,
            ``'mode'``       - fill with the most frequent value (the
                               smallest one when several values tie),
            ``'regression'`` - fill with predictions of a linear regression
                               of humidity on temperature, fitted on the rows
                               where humidity is present.
            Any other value leaves the frame unchanged.

    Returns:
        The same DataFrame object, with humidity imputed where possible.
        For ``'mean'``, ``'median'`` and ``'mode'`` the whole column is
        rounded to 2 decimals; for ``'regression'`` only the predicted
        values are rounded.
    """
    humidity = df['humidity']

    if strategy == 'mean':
        df['humidity'] = humidity.fillna(humidity.mean()).round(2)
    elif strategy == 'median':
        df['humidity'] = humidity.fillna(humidity.median()).round(2)
    elif strategy == 'mode':
        # Series.mode() returns a Series; passing it straight to fillna would
        # align on index labels and fill only the rows labelled 0..k-1.
        # Use the first (smallest) mode as a scalar instead.
        modes = humidity.mode()
        if not modes.empty:  # empty when every humidity value is missing
            df['humidity'] = humidity.fillna(modes.iloc[0]).round(2)
    elif strategy == 'regression':
        missing = humidity.isnull()
        # Nothing to predict when no value is missing, and nothing to learn
        # from when every value is missing; the estimator rejects empty
        # inputs in both cases, so only fit when both groups are non-empty.
        if missing.any() and not missing.all():
            observed = df.loc[~missing]
            model = LinearRegression()
            model.fit(observed[['temperature']], observed['humidity'])
            predictions = model.predict(df.loc[missing, ['temperature']])
            df.loc[missing, 'humidity'] = predictions.round(2)
    return df
        

In [47]:

# Number of readings to simulate, entered interactively
num_iterations = int(input("Enter number of readings to generate: "))
if num_iterations < 1:
    # With no readings the DataFrame has no 'humidity' column and the
    # imputation step below would fail with a KeyError after an empty raw
    # file had already been written, so stop early with a clear message.
    raise ValueError("Number of readings must be at least 1")

temperature_humidity_sensor_data = []
# Generate the sensor readings
for i in range(num_iterations):
    temperature_humidity_data = generate_temperature_humidity_data()
    temperature_humidity_sensor_data.append(temperature_humidity_data)
    print(temperature_humidity_data)

    # Short pause between consecutive readings
    time.sleep(0.1)

    # Every third reading (except the very first) is appended a second time
    # so that the raw data contains duplicate rows
    if i % 3 == 0 and i != 0:
        temperature_humidity_sensor_data.append(temperature_humidity_data)
        print(temperature_humidity_data)

# Create the DataFrame from all collected readings
df = pd.DataFrame(temperature_humidity_sensor_data)

# Save raw data (with duplicates)
save_temperature_humidity_csv("temperature_humidity_errors.csv", df)

# Remove duplicated rows
df = df.drop_duplicates()

# Missing humidity values are imputed (rather than dropping the rows)
df = impute_missing_values(df, strategy='mean')

# Save cleaned data
save_temperature_humidity_csv("temperature_humidity.csv", df)


{'sensor_id': 1, 'timestamp': '2025-03-31T15:31:46.041442', 'temperature': 28.35, 'temperature_f': 83.03, 'humidity': 66.27}
{'sensor_id': 1, 'timestamp': '2025-03-31T15:31:46.142277', 'temperature': 22.04, 'temperature_f': 71.67, 'humidity': 50.42}
{'sensor_id': 1, 'timestamp': '2025-03-31T15:31:46.243198', 'temperature': 20.37, 'temperature_f': 68.67, 'humidity': 46.36}
{'sensor_id': 1, 'timestamp': '2025-03-31T15:31:46.344391', 'temperature': 24.9, 'temperature_f': 76.82, 'humidity': 44.11}
{'sensor_id': 1, 'timestamp': '2025-03-31T15:31:46.344391', 'temperature': 24.9, 'temperature_f': 76.82, 'humidity': 44.11}
{'sensor_id': 1, 'timestamp': '2025-03-31T15:31:46.445296', 'temperature': 26.61, 'temperature_f': 79.9, 'humidity': 66.69}
{'sensor_id': 1, 'timestamp': '2025-03-31T15:31:46.545502', 'temperature': 19.52, 'temperature_f': 67.14, 'humidity': 50.0}
{'sensor_id': 1, 'timestamp': '2025-03-31T15:31:46.646393', 'temperature': 27.25, 'temperature_f': 81.05, 'humidity': 47.53}
{'se