# Extracting Sample Data from the Synthetic Dataset

Related to Homework: Smart Contract Data Storage. This sample script extracts a few entries from the dataset and formats them as `storeData()` calls to send to the contract.

In [3]:
import pandas as pd
import re

In [4]:
# Read the synthetic dataset, parsing the "Timestamp" column as datetimes
df = pd.read_csv(
    filepath_or_buffer="synthetic_dataset.csv",
    parse_dates=["Timestamp"],
)

In [5]:
# Columns to keep: the timestamp plus five measurement columns
selected_columns = [
    'Timestamp',
    'Temperature (°C)',
    'Conductivity (µS/cm)',
    'Turbidity (NTU)',
    'ClO2 MS1 (mg/L)',
    'pH',
]

# Keep only those columns, shorten the ClO2 column name for consistency,
# and replace every NaN with 0
df_selected = (
    df[selected_columns]
    .rename(columns={'ClO2 MS1 (mg/L)': 'ClO2 (mg/L)'})
    .fillna(0)
)

# Small sample for testing: the first 2 rows only
sample_data = df_selected.head(2)

# Turn a column name into an underscore-separated token
def sanitize_column_name(name):
    """Return *name* with separators collapsed into single underscores.

    Spaces, parentheses, and forward slashes each become an underscore,
    runs of underscores are then squeezed to one, and underscores at either
    end are removed. Any other character (including non-ASCII ones such as
    the degree sign) is left as is.
    """
    result = name
    # Pass 1 swaps each separator for "_"; pass 2 squeezes runs of "_".
    for pattern in (r'[ \(\)/]', r'_+'):
        result = re.sub(pattern, '_', result)
    return result.strip('_')

# Function to generate Solidity storeData() calls from DataFrame
def generate_solidity_calls(df):
    """Return one ``storeData(...)`` call string per reading in *df*.

    Every column other than ``'Timestamp'`` is treated as a reading. For
    each row and each such column, a string of the form
    ``storeData("<sensor_id>", "<column name>", <int>); // <timestamp>``
    is produced, where ``<sensor_id>`` is ``SENSOR_`` followed by the
    sanitized column name.

    Values are converted to ``float`` and then truncated toward zero with
    ``int``; any fractional part is discarded. A value that cannot be
    converted (non-numeric, ``None``, NaN, or infinite) is emitted as ``0``.

    Args:
        df: DataFrame with a ``'Timestamp'`` column plus reading columns.

    Returns:
        A list of call strings, ordered row by row and, within each row,
        in column order.

    Raises:
        KeyError: If *df* has rows but no ``'Timestamp'`` column.
    """
    # Skip the timestamp by name rather than by position, so it is excluded
    # wherever it sits in the frame (it is also looked up by name below).
    reading_columns = [col for col in df.columns if col != 'Timestamp']

    calls = []
    for _, row in df.iterrows():
        timestamp = row['Timestamp']
        for col in reading_columns:
            try:
                # float() rejects non-numeric input; int() raises ValueError
                # for NaN and OverflowError for +/-inf. All of these take
                # the same fallback instead of aborting the whole run.
                int_value = int(float(row[col]))
            except (ValueError, TypeError, OverflowError):
                int_value = 0  # fallback if value is not convertible

            sensor_id = f"SENSOR_{sanitize_column_name(col)}"
            data_type = col

            # Generate Solidity call with a comment including timestamp
            call = f'storeData("{sensor_id}", "{data_type}", {int_value}); // {timestamp}'
            calls.append(call)
    return calls

# Build the list of storeData() call strings from the extracted sample rows
calls = generate_solidity_calls(sample_data)

# Print each generated call on its own line
for call in calls:
    print(call)

storeData("SENSOR_Temperature_°C", "Temperature (°C)", 7); // 2016-08-03 11:49:00
storeData("SENSOR_Conductivity_µS_cm", "Conductivity (µS/cm)", 198); // 2016-08-03 11:49:00
storeData("SENSOR_Turbidity_NTU", "Turbidity (NTU)", 0); // 2016-08-03 11:49:00
storeData("SENSOR_ClO2_mg_L", "ClO2 (mg/L)", 0); // 2016-08-03 11:49:00
storeData("SENSOR_pH", "pH", 8); // 2016-08-03 11:49:00
storeData("SENSOR_Temperature_°C", "Temperature (°C)", 9); // 2016-08-03 11:50:00
storeData("SENSOR_Conductivity_µS_cm", "Conductivity (µS/cm)", 208); // 2016-08-03 11:50:00
storeData("SENSOR_Turbidity_NTU", "Turbidity (NTU)", 0); // 2016-08-03 11:50:00
storeData("SENSOR_ClO2_mg_L", "ClO2 (mg/L)", 0); // 2016-08-03 11:50:00
storeData("SENSOR_pH", "pH", 8); // 2016-08-03 11:50:00
