#### Randomly generate environmental data

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random

# Parameters
SEED = 42  # fixed seed: re-running the cell regenerates exactly the same synthetic readings
num_silos = 20
num_grain_temp_per_silo = 3
start_date = datetime(2024, 4, 29)
end_date = datetime(2024, 6, 30)
time_intervals = [("08:00", "morning"), ("14:00", "afternoon")]
hour_intervals = [(hour, 0) for hour in range(8, 18)]

# Dedicated generator: reproducible output without touching NumPy's global random state.
rng = np.random.default_rng(SEED)

# Calculate the number of days (start_date and end_date are both included)
num_days = (end_date - start_date).days + 1

# Generate data: one [device, timestamp, location, type, value, unit] record per reading
data = []

for silo in range(1, num_silos + 1):
    location = f"Silo{silo}"
    temp_device = f"Temp{silo}"
    humd_device = f"Humd{silo}"
    in_temp_device = f"InTemp{silo}"

    for day in range(num_days):  # Generate data for each day in the date range
        current_date = start_date + timedelta(days=day)

        # One temperature / humidity / intemperature triple per entry of time_intervals.
        # Only the clock time of each entry is used; the period label is ignored.
        for time_of_day, _period in time_intervals:
            timestamp = f"{current_date:%Y-%m-%d} {time_of_day}:00"
            temp_value = round(rng.uniform(8, 25), 1)
            humd_value = round(rng.uniform(5, 35), 1)
            in_temp_value = round(rng.uniform(6, 25), 1)

            data.append([temp_device, timestamp, location, "temperature", temp_value, "celsius"])
            data.append([humd_device, timestamp, location, "humidity", humd_value, "percentage"])
            data.append([in_temp_device, timestamp, location, "intemperature", in_temp_value, "celsius"])

        # Generate grain temperature data every day from 8 AM to 5 PM
        for hour, minute in hour_intervals:
            grain_temp_time = current_date.replace(hour=hour, minute=minute, second=0).strftime("%Y-%m-%d %H:%M:%S")

            for grain_temp_num in range(1, num_grain_temp_per_silo + 1):
                # Device numbers keep counting across silos, so every grain sensor ID is unique.
                grain_temp_device = f"GrainTemp{(silo-1)*num_grain_temp_per_silo + grain_temp_num}"
                grain_temp_value = round(rng.uniform(1, 15), 1)

                data.append([grain_temp_device, grain_temp_time, location, "graintemperature", grain_temp_value, "celsius"])

# Create DataFrame
columns = ["IoTDeviceID", "timestamp", "Location", "Type", "Value", "Unit"]
df = pd.DataFrame(data, columns=columns)

# Save to CSV (the reshape cell reads this exact file name)
csv_path = "Silo_iot_date.csv"
df.to_csv(csv_path, index=False)


##### Reshape the environmental data CSV

In [None]:
# Read back the long-format readings (one row per device reading)
file_path = 'Silo_iot_date.csv'
df = pd.read_csv(file_path)

# Reading types that map straight onto one wide column each
single_reading_columns = {
    'temperature': 'Temperature',
    'humidity': 'Humidity',
    'intemperature': 'InTemperature',
}

# Collect one wide record (a dict) per (Location, timestamp) pair
reshaped_data = []

for (location, timestamp), group in df.groupby(['Location', 'timestamp']):
    row = {'Location': location, 'timestamp': timestamp}
    for _, record in group.iterrows():
        reading_type = record['Type']
        if reading_type in single_reading_columns:
            column = single_reading_columns[reading_type]
        elif reading_type == 'graintemperature':
            # Grain readings fill slots in the order they are encountered; once
            # slots 1 and 2 are taken, every further reading is written to slot 3.
            column = next(
                (slot for slot in ('GrainTemperature1', 'GrainTemperature2') if slot not in row),
                'GrainTemperature3',
            )
        else:
            # Any other reading type is left out of the wide record
            continue
        row[column] = record['Value']
        row[f'{column}_IoTDeviceID'] = record['IoTDeviceID']
    reshaped_data.append(row)

# Missing measurements become 0.0 and missing device IDs become the string 'null'
fill_values = {}
for column in ('Temperature', 'Humidity', 'InTemperature',
               'GrainTemperature1', 'GrainTemperature2', 'GrainTemperature3'):
    fill_values[column] = 0.0
    fill_values[f'{column}_IoTDeviceID'] = 'null'

# Build the wide frame and apply the fill values
reshaped_df = pd.DataFrame(reshaped_data).fillna(fill_values)

# Add an empty, unnamed column right after the last grain-temperature device column
blank_position = reshaped_df.columns.get_loc("GrainTemperature3_IoTDeviceID") + 1
reshaped_df.insert(blank_position, '', None)

# Write the wide table without the index
reshaped_df.to_csv('Reshaped_Silo_IoT_Data.csv', index=False)

reshaped_df.head()


Unnamed: 0,Location,timestamp,Temperature,Temperature_IoTDeviceID,Humidity,Humidity_IoTDeviceID,InTemperature,InTemperature_IoTDeviceID,GrainTemperature1,GrainTemperature1_IoTDeviceID,GrainTemperature2,GrainTemperature2_IoTDeviceID,GrainTemperature3,GrainTemperature3_IoTDeviceID,Unnamed: 15
0,Silo1,2024-04-29 08:00:00,16.7,Temp1,32.9,Humd1,7.2,InTemp1,10.9,GrainTemp1,11.9,GrainTemp2,15.0,GrainTemp3,
1,Silo1,2024-04-29 09:00:00,0.0,,0.0,,0.0,,7.4,GrainTemp1,12.0,GrainTemp2,11.4,GrainTemp3,
2,Silo1,2024-04-29 10:00:00,0.0,,0.0,,0.0,,4.6,GrainTemp1,6.9,GrainTemp2,4.1,GrainTemp3,
3,Silo1,2024-04-29 11:00:00,0.0,,0.0,,0.0,,11.2,GrainTemp1,3.2,GrainTemp2,2.6,GrainTemp3,
4,Silo1,2024-04-29 12:00:00,0.0,,0.0,,0.0,,10.1,GrainTemp1,10.1,GrainTemp2,11.6,GrainTemp3,


In [None]:
# Number of rows in the reshaped table
len(reshaped_df)

12600

#### Randomly generate weight sensor data

In [None]:
import pandas as pd

# Parameters
num_devices = 20
device_prefix = "WS"
location_prefix = "weighbridge"

# One [device id, location, type] record per sensor, numbered from 1.
# Device IDs are zero-padded to two digits; location numbers are not padded.
data = [
    [f"{device_prefix}{index:02d}", f"{location_prefix}{index}", "weight sensor"]
    for index in range(1, num_devices + 1)
]

# Tabulate the records
columns = ["IoTDeviceID", "Location", "Type"]
df = pd.DataFrame(data, columns=columns)

# Write the table without the index
csv_path = "weight_sensor_data.csv"
df.to_csv(csv_path, index=False)


In [None]:
# Save the new dataframe to a CSV file
# NOTE(review): `df_extracted` is not assigned in any cell visible in this notebook, so
# on a fresh kernel this cell raises NameError unless a cell that is not shown here
# defines it — TODO confirm where it comes from and restore that cell above this one.
output_csv_path = 'empty_weight_history.csv'
df_extracted.to_csv(output_csv_path, index=False)

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import random
import math

# dew point calculation
def dew_point(T, RH):
    """Evaluate this notebook's dew-point expression for one temperature/humidity pair.

    Args:
        T: Temperature. The caller in this notebook passes a reading it labels "celsius".
        RH: Relative humidity. The caller passes a reading it labels "percentage".
            The value is handed to ``np.log``, so it has to be positive.

    Returns:
        ``474242 / (474242 / 273 + T - 89.1 * ln(RH) + 410.34) - 273``.
    """
    # NOTE(review): T is added to the denominator, so for a fixed RH the result gets
    # smaller as T grows — confirm the expression against the intended reference formula.
    denominator = 474242.0 / 273.0 + T - 89.1 * np.log(RH) + 410.34
    return 474242.0 / denominator - 273.0

# Parameters
SEED = 42  # fixed seed: re-running the cell regenerates exactly the same synthetic readings
num_silos = 20
num_grain_temp_per_silo = 3
start_date = datetime(2024, 4, 29)
end_date = datetime(2024, 10, 31)
time_intervals = [("08:00", "morning"), ("14:00", "afternoon")]
hour_intervals = [(hour, 0) for hour in range(8, 18)]
below_dew_point_probability = 0.15  # chance that an hourly slot is forced to the dew point or below

# A single seeded generator drives every random draw in this cell, so the output is
# reproducible and neither NumPy's nor the `random` module's global state is touched.
rng = np.random.default_rng(SEED)

# Calculate the number of days (start_date and end_date are both included)
num_days = (end_date - start_date).days + 1

# Generate data: one [device, timestamp, location, type, value, unit] record per reading
data = []

for silo in range(1, num_silos + 1):
    location = f"Silo{silo}"
    temp_device = f"Temp{silo}"
    humd_device = f"Humd{silo}"
    in_temp_device = f"InTemp{silo}"

    for day in range(num_days):   # Generate data for each day in the date range
        current_date = start_date + timedelta(days=day)

        # One temperature / humidity / intemperature triple per entry of time_intervals.
        # Only the clock time of each entry is used; the period label is ignored.
        for time_of_day, _period in time_intervals:
            timestamp = f"{current_date:%Y-%m-%d} {time_of_day}:00"
            temp_value = round(rng.uniform(8, 25), 1)
            humd_value = round(rng.uniform(5, 35), 1)
            in_temp_value = round(rng.uniform(6, 25), 1)

            # Calculate dew point
            dew_point_value = dew_point(temp_value, humd_value)

            data.append([temp_device, timestamp, location, "temperature", temp_value, "celsius"])
            data.append([humd_device, timestamp, location, "humidity", humd_value, "percentage"])
            data.append([in_temp_device, timestamp, location, "intemperature", in_temp_value, "celsius"])

        # Generate grain temperature data every day from 8 AM to 5 PM.
        # dew_point_value still holds the value computed for the last entry of
        # time_intervals, and that single value is applied to every hourly slot of the day.
        for hour, minute in hour_intervals:
            grain_temp_time = current_date.replace(hour=hour, minute=minute, second=0).strftime("%Y-%m-%d %H:%M:%S")

            # Decide whether this hourly slot is a "below dew point" event
            is_below_dew_point = rng.random() < below_dew_point_probability

            # Grain temperatures already generated for this slot (needed for the last sensor)
            grain_temps = []

            for grain_temp_num in range(1, num_grain_temp_per_silo + 1):
                # Device numbers keep counting across silos, so every grain sensor ID is unique.
                grain_temp_device = f"GrainTemp{(silo-1)*num_grain_temp_per_silo + grain_temp_num}"

                if is_below_dew_point:
                    if grain_temp_num < num_grain_temp_per_silo:
                        # Draw between 1 and the dew point. The bounds are ordered
                        # explicitly because the dew point may be below 1 and uniform()
                        # is not defined for low > high.
                        low, high = sorted((1, dew_point_value))
                        grain_temp_value = round(rng.uniform(low, high), 1)
                    else:
                        # The last sensor takes whatever keeps the sum of the slot at
                        # dew_point * sensor count, capped at the dew point itself, so
                        # the mean of the slot never exceeds the dew point.
                        remaining_temp = (dew_point_value * num_grain_temp_per_silo) - sum(grain_temps)
                        grain_temp_value = round(min(remaining_temp, dew_point_value), 1)
                else:
                    # Generate grain temperature higher than or equal to the dew point
                    # (bounds ordered for the same reason as above).
                    low, high = sorted((dew_point_value + 0.1, 15))
                    grain_temp_value = round(rng.uniform(low, high), 1)

                grain_temps.append(grain_temp_value)
                data.append([grain_temp_device, grain_temp_time, location, "graintemperature", grain_temp_value, "celsius"])

# Create DataFrame
columns = ["IoTDeviceID", "timestamp", "Location", "Type", "Value", "Unit"]
df = pd.DataFrame(data, columns=columns)

# Save to CSV (the reshape step reads this exact file name)
csv_path = "Silo_iot_data_with_dewpoint_prob.csv"
df.to_csv(csv_path, index=False)

# Read back the long-format readings (one row per device reading)
file_path = 'Silo_iot_data_with_dewpoint_prob.csv'
df = pd.read_csv(file_path)

# Reading types that map straight onto one wide column each
single_reading_columns = {
    'temperature': 'Temperature',
    'humidity': 'Humidity',
    'intemperature': 'InTemperature',
}

# Collect one wide record (a dict) per (Location, timestamp) pair
reshaped_data = []

for (location, timestamp), group in df.groupby(['Location', 'timestamp']):
    row = {'Location': location, 'timestamp': timestamp}
    for _, record in group.iterrows():
        reading_type = record['Type']
        if reading_type in single_reading_columns:
            column = single_reading_columns[reading_type]
        elif reading_type == 'graintemperature':
            # Grain readings fill slots in the order they are encountered; once
            # slots 1 and 2 are taken, every further reading is written to slot 3.
            column = next(
                (slot for slot in ('GrainTemperature1', 'GrainTemperature2') if slot not in row),
                'GrainTemperature3',
            )
        else:
            # Any other reading type is left out of the wide record
            continue
        row[column] = record['Value']
        row[f'{column}_IoTDeviceID'] = record['IoTDeviceID']
    reshaped_data.append(row)

# Missing measurements become 0.0 and missing device IDs become the string 'null'
fill_values = {}
for column in ('Temperature', 'Humidity', 'InTemperature',
               'GrainTemperature1', 'GrainTemperature2', 'GrainTemperature3'):
    fill_values[column] = 0.0
    fill_values[f'{column}_IoTDeviceID'] = 'null'

# Build the wide frame and apply the fill values
reshaped_df = pd.DataFrame(reshaped_data).fillna(fill_values)

# Add an empty, unnamed column right after the last grain-temperature device column
blank_position = reshaped_df.columns.get_loc("GrainTemperature3_IoTDeviceID") + 1
reshaped_df.insert(blank_position, '', None)

# Write the wide table without the index
reshaped_df.to_csv('Reshaped_Silo_IoT_Data_with_dewpoint_prob.csv', index=False)


In [None]:
# Show the reshaped frame as the cell output (the recorded output lists only the first and last rows).
reshaped_df

Unnamed: 0,Location,timestamp,Temperature,Temperature_IoTDeviceID,Humidity,Humidity_IoTDeviceID,InTemperature,InTemperature_IoTDeviceID,GrainTemperature1,GrainTemperature1_IoTDeviceID,GrainTemperature2,GrainTemperature2_IoTDeviceID,GrainTemperature3,GrainTemperature3_IoTDeviceID,Unnamed: 15
0,Silo1,2024-04-29 08:00:00,17.8,Temp1,19.5,Humd1,23.8,InTemp1,-14.8,GrainTemp1,-2.3,GrainTemp2,-14.6,GrainTemp3,
1,Silo1,2024-04-29 09:00:00,0.0,,0.0,,0.0,,-20.2,GrainTemp1,-19.3,GrainTemp2,-24.8,GrainTemp3,
2,Silo1,2024-04-29 10:00:00,0.0,,0.0,,0.0,,-11.1,GrainTemp1,-8.2,GrainTemp2,10.8,GrainTemp3,
3,Silo1,2024-04-29 11:00:00,0.0,,0.0,,0.0,,5.2,GrainTemp1,-16.1,GrainTemp2,9.1,GrainTemp3,
4,Silo1,2024-04-29 12:00:00,0.0,,0.0,,0.0,,12.7,GrainTemp1,-1.7,GrainTemp2,-1.9,GrainTemp3,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
37195,Silo9,2024-10-31 13:00:00,0.0,,0.0,,0.0,,8.4,GrainTemp25,5.1,GrainTemp26,-14.5,GrainTemp27,
37196,Silo9,2024-10-31 14:00:00,8.5,Temp9,33.3,Humd9,7.3,InTemp9,4.1,GrainTemp25,10.9,GrainTemp26,-2.4,GrainTemp27,
37197,Silo9,2024-10-31 15:00:00,0.0,,0.0,,0.0,,-2.4,GrainTemp25,2.3,GrainTemp26,-0.6,GrainTemp27,
37198,Silo9,2024-10-31 16:00:00,0.0,,0.0,,0.0,,-3.2,GrainTemp25,11.6,GrainTemp26,4.4,GrainTemp27,


In [1]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta

# Define start and end dates (both are midnight timestamps)
start_date = datetime(2024, 4, 29)
end_date = datetime(2024, 6, 30)

# Generate hourly timestamps from start_date to end_date inclusive; because end_date is
# midnight, the last timestamp is 2024-06-30 00:00.
# 'h' is the current hourly alias; the uppercase 'H' spelling is deprecated since pandas 2.2.
date_range = pd.date_range(start=start_date, end=end_date, freq='h')

# Function to generate rainfall amount (mm)
def generate_rainfall():
    """Draw one random rainfall amount in millimetres, rounded to one decimal.

    A first draw from ``np.random.rand`` picks the regime:
    - below 0.7 (about 70% of calls): light or no rain, uniform over 0 - 1.9 mm
    - otherwise (about 30% of calls): moderate to heavy rain, uniform over 2 - 15 mm

    Returns:
        The amount produced by ``np.random.uniform`` for the chosen range,
        rounded to one decimal place.
    """
    is_light = np.random.rand() < 0.7
    low, high = (0, 1.9) if is_light else (2, 15)
    return round(np.random.uniform(low, high), 1)

# Draw one rainfall amount for every hourly timestamp
rainfall_data = [generate_rainfall() for _ in range(len(date_range))]

# Pair each timestamp with its rainfall amount
df = pd.DataFrame({"DateTime": date_range, "Rainfall (mm)": rainfall_data})

# Write the table without the index, then report the file name
csv_filename = "hourly_rainfall_data.csv"
df.to_csv(csv_filename, index=False)
print(f"Rainfall data saved to {csv_filename}")


  from pandas.core import (


Rainfall data saved to hourly_rainfall_data.csv


Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
  date_range = pd.date_range(start=start_date, end=end_date, freq='H')
