# Curso Sistemas Inteligentes e Robótica em Ciências Agrárias

## Análise de Dados

Este notebook serve para testar os exercícios gerados a partir do Gemini, descritos 
no notebook 



In [1]:
import pandas as pd
import numpy as np
import datetime

# --- Configuration ---
num_rows = 200  # Number of data points (e.g., hourly readings for a week)
start_date = datetime.datetime(2023, 1, 1, 0, 0, 0)
time_delta = datetime.timedelta(hours=1) # Time interval between readings

depths_cm = [5, 15, 30, 50]  # Depths in centimeters

# --- Helper function to generate data with some trend and noise ---
def generate_sensor_data(base_value, trend_factor, noise_level, size):
    """Generates sensor data with a slight trend and random noise."""
    trend = np.linspace(0, trend_factor, size)
    noise = np.random.normal(0, noise_level, size)
    data = base_value + trend + noise
    return data

# --- Generate Timestamps ---
timestamps = [start_date + i * time_delta for i in range(num_rows)]

# --- Initialize Data Dictionary ---
data = {'timestamp': timestamps}

# --- Generate Data for Each Parameter and Depth ---

# 1. Temperature (°C)
#    - Deeper soil has more stable temperature.
#    - Surface temperature fluctuates more with air temperature.
base_temp_surface = 15  # Average surface temperature
for depth in depths_cm:
    # Deeper soil is generally cooler in summer/hotter in winter, but less variable daily
    # For simplicity, let's make deeper soil slightly warmer on average and less noisy
    temp_variation_factor = 5 / (depth + 1) # Less variation for deeper soil
    depth_temp_offset = depth * 0.1 # Deeper soil slightly warmer on average (can be adjusted)
    temps = generate_sensor_data(
        base_value=base_temp_surface + depth_temp_offset,
        trend_factor=np.random.uniform(-2, 2), # Slight daily/weekly trend
        noise_level=temp_variation_factor,
        size=num_rows
    )
    # Clip to realistic temperature ranges
    data[f'temperature_{depth}cm_celsius'] = np.clip(temps, -5, 40)

# 2. Volumetric Water Content (VWC) (m³/m³ or %)
#    - Typically ranges from 0.05 (very dry) to 0.45 (saturated)
#    - Can vary significantly with rainfall and irrigation
base_vwc = 0.25 # Average VWC
for depth in depths_cm:
    vwc_variation_factor = 0.1 / (depth/10 + 1) # More variation near surface
    vwc_values = generate_sensor_data(
        base_value=base_vwc - (depth * 0.005), # Slightly drier deeper down (can be adjusted)
        trend_factor=np.random.uniform(-0.1, 0.1), # Simulating drying or wetting periods
        noise_level=vwc_variation_factor,
        size=num_rows
    )
    # Clip to realistic VWC ranges
    data[f'vwc_{depth}cm_percent'] = np.clip(vwc_values * 100, 5, 50) # As percentage

# 3. Soil Moisture Index (SMI) (unitless, often 0-1 or 0-100)
#    - Derived from VWC, often scaled. Let's assume 0 (dry) to 100 (saturated).
#    - This will be correlated with VWC.
for depth in depths_cm:
    # For simplicity, let's scale VWC to SMI (0-100)
    # This is a simplified SMI; real SMI can be more complex
    if f'vwc_{depth}cm_percent' in data:
        smi_values = (data[f'vwc_{depth}cm_percent'] - 5) / (50 - 5) * 100 # Scale from VWC range to 0-100
        # Add some independent noise to SMI as it might be a derived/calibrated value
        smi_noise = np.random.normal(0, 5, num_rows)
        smi_values = np.clip(smi_values + smi_noise, 0, 100)
        data[f'smi_{depth}cm_index'] = smi_values
    else: # Should not happen if VWC is generated
        data[f'smi_{depth}cm_index'] = np.full(num_rows, np.nan)


# --- Create DataFrame ---
df = pd.DataFrame(data)

# --- Introduce Missing Data ---
# Introduce approximately 5% missing data randomly across sensor columns
columns_to_nan = [col for col in df.columns if col != 'timestamp']
for col in columns_to_nan:
    nan_indices = np.random.choice(
        df.index,
        size=int(num_rows * 0.05), # 5% missing values
        replace=False
    )
    df.loc[nan_indices, col] = np.nan

# --- Display DataFrame Info and Head ---
print("DataFrame Info:")
df.info()
print("\nDataFrame Head:")
print(df.head())
print(f"\nDataFrame Shape: {df.shape}")

# --- Optional: Save to CSV ---
# df.to_csv("iot_soil_sensor_data.csv", index=False)
# print("\nDataFrame saved to iot_soil_sensor_data.csv")


DataFrame Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 13 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   timestamp                 200 non-null    datetime64[ns]
 1   temperature_5cm_celsius   190 non-null    float64       
 2   temperature_15cm_celsius  190 non-null    float64       
 3   temperature_30cm_celsius  190 non-null    float64       
 4   temperature_50cm_celsius  190 non-null    float64       
 5   vwc_5cm_percent           190 non-null    float64       
 6   vwc_15cm_percent          190 non-null    float64       
 7   vwc_30cm_percent          190 non-null    float64       
 8   vwc_50cm_percent          190 non-null    float64       
 9   smi_5cm_index             190 non-null    float64       
 10  smi_15cm_index            190 non-null    float64       
 11  smi_30cm_index            190 non-null    float64       
 12  smi_50

In [2]:
df

Unnamed: 0,timestamp,temperature_5cm_celsius,temperature_15cm_celsius,temperature_30cm_celsius,temperature_50cm_celsius,vwc_5cm_percent,vwc_15cm_percent,vwc_30cm_percent,vwc_50cm_percent,smi_5cm_index,smi_15cm_index,smi_30cm_index,smi_50cm_index
0,2023-01-01 00:00:00,15.027320,16.927098,17.924209,20.001198,28.615220,14.482392,11.834279,5.0,52.513231,19.923471,,11.071387
1,2023-01-01 01:00:00,14.631065,16.088382,17.868012,20.094106,33.384709,15.182142,10.083168,5.0,62.321670,23.304820,10.045697,2.051735
2,2023-01-01 02:00:00,15.334748,16.914945,17.886776,19.889221,20.342174,17.325983,,5.0,39.190010,26.947002,14.502262,2.666243
3,2023-01-01 03:00:00,14.411057,16.955805,17.876821,19.950961,29.455113,17.615273,10.499203,5.0,54.553237,26.609142,16.853158,0.000000
4,2023-01-01 04:00:00,16.966898,16.760011,17.737039,19.893667,26.165702,27.870682,,5.0,47.423617,52.323558,11.427502,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,2023-01-09 03:00:00,14.545533,17.491916,16.028337,21.616153,32.094399,13.637156,18.525246,5.0,62.971267,23.801723,24.647140,0.000000
196,2023-01-09 04:00:00,13.783487,,15.959462,21.881067,27.855775,14.553933,17.530942,5.0,52.003706,18.494902,32.185574,0.000000
197,2023-01-09 05:00:00,12.630636,17.502334,15.996226,21.920119,16.170992,10.138910,13.926169,5.0,19.135330,7.157379,11.688536,2.907213
198,2023-01-09 06:00:00,12.435081,17.194221,,21.969089,22.698462,17.315824,15.577725,5.0,44.556171,25.992564,21.019949,0.000000
