In [15]:
import pandas as pd
import numpy as np
from datetime import timedelta, datetime

# Parameters
num_days = 7  # Number of days to collect data
hours_per_day = 24  # Number of hours in a day
start_date = datetime(2024, 1, 1)  # Starting date
num_samples = num_days * hours_per_day

# Generate data
data = []
for hour in range(num_samples):
    timestamp = start_date + timedelta(hours=hour)

    # Simulating realistic patterns
    humidity = np.random.normal(loc=75, scale=10)  # Average 75% humidity with some variability
    temperature = np.random.normal(loc=25, scale=5)  # Average 25°C
    soil_moisture = np.random.normal(loc=50, scale=15)  # Average 50% soil moisture
    gas_levels = np.random.uniform(0, 100)  # Simulated gas levels

    # Determine disease risk based on conditions
    diseases_prone_to = []
    disease_risk = 0

    if humidity > 90 and temperature > 30:
        diseases_prone_to.append("Apple___Apple_scab")
        disease_risk = 1
    elif humidity > 80 and soil_moisture > 70:
        diseases_prone_to.append("Apple___Black_rot")
        disease_risk = 1
    elif temperature > 35 and humidity > 75:
        diseases_prone_to.append("Apple___Cedar_apple_rust")
        disease_risk = 1

    if not diseases_prone_to:
        diseases_prone_to = ["Healthy"]

    data.append([timestamp, humidity, temperature, soil_moisture, gas_levels, ', '.join(diseases_prone_to), disease_risk])

# Create DataFrame
df = pd.DataFrame(data, columns=["Timestamp", "Humidity", "Temperature", "Soil Moisture", "Gas Levels", "Diseases Prone To", "Disease Risk"])

# Save to CSV for reference
df.to_csv("synthetic_hourly_apple_scab_dataset.csv", index=False)

# Display the first few rows of the dataset
print(df.head())


            Timestamp   Humidity  Temperature  Soil Moisture  Gas Levels  \
0 2024-01-01 00:00:00  78.262380    21.894255      26.878127   95.262174   
1 2024-01-01 01:00:00  77.900060    21.602907      45.351559    4.439906   
2 2024-01-01 02:00:00  59.334534    35.841645      56.869532    3.531373   
3 2024-01-01 03:00:00  74.395622    29.108370      55.393598   34.704940   
4 2024-01-01 04:00:00  89.123547    21.510083      40.399514   47.839250   

  Diseases Prone To  Disease Risk  
0           Healthy             0  
1           Healthy             0  
2           Healthy             0  
3           Healthy             0  
4           Healthy             0  


In [16]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# Prepare data for training
X = df[['Humidity', 'Temperature', 'Soil Moisture', 'Gas Levels']]
y = df['Disease Risk']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize and train the logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))
print("\nClassification Report:")
print(classification_report(y_test, y_pred))


Confusion Matrix:
[[47  1]
 [ 2  1]]

Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.98      0.97        48
           1       0.50      0.33      0.40         3

    accuracy                           0.94        51
   macro avg       0.73      0.66      0.68        51
weighted avg       0.93      0.94      0.94        51



In [17]:
pip install joblib




In [18]:
import joblib

# Save the trained model to a file
joblib.dump(model, 'logistic_regression_model.pkl')


['logistic_regression_model.pkl']

In [20]:
import pandas as pd
import joblib

# Load the trained model
loaded_model = joblib.load('logistic_regression_model.pkl')

# Prepare new input data
new_data = {
    'Humidity': [85, 92, 75],        # Example humidity values
    'Temperature': [30, 28, 22],     # Example temperature values
    'Soil Moisture': [65, 80, 50],    # Example soil moisture values
    'Gas Levels': [20, 15, 30]        # Example gas levels
}

# Create a DataFrame for the new input data
new_df = pd.DataFrame(new_data)

# Make predictions using the loaded model
predictions = loaded_model.predict(new_df)

# Define the disease mapping based on your classes
disease_mapping = {
    0: "Healthy",
    1: "Apple___Apple_scab",
    2: "Apple___Black_rot",
    3: "Apple___Cedar_apple_rust",
    # Add other mappings as needed
}

# Output the predictions with disease names
for i, pred in enumerate(predictions):
    disease = disease_mapping.get(pred, "Unknown disease")
    print(f"Prediction for input {i + 1}: {disease}")


Prediction for input 1: Apple___Apple_scab
Prediction for input 2: Apple___Apple_scab
Prediction for input 3: Healthy
