# Irrigation Predictor Modeling

#### The model utilizes weather features of Johannesburg to predict soil moisture for the system to either irrigate or not to.

In [1]:
import pandas as pd 
import numpy as np 
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns 
%matplotlib inline

In [None]:
df = pd.read_csv('New_Dataset.csv', sep=',')
df.head()

### Data Exploration

In [None]:
df.shape

In [None]:
df.dtypes

In [None]:
df.drop(['time', 'snow_depth (m)', 'et0_fao_evapotranspiration (mm)', 'precipitation (mm)',
       'soil_temperature_0_to_7cm (°C)', 'soil_temperature_7_to_28cm (°C)',
       'soil_temperature_28_to_100cm (°C)', 'soil_moisture_0_to_7cm (m³/m³)'], axis=1, inplace=True)

In [None]:
df.head()

#### Convert units of measurement of snowfall, wind speed & wind gust
##### 1. snowfall: cm - mm
##### 2. wind speed: km/h - m/s
##### 3. wind direction: km/h - m/s

In [None]:
# snowfall conversion
df['snowfall(mm)'] = df['snowfall (cm)'] * 10

#wind speed conversion
df['wind_speed(m/s)'] = df['wind_speed_10m (km/h)'] * 3.6

#wind gust conversion
df['wind_gust(m/s)'] = df['wind_gusts_10m (km/h)'] * 3.6

In [None]:
df.drop(['wind_gusts_10m (km/h)', 'snowfall (cm)', 'wind_speed_10m (km/h)'], axis=1, inplace=True)

In [None]:
rename_column = {'soil_moisture_7_to_28cm (m³/m³)': 'soil_moisture'}

df.rename(columns=rename_column, inplace=True)

In [None]:
last_column = df.pop('soil_moisture')
 
df.insert(df.shape[1], 'soil_moisture', last_column)

In [None]:
df.head()

In [None]:
column = df['soil_moisture']
print(f'The minimum value of soil moisture is {column.min()}')
print(f'The maximum value of soil moisture is {column.max()}')
print(f'The average value of soil moisture is {column.mean()}')
print(f'The median value of soil moisture is {column.median()}')

In [None]:
df['soil_moisture'] = df['soil_moisture'] * 100

In [None]:
column = df['soil_moisture']
print(f'The minimum value of soil moisture is {column.min()}')
print(f'The maximum value of soil moisture is {column.max()}')
print(f'The average value of soil moisture is {column.mean()}')
print(f'The median value of soil moisture is {column.median()}')

In [None]:
from IPython.display import Image 

Image(url="Soil Moisture Predictor.png", width=600, height=400) 


In [None]:
df.isnull().sum()

In [None]:
df.describe()

### Visualization

In [None]:
df.shape

In [None]:
df.columns

In [2]:
df = pd.read_csv('dataset/WaTime_Dataset.csv')
df.head()

Unnamed: 0,temperature_2m (°C),relative_humidity_2m (%),rain (mm),surface_pressure (hPa),cloud_cover (%),wind_direction_10m (°),snowfall(mm),wind_speed(m/s),wind_gust(m/s),soil_moisture
0,-2.3,86,0.0,992.7,100,70,1.4,64.8,121.68,37.7
1,-2.5,87,0.0,992.5,100,66,2.1,56.88,120.6,37.7
2,-2.6,87,0.0,992.4,100,56,2.8,53.28,112.68,37.7
3,-2.6,88,0.0,992.1,100,54,3.5,57.6,108.72,37.7
4,-2.7,88,0.0,991.6,100,55,3.5,58.68,108.72,37.7


In [3]:
X = df.drop('soil_moisture',axis=1)
y = np.array(df['soil_moisture'])

feature_list = list(X.columns)
print(feature_list)
# mean_value_temperature = df['temperature_2m (°C)'].mean()
# mean_value_soilMoisture = df['soil_moisture'].mean()
# plt.figure(figsize=(8,6))
# plt.title('Values of Temperature vs. Soil Moisture')
# plt.scatter('temperature_2m (°C)', 'soil_moisture', data=df)
# plt.xlabel('Soil Moisture')
# plt.ylabel('Temperature')

['temperature_2m (°C)', 'relative_humidity_2m (%)', 'rain (mm)', 'surface_pressure (hPa)', 'cloud_cover (%)', 'wind_direction_10m (°)', 'snowfall(mm)', 'wind_speed(m/s)', 'wind_gust(m/s)']


In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    mape = 100 * (mae / y_test)
    accuracy = 100 - np.mean(mape)
    
    print(f"Model: {type(model).__name__}")
    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"R-squared (R²): {r2:.4f}")
    print('Accuracy:', round(accuracy, 2), '%.')
    print("="*50)


models = [
    LinearRegression(),
    Ridge(),
    Lasso(),
    DecisionTreeRegressor(),
    RandomForestRegressor(n_estimators=100),
    GradientBoostingRegressor(n_estimators=100),
    XGBRegressor(),
    SVR(),
    MLPRegressor(hidden_layer_sizes=(100, 100), max_iter=500),
    KNeighborsRegressor(n_neighbors=5)
]

for model in models:
    model.fit(X_train, y_train)
    evaluate_model(model, X_test, y_test)

Model: LinearRegression
Mean Squared Error (MSE): 31.2221
Root Mean Squared Error (RMSE): 5.5877
Mean Absolute Error (MAE): 4.4958
R-squared (R²): 0.5101
Accuracy: 83.07 %.
Model: Ridge
Mean Squared Error (MSE): 31.2221
Root Mean Squared Error (RMSE): 5.5877
Mean Absolute Error (MAE): 4.4958
R-squared (R²): 0.5101
Accuracy: 83.07 %.
Model: Lasso
Mean Squared Error (MSE): 31.3518
Root Mean Squared Error (RMSE): 5.5993
Mean Absolute Error (MAE): 4.5138
R-squared (R²): 0.5080
Accuracy: 83.0 %.
Model: DecisionTreeRegressor
Mean Squared Error (MSE): 39.7492
Root Mean Squared Error (RMSE): 6.3047
Mean Absolute Error (MAE): 3.9738
R-squared (R²): 0.3763
Accuracy: 85.03 %.
Model: RandomForestRegressor
Mean Squared Error (MSE): 19.3921
Root Mean Squared Error (RMSE): 4.4036
Mean Absolute Error (MAE): 3.2209
R-squared (R²): 0.6957
Accuracy: 87.87 %.
Model: GradientBoostingRegressor
Mean Squared Error (MSE): 27.3800
Root Mean Squared Error (RMSE): 5.2326
Mean Absolute Error (MAE): 4.0844
R-square

In [None]:
myTemp = [26.5, 55, 13.2, 1010, 70, 270, 10.20, 200, 260]

# # Convert to a NumPy array and reshape to (1, 9)
# myTemp_reshaped = np.array(myTemp).reshape(1, -1)

# # Now myTemp_reshaped has the shape (1, 9)
# print(myTemp_reshaped.shape) 

myTemp_df = pd.DataFrame([myTemp], columns=[f'feature_{i}' for i in range(9)])

# Now myTemp_df has the shape (1, 9)
print(myTemp_df.shape)  # Output: (1, 9)

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
# Assuming scaler was fitted on the training data
myTemp_scaled = scaler.fit_transform(myTemp_df)

# Now pass the scaled data to the model
predictions = rf.predict(myTemp_scaled)


In [None]:
column = df['wind_gust(m/s)']
print(f'The minimum value of soil moisture is {column.min()}')
print(f'The maximum value of soil moisture is {column.max()}')
print(f'The average value of soil moisture is {column.mean()}')
print(f'The median value of soil moisture is {column.median()}')

In [None]:
print(predictions)

In [None]:
df_sorted = df.sort_values(by='rain (mm)', ascending=False)

In [None]:
df_sorted.head(20)

In [None]:
df.to_csv('WaTime_Dataset.csv', index=False)

In [None]:
df_filtered = df[df['rain (mm)'] >=1]

df_filtered.head(5000)