# Irrigation Predictor Modeling

#### The model utilizes weather features of Johannesburg to predict soil moisture for the system to either irrigate or not to.

In [1]:
import pandas as pd 
import numpy as np 
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns 
%matplotlib inline

In [2]:
df = pd.read_csv('New_Dataset.csv', sep=',')
df.head()

Unnamed: 0,time,temperature_2m (°C),relative_humidity_2m (%),precipitation (mm),rain (mm),snowfall (cm),snow_depth (m),surface_pressure (hPa),cloud_cover (%),et0_fao_evapotranspiration (mm),wind_speed_10m (km/h),wind_direction_10m (°),wind_gusts_10m (km/h),soil_temperature_0_to_7cm (°C),soil_temperature_7_to_28cm (°C),soil_temperature_28_to_100cm (°C),soil_moisture_0_to_7cm (m³/m³),soil_moisture_7_to_28cm (m³/m³)
0,2010-01-01T00:00,-2.3,86,0.2,0.0,0.14,0.09,992.7,100,0.01,18.0,70,33.8,-0.6,0.1,2.3,0.371,0.377
1,2010-01-01T01:00,-2.5,87,0.3,0.0,0.21,0.09,992.5,100,0.0,15.8,66,33.5,-0.6,0.1,2.3,0.371,0.377
2,2010-01-01T02:00,-2.6,87,0.4,0.0,0.28,0.09,992.4,100,0.0,14.8,56,31.3,-0.6,0.1,2.3,0.371,0.377
3,2010-01-01T03:00,-2.6,88,0.5,0.0,0.35,0.09,992.1,100,0.0,16.0,54,30.2,-0.6,0.1,2.3,0.371,0.377
4,2010-01-01T04:00,-2.7,88,0.5,0.0,0.35,0.1,991.6,100,0.0,16.3,55,30.2,-0.6,0.1,2.3,0.371,0.377


### Data Exploration

In [3]:
df.shape

(128592, 18)

In [4]:
df.dtypes

time                                  object
temperature_2m (°C)                  float64
relative_humidity_2m (%)               int64
precipitation (mm)                   float64
rain (mm)                            float64
snowfall (cm)                        float64
snow_depth (m)                       float64
surface_pressure (hPa)               float64
cloud_cover (%)                        int64
et0_fao_evapotranspiration (mm)      float64
wind_speed_10m (km/h)                float64
wind_direction_10m (°)                 int64
wind_gusts_10m (km/h)                float64
soil_temperature_0_to_7cm (°C)       float64
soil_temperature_7_to_28cm (°C)      float64
soil_temperature_28_to_100cm (°C)    float64
soil_moisture_0_to_7cm (m³/m³)       float64
soil_moisture_7_to_28cm (m³/m³)      float64
dtype: object

In [5]:
df.drop(['time', 'snow_depth (m)', 'et0_fao_evapotranspiration (mm)', 'precipitation (mm)',
       'soil_temperature_0_to_7cm (°C)', 'soil_temperature_7_to_28cm (°C)',
       'soil_temperature_28_to_100cm (°C)', 'soil_moisture_0_to_7cm (m³/m³)'], axis=1, inplace=True)

In [6]:
df.head()

Unnamed: 0,temperature_2m (°C),relative_humidity_2m (%),rain (mm),snowfall (cm),surface_pressure (hPa),cloud_cover (%),wind_speed_10m (km/h),wind_direction_10m (°),wind_gusts_10m (km/h),soil_moisture_7_to_28cm (m³/m³)
0,-2.3,86,0.0,0.14,992.7,100,18.0,70,33.8,0.377
1,-2.5,87,0.0,0.21,992.5,100,15.8,66,33.5,0.377
2,-2.6,87,0.0,0.28,992.4,100,14.8,56,31.3,0.377
3,-2.6,88,0.0,0.35,992.1,100,16.0,54,30.2,0.377
4,-2.7,88,0.0,0.35,991.6,100,16.3,55,30.2,0.377


#### Convert units of measurement of snowfall, wind speed & wind gust
##### 1. snowfall: cm - mm
##### 2. wind speed: km/h - m/s
##### 3. wind direction: km/h - m/s

In [7]:
# snowfall conversion
df['snowfall(mm)'] = df['snowfall (cm)'] * 10

#wind speed conversion
df['wind_speed(m/s)'] = df['wind_speed_10m (km/h)'] * 3.6

#wind gust conversion
df['wind_gust(m/s)'] = df['wind_gusts_10m (km/h)'] * 3.6

In [8]:
df.drop(['wind_gusts_10m (km/h)', 'snowfall (cm)', 'wind_speed_10m (km/h)'], axis=1, inplace=True)

In [9]:
rename_column = {'soil_moisture_7_to_28cm (m³/m³)': 'soil_moisture'}

df.rename(columns=rename_column, inplace=True)

In [10]:
last_column = df.pop('soil_moisture')
 
df.insert(df.shape[1], 'soil_moisture', last_column)

In [11]:
df.head()

Unnamed: 0,temperature_2m (°C),relative_humidity_2m (%),rain (mm),surface_pressure (hPa),cloud_cover (%),wind_direction_10m (°),snowfall(mm),wind_speed(m/s),wind_gust(m/s),soil_moisture
0,-2.3,86,0.0,992.7,100,70,1.4,64.8,121.68,0.377
1,-2.5,87,0.0,992.5,100,66,2.1,56.88,120.6,0.377
2,-2.6,87,0.0,992.4,100,56,2.8,53.28,112.68,0.377
3,-2.6,88,0.0,992.1,100,54,3.5,57.6,108.72,0.377
4,-2.7,88,0.0,991.6,100,55,3.5,58.68,108.72,0.377


In [12]:
column = df['soil_moisture']
print(f'The minimum value of soil moisture is {column.min()}')
print(f'The maximum value of soil moisture is {column.max()}')
print(f'The average value of soil moisture is {column.mean()}')
print(f'The median value of soil moisture is {column.median()}')

The minimum value of soil moisture is 0.129
The maximum value of soil moisture is 0.439
The average value of soil moisture is 0.2919513655592883
The median value of soil moisture is 0.314


In [24]:
df['soil_moisture'] = df['soil_moisture'] * 100

In [26]:
column = df['soil_moisture']
print(f'The minimum value of soil moisture is {column.min()}')
print(f'The maximum value of soil moisture is {column.max()}')
print(f'The average value of soil moisture is {column.mean()}')
print(f'The median value of soil moisture is {column.median()}')

The minimum value of soil moisture is 12.9
The maximum value of soil moisture is 43.9
The average value of soil moisture is 29.195136555928823
The median value of soil moisture is 31.4


In [4]:
from IPython.display import Image 

Image(url="Soil Moisture Predictor.png", width=600, height=400) 


In [13]:
df.isnull().sum()

temperature_2m (°C)         0
relative_humidity_2m (%)    0
rain (mm)                   0
surface_pressure (hPa)      0
cloud_cover (%)             0
wind_direction_10m (°)      0
snowfall(mm)                0
wind_speed(m/s)             0
wind_gust(m/s)              0
soil_moisture               0
dtype: int64

In [14]:
df.describe()

Unnamed: 0,temperature_2m (°C),relative_humidity_2m (%),rain (mm),surface_pressure (hPa),cloud_cover (%),wind_direction_10m (°),snowfall(mm),wind_speed(m/s),wind_gust(m/s),soil_moisture
count,128592.0,128592.0,128592.0,128592.0,128592.0,128592.0,128592.0,128592.0,128592.0,128592.0
mean,10.554573,75.224034,0.067026,1010.801114,54.64609,196.279652,0.033799,47.219846,92.398418,0.291951
std,8.359645,17.107726,0.310522,9.1215,37.124637,92.361909,0.335772,22.502015,43.015395,0.079788
min,-20.6,14.0,0.0,961.1,0.0,1.0,0.0,0.0,3.96,0.129
25%,4.3,64.0,0.0,1005.3,21.0,116.0,0.0,30.24,59.76,0.219
50%,10.3,79.0,0.0,1011.2,54.0,217.0,0.0,43.92,86.76,0.314
75%,16.8,90.0,0.0,1016.6,94.0,270.0,0.0,60.48,118.08,0.359
max,37.7,100.0,15.8,1042.0,100.0,360.0,16.1,216.72,400.32,0.439


### Visualization

In [15]:
df.shape

(128592, 10)

In [16]:
df.columns

Index(['temperature_2m (°C)', 'relative_humidity_2m (%)', 'rain (mm)',
       'surface_pressure (hPa)', 'cloud_cover (%)', 'wind_direction_10m (°)',
       'snowfall(mm)', 'wind_speed(m/s)', 'wind_gust(m/s)', 'soil_moisture'],
      dtype='object')

In [17]:
X = df.drop('soil_moisture',axis=1)
y = np.array(df['soil_moisture'])

feature_list = list(X.columns)
print(feature_list)
# mean_value_temperature = df['temperature_2m (°C)'].mean()
# mean_value_soilMoisture = df['soil_moisture'].mean()
# plt.figure(figsize=(8,6))
# plt.title('Values of Temperature vs. Soil Moisture')
# plt.scatter('temperature_2m (°C)', 'soil_moisture', data=df)
# plt.xlabel('Soil Moisture')
# plt.ylabel('Temperature')

['temperature_2m (°C)', 'relative_humidity_2m (%)', 'rain (mm)', 'surface_pressure (hPa)', 'cloud_cover (%)', 'wind_direction_10m (°)', 'snowfall(mm)', 'wind_speed(m/s)', 'wind_gust(m/s)']


In [18]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)
    
    print(f"Model: {type(model).__name__}")
    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"R-squared (R²): {r2:.4f}")
    print("="*50)


models = [
    LinearRegression(),
    Ridge(),
    Lasso(),
    DecisionTreeRegressor(),
    RandomForestRegressor(n_estimators=100),
    GradientBoostingRegressor(n_estimators=100),
    XGBRegressor(),
    SVR(),
    MLPRegressor(hidden_layer_sizes=(100, 100), max_iter=500),
    KNeighborsRegressor(n_neighbors=5)
]

for model in models:
    model.fit(X_train, y_train)
    evaluate_model(model, X_test, y_test)

Model: LinearRegression
Mean Squared Error (MSE): 0.0031
Root Mean Squared Error (RMSE): 0.0559
Mean Absolute Error (MAE): 0.0450
R-squared (R²): 0.5101
Model: Ridge
Mean Squared Error (MSE): 0.0031
Root Mean Squared Error (RMSE): 0.0559
Mean Absolute Error (MAE): 0.0450
R-squared (R²): 0.5101
Model: Lasso
Mean Squared Error (MSE): 0.0064
Root Mean Squared Error (RMSE): 0.0798
Mean Absolute Error (MAE): 0.0697
R-squared (R²): -0.0000
Model: DecisionTreeRegressor
Mean Squared Error (MSE): 0.0039
Root Mean Squared Error (RMSE): 0.0628
Mean Absolute Error (MAE): 0.0396
R-squared (R²): 0.3804
Model: RandomForestRegressor
Mean Squared Error (MSE): 0.0019
Root Mean Squared Error (RMSE): 0.0441
Mean Absolute Error (MAE): 0.0322
R-squared (R²): 0.6955
Model: GradientBoostingRegressor
Mean Squared Error (MSE): 0.0027
Root Mean Squared Error (RMSE): 0.0523
Mean Absolute Error (MAE): 0.0408
R-squared (R²): 0.5704
Model: XGBRegressor
Mean Squared Error (MSE): 0.0024
Root Mean Squared Error (RMSE):

In [19]:
# x_ax = range(len(y_test))
# plt.plot(x_ax, y_test, label='Original')
# plt.plot(x_ax, y_pred, label='Predicted')
# plt.title('Soil Moisture prediction')
# plt.legend()

In [20]:
rf = RandomForestRegressor(n_estimators=100)
rf.fit(X_train, y_train)
y_pred = rf.predict(X_test)

In [21]:
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Model: {type(model).__name__}")
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"R-squared (R²): {r2:.4f}")

Model: KNeighborsRegressor
Mean Squared Error (MSE): 0.0019
Root Mean Squared Error (RMSE): 0.0441
Mean Absolute Error (MAE): 0.0322
R-squared (R²): 0.6950


In [22]:
mape = 100 * (mae / y_test)

accuracy = 100 - np.mean(mape)
print('Accuracy:', round(accuracy, 2), '%.')

Accuracy: 87.85 %.


In [54]:
from sklearn.tree import export_graphviz
import pydot

# Pull out one tree from the forest
tree = rf.estimators_[5]

# Export the image to a dot file
export_graphviz(tree, out_file = 'tree.dot', feature_names = feature_list, rounded = True, precision = 1)

# Use dot file to create a graph
(graph, ) = pydot.graph_from_dot_file('tree.dot')

# Write graph to a png file
graph.write_png('tree.png'); 

KeyboardInterrupt: 

In [43]:
myTemp = [26.5, 55, 13.2, 1010, 70, 270, 10.20, 200, 260]

# # Convert to a NumPy array and reshape to (1, 9)
# myTemp_reshaped = np.array(myTemp).reshape(1, -1)

# # Now myTemp_reshaped has the shape (1, 9)
# print(myTemp_reshaped.shape) 

myTemp_df = pd.DataFrame([myTemp], columns=[f'feature_{i}' for i in range(9)])

# Now myTemp_df has the shape (1, 9)
print(myTemp_df.shape)  # Output: (1, 9)

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
# Assuming scaler was fitted on the training data
myTemp_scaled = scaler.fit_transform(myTemp_df)

# Now pass the scaled data to the model
predictions = rf.predict(myTemp_scaled)


(1, 9)




In [35]:
column = df['wind_gust(m/s)']
print(f'The minimum value of soil moisture is {column.min()}')
print(f'The maximum value of soil moisture is {column.max()}')
print(f'The average value of soil moisture is {column.mean()}')
print(f'The median value of soil moisture is {column.median()}')

The minimum value of soil moisture is 3.9600000000000004
The maximum value of soil moisture is 400.32
The average value of soil moisture is 92.3984182530795
The median value of soil moisture is 86.76


In [44]:
print(predictions)

[0.33601]


In [51]:
df_sorted = df.sort_values(by='soil_moisture', ascending=True)

In [53]:
df_sorted.head(5000)

Unnamed: 0,temperature_2m (°C),relative_humidity_2m (%),rain (mm),surface_pressure (hPa),cloud_cover (%),wind_direction_10m (°),snowfall(mm),wind_speed(m/s),wind_gust(m/s),soil_moisture
76444,17.1,71,0.0,1007.0,28,182,0.0,36.36,58.32,12.9
76459,16.4,75,0.5,1002.2,90,284,0.0,95.04,259.20,12.9
76458,24.3,40,0.0,999.6,93,272,0.0,149.04,239.76,12.9
76457,28.1,32,0.0,998.3,88,212,0.0,120.60,224.28,12.9
76456,28.9,35,0.0,998.8,50,216,0.0,102.96,243.72,12.9
...,...,...,...,...,...,...,...,...,...,...
90830,9.9,40,0.0,1011.4,100,273,0.0,70.20,149.04,15.3
90829,11.2,37,0.0,1011.3,100,280,0.0,74.88,146.52,15.3
100277,16.3,77,0.0,1014.8,32,51,0.0,16.56,19.44,15.3
108283,20.9,31,0.0,1020.7,26,118,0.0,30.60,71.28,15.3


In [55]:
df.to_csv('WaTime_Dataset.csv', index=False)

In [59]:
df_filtered = df[df['rain (mm)'] >=1]

df_filtered.head(5000)

Unnamed: 0,temperature_2m (°C),relative_humidity_2m (%),rain (mm),surface_pressure (hPa),cloud_cover (%),wind_direction_10m (°),snowfall(mm),wind_speed(m/s),wind_gust(m/s),soil_moisture
1276,4.7,80,1.2,983.7,100,198,0.0,80.28,164.52,36.1
1748,4.5,90,1.3,1004.0,100,285,0.0,78.12,211.32,41.2
1749,3.5,89,1.1,1004.2,100,296,0.0,85.32,216.36,41.9
1907,11.4,92,1.3,1001.9,100,227,0.0,67.68,124.56,38.9
1908,11.8,91,1.5,1002.2,100,236,0.0,72.00,133.56,39.0
...,...,...,...,...,...,...,...,...,...,...
127913,21.5,72,1.7,1006.3,100,273,0.0,44.28,105.12,24.0
128036,22.0,79,1.1,1008.6,73,221,0.0,53.28,103.68,21.5
128037,20.7,88,1.0,1008.9,72,260,0.0,61.92,110.16,21.5
128162,21.8,92,1.4,1008.2,82,281,0.0,6.48,38.88,17.5
