In [3]:
#IMPORTING ALL THE NECESSARY LIBRARIES
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.preprocessing import MinMaxScaler
import joblib

In [4]:
# -------------------------------
# STEP 1 : PREPROCESSING THE DATA
# -------------------------------
   
# Step 1.1 : Load the dataset

df=pd.read_csv("irrigation_dataset.csv")

In [5]:
# Printing the first five rows of the dataset to verify loading

print(df.head())

   Unnamed: 0  sensor_0  sensor_1  sensor_2  sensor_3  sensor_4  sensor_5  \
0           0       1.0       2.0       1.0       7.0       0.0       1.0   
1           1       5.0       1.0       3.0       5.0       2.0       2.0   
2           2       3.0       1.0       4.0       3.0       4.0       0.0   
3           3       2.0       2.0       4.0       3.0       5.0       0.0   
4           4       4.0       3.0       3.0       2.0       5.0       1.0   

   sensor_6  sensor_7  sensor_8  ...  sensor_13  sensor_14  sensor_15  \
0       1.0       4.0       0.0  ...        8.0        1.0        0.0   
1       1.0       2.0       3.0  ...        4.0        5.0        5.0   
2       1.0       6.0       0.0  ...        3.0        3.0        1.0   
3       3.0       2.0       2.0  ...        4.0        1.0        1.0   
4       3.0       1.0       1.0  ...        1.0        3.0        2.0   

   sensor_16  sensor_17  sensor_18  sensor_19  parcel_0  parcel_1  parcel_2  
0        2.0        

In [6]:
df.info()  # Checking the dataset information

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 24 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Unnamed: 0  2000 non-null   int64  
 1   sensor_0    2000 non-null   float64
 2   sensor_1    2000 non-null   float64
 3   sensor_2    2000 non-null   float64
 4   sensor_3    2000 non-null   float64
 5   sensor_4    2000 non-null   float64
 6   sensor_5    2000 non-null   float64
 7   sensor_6    2000 non-null   float64
 8   sensor_7    2000 non-null   float64
 9   sensor_8    2000 non-null   float64
 10  sensor_9    2000 non-null   float64
 11  sensor_10   2000 non-null   float64
 12  sensor_11   2000 non-null   float64
 13  sensor_12   2000 non-null   float64
 14  sensor_13   2000 non-null   float64
 15  sensor_14   2000 non-null   float64
 16  sensor_15   2000 non-null   float64
 17  sensor_16   2000 non-null   float64
 18  sensor_17   2000 non-null   float64
 19  sensor_18   2000 non-null  

In [7]:
df.columns  # Checking the columns of the dataset

Index(['Unnamed: 0', 'sensor_0', 'sensor_1', 'sensor_2', 'sensor_3',
       'sensor_4', 'sensor_5', 'sensor_6', 'sensor_7', 'sensor_8', 'sensor_9',
       'sensor_10', 'sensor_11', 'sensor_12', 'sensor_13', 'sensor_14',
       'sensor_15', 'sensor_16', 'sensor_17', 'sensor_18', 'sensor_19',
       'parcel_0', 'parcel_1', 'parcel_2'],
      dtype='object')

In [8]:
df=df.drop('Unnamed: 0', axis=1)  # Dropping the first unnecessary column
df.head() # Displaying the first five rows after dropping the column

Unnamed: 0,sensor_0,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,sensor_8,sensor_9,...,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,parcel_0,parcel_1,parcel_2
0,1.0,2.0,1.0,7.0,0.0,1.0,1.0,4.0,0.0,3.0,...,8.0,1.0,0.0,2.0,1.0,9.0,2.0,0,1,0
1,5.0,1.0,3.0,5.0,2.0,2.0,1.0,2.0,3.0,1.0,...,4.0,5.0,5.0,2.0,2.0,2.0,7.0,0,0,0
2,3.0,1.0,4.0,3.0,4.0,0.0,1.0,6.0,0.0,2.0,...,3.0,3.0,1.0,0.0,3.0,1.0,0.0,1,1,0
3,2.0,2.0,4.0,3.0,5.0,0.0,3.0,2.0,2.0,5.0,...,4.0,1.0,1.0,4.0,1.0,3.0,2.0,0,0,0
4,4.0,3.0,3.0,2.0,5.0,1.0,3.0,1.0,1.0,2.0,...,1.0,3.0,2.0,2.0,1.0,1.0,0.0,1,1,0


In [9]:
df.describe()   # Displaying the statistical summary of the dataset

Unnamed: 0,sensor_0,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,sensor_8,sensor_9,...,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,parcel_0,parcel_1,parcel_2
count,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,...,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0,2000.0
mean,1.437,1.659,2.6545,2.6745,2.8875,1.411,3.3155,4.2015,1.214,1.901,...,2.7315,3.416,1.2065,2.325,1.7295,2.2745,1.8135,0.6355,0.7305,0.212
std,1.321327,1.338512,1.699286,1.855875,1.816451,1.339394,2.206444,2.280241,1.386782,1.518668,...,1.774537,1.960578,1.258034,1.715181,1.561265,1.67169,1.469285,0.48141,0.443811,0.408827
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,1.0,1.0,1.0,2.0,0.0,2.0,3.0,0.0,1.0,...,1.0,2.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0,0.0
50%,1.0,1.0,2.0,2.0,3.0,1.0,3.0,4.0,1.0,2.0,...,2.0,3.0,1.0,2.0,1.0,2.0,2.0,1.0,1.0,0.0
75%,2.0,2.0,4.0,4.0,4.0,2.0,5.0,6.0,2.0,3.0,...,4.0,5.0,2.0,3.0,3.0,3.0,3.0,1.0,1.0,0.0
max,8.0,9.0,10.0,11.0,12.0,7.0,13.0,12.0,8.0,9.0,...,11.0,11.0,6.0,10.0,11.0,10.0,7.0,1.0,1.0,1.0


In [10]:
# -------------------------------
# STEP 2 : DEFINE ALL THE FEATURES
# -------------------------------

# Step 2.1 : Define the features and target variable

X=df.iloc[:,0:20]  # This gives you columns 0 to 19 (sensor_0 to sensor_19)
y=df.iloc[:,20:0]  # This gives you rows 20 to 0 (the target variable) 

In [11]:
X.sample(10)  # Displaying a random sample of the features of size 10

Unnamed: 0,sensor_0,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,sensor_8,sensor_9,sensor_10,sensor_11,sensor_12,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19
1931,1.0,1.0,6.0,3.0,1.0,1.0,2.0,7.0,1.0,0.0,6.0,5.0,6.0,5.0,2.0,3.0,3.0,0.0,0.0,0.0
1783,2.0,2.0,3.0,7.0,3.0,0.0,4.0,5.0,0.0,0.0,6.0,3.0,5.0,1.0,2.0,0.0,5.0,1.0,2.0,1.0
560,0.0,3.0,3.0,1.0,0.0,2.0,1.0,2.0,1.0,0.0,7.0,3.0,7.0,6.0,4.0,0.0,0.0,0.0,3.0,5.0
1493,0.0,1.0,4.0,6.0,1.0,0.0,2.0,4.0,0.0,0.0,1.0,4.0,7.0,2.0,5.0,1.0,3.0,2.0,1.0,1.0
1098,0.0,3.0,3.0,4.0,2.0,0.0,0.0,6.0,0.0,3.0,4.0,6.0,3.0,6.0,4.0,2.0,1.0,0.0,6.0,3.0
784,0.0,0.0,2.0,1.0,3.0,4.0,4.0,3.0,2.0,2.0,2.0,3.0,3.0,2.0,2.0,1.0,2.0,1.0,0.0,4.0
1523,0.0,2.0,3.0,5.0,4.0,2.0,1.0,3.0,2.0,4.0,1.0,2.0,3.0,3.0,2.0,0.0,4.0,0.0,1.0,0.0
1085,0.0,0.0,5.0,2.0,1.0,4.0,3.0,1.0,4.0,1.0,1.0,3.0,3.0,2.0,0.0,0.0,3.0,2.0,2.0,0.0
1136,1.0,1.0,2.0,9.0,2.0,0.0,1.0,6.0,2.0,3.0,4.0,3.0,5.0,4.0,3.0,0.0,0.0,1.0,1.0,0.0
1514,2.0,1.0,2.0,0.0,3.0,0.0,5.0,2.0,2.0,1.0,5.0,3.0,3.0,2.0,4.0,1.0,4.0,2.0,0.0,0.0


In [12]:
y.sample(10)  # Displaying a random sample of the target variable of size 10

370
655
401
625
1166
770
686
595
1264
1728


In [13]:
X.info()  # Checking the features information

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Data columns (total 20 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   sensor_0   2000 non-null   float64
 1   sensor_1   2000 non-null   float64
 2   sensor_2   2000 non-null   float64
 3   sensor_3   2000 non-null   float64
 4   sensor_4   2000 non-null   float64
 5   sensor_5   2000 non-null   float64
 6   sensor_6   2000 non-null   float64
 7   sensor_7   2000 non-null   float64
 8   sensor_8   2000 non-null   float64
 9   sensor_9   2000 non-null   float64
 10  sensor_10  2000 non-null   float64
 11  sensor_11  2000 non-null   float64
 12  sensor_12  2000 non-null   float64
 13  sensor_13  2000 non-null   float64
 14  sensor_14  2000 non-null   float64
 15  sensor_15  2000 non-null   float64
 16  sensor_16  2000 non-null   float64
 17  sensor_17  2000 non-null   float64
 18  sensor_18  2000 non-null   float64
 19  sensor_19  2000 non-null   float64
dtypes: float

In [14]:
y.info() # Checking the target variable information

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2000 entries, 0 to 1999
Empty DataFrame


In [15]:
X # Viewing the features DataFrame

Unnamed: 0,sensor_0,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,sensor_6,sensor_7,sensor_8,sensor_9,sensor_10,sensor_11,sensor_12,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19
0,1.0,2.0,1.0,7.0,0.0,1.0,1.0,4.0,0.0,3.0,1.0,3.0,6.0,8.0,1.0,0.0,2.0,1.0,9.0,2.0
1,5.0,1.0,3.0,5.0,2.0,2.0,1.0,2.0,3.0,1.0,3.0,2.0,2.0,4.0,5.0,5.0,2.0,2.0,2.0,7.0
2,3.0,1.0,4.0,3.0,4.0,0.0,1.0,6.0,0.0,2.0,3.0,2.0,4.0,3.0,3.0,1.0,0.0,3.0,1.0,0.0
3,2.0,2.0,4.0,3.0,5.0,0.0,3.0,2.0,2.0,5.0,3.0,1.0,2.0,4.0,1.0,1.0,4.0,1.0,3.0,2.0
4,4.0,3.0,3.0,2.0,5.0,1.0,3.0,1.0,1.0,2.0,4.0,5.0,3.0,1.0,3.0,2.0,2.0,1.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,4.0,1.0,2.0,2.0,1.0,1.0,1.0,2.0,1.0,2.0,4.0,3.0,3.0,1.0,2.0,3.0,2.0,1.0,1.0,0.0
1996,1.0,3.0,3.0,3.0,2.0,2.0,3.0,3.0,1.0,5.0,2.0,2.0,4.0,3.0,3.0,0.0,1.0,0.0,6.0,2.0
1997,1.0,3.0,3.0,1.0,1.0,4.0,8.0,1.0,0.0,0.0,3.0,2.0,4.0,2.0,3.0,4.0,4.0,4.0,1.0,0.0
1998,2.0,1.0,0.0,2.0,2.0,0.0,1.0,3.0,0.0,0.0,0.0,5.0,2.0,2.0,4.0,0.0,2.0,0.0,3.0,0.0


In [16]:
X.shape,y.shape  # Checking the shape of features and target variable

((2000, 20), (2000, 0))