In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

def add_days_since_start(
    frame: pd.DataFrame,
    date_column: str = 'Date',
    date_format: str = '%d-%m-%Y',
) -> pd.DataFrame:
    """Replace a date column with the day offset from the earliest date.

    Args:
        frame: Input table; it is not modified.
        date_column: Name of the column holding the date strings.
        date_format: ``strptime`` format used to parse ``date_column``.

    Returns:
        A new frame without ``date_column`` and with an extra
        ``Days_Since_Start`` column appended (0 for the earliest date).
    """
    dates = pd.to_datetime(frame[date_column], format=date_format)
    result = frame.drop(columns=[date_column])
    result['Days_Since_Start'] = (dates - dates.min()).dt.days
    return result


def fill_missing_values(
    frame: pd.DataFrame,
    categorical_columns: tuple = ('Phase',),
    placeholder: str = 'Unknown',
) -> pd.DataFrame:
    """Fill missing cells so that encoding and fitting never see NaN.

    Non-numeric columns listed in ``categorical_columns`` receive
    ``placeholder``; every other column is filled with 0.

    Args:
        frame: Input table; it is not modified.
        categorical_columns: Label columns that will be encoded afterwards.
        placeholder: Label substituted for a missing category.

    Returns:
        A new frame with the missing cells filled.
    """
    fill_values = {}
    for column in frame.columns:
        is_label_column = (
            column in categorical_columns
            and not pd.api.types.is_numeric_dtype(frame[column])
        )
        # A string placeholder keeps the label column uniformly typed; putting
        # 0 into a column of strings would mix ints and strs for the encoder.
        fill_values[column] = placeholder if is_label_column else 0
    return frame.fillna(value=fill_values)


# Step 4 (configuration): columns used as features (X).
selected_columns = [
    'Days_Since_Start', 'Phase',
    'Soil Moisture Difference', 'Etc(in m)', 'Rainfall(in mm)',
    'Soil bulk Density (g/cm3)', 'Root Zone Depth(cm)', 'NIR( in m)',
    'Volume_SM(to match req S.M)', 'Volume (Rainfall)', 'Volume(Etc)',
    'Final Volume',
]

# The statements below stay at module level (not inside a function) so that
# `data`, `label_encoder`, `X`, `y`, `model`, `y_pred`, ... remain available as
# globals after the cell runs. The guard only prevents the CSV load and the
# training from firing when this code is imported instead of executed.
if __name__ == "__main__":
    # Load your dataset
    data = pd.read_csv('training_dataset_wheat.csv')

    # Step 1: Handle Date Column
    data = add_days_since_start(data)

    # Step 2: Handle Missing Data
    # Done BEFORE encoding: filling afterwards cannot clean 'Phase', because
    # the encoder has already consumed the NaN by then.
    data = fill_missing_values(data)

    # Step 3: Handle Categorical Variables
    # NOTE(review): LabelEncoder assigns codes in sorted label order, which a
    # linear model treats as a numeric scale. If 'Phase' has a natural order,
    # or none at all, an explicit ordinal mapping or one-hot encoding is
    # probably more appropriate - confirm with the data owner.
    label_encoder = LabelEncoder()
    data['Phase'] = label_encoder.fit_transform(data['Phase'])

    # Step 4: Select Specific Columns
    X = data[selected_columns]

    # The target column
    y = data['Irrigation Req']

    # Step 5: Train-Test Split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Step 6: Train the Linear Regression Model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Step 7: Make Predictions and Evaluate
    y_pred = model.predict(X_test)

    # NOTE(review): the recorded run of this cell reports R² = 1.0 with an MSE
    # around 4e-19. That suggests 'Irrigation Req' may be an exact linear
    # function of the selected features (possibly the volume columns) - verify
    # that these columns are really available at prediction time.
    print("Mean Squared Error (MSE):", mean_squared_error(y_test, y_pred))
    print("R² Score:", r2_score(y_test, y_pred))



Mean Squared Error (MSE): 4.0384648629646697e-19
R² Score: 1.0
