In [11]:
import pandas as pd
# Read the crop-yield CSV from the Kaggle input mount, then preview the first five rows.
yield_csv_path = "/kaggle/input/crop-yield-prediction-dataset/yield_df.csv"
df = pd.read_csv(yield_csv_path)
df.head(n=5)

Unnamed: 0.1,Unnamed: 0,Area,Item,Year,hg/ha_yield,average_rain_fall_mm_per_year,pesticides_tonnes,avg_temp
0,0,Albania,Maize,1990,36613,1485.0,121.0,16.37
1,1,Albania,Potatoes,1990,66667,1485.0,121.0,16.37
2,2,Albania,"Rice, paddy",1990,23333,1485.0,121.0,16.37
3,3,Albania,Sorghum,1990,12500,1485.0,121.0,16.37
4,4,Albania,Soybeans,1990,7000,1485.0,121.0,16.37


In [12]:
# Show the column labels as a plain Python list.
print(list(df.columns))


['Unnamed: 0', 'Area', 'Item', 'Year', 'hg/ha_yield', 'average_rain_fall_mm_per_year', 'pesticides_tonnes', 'avg_temp']


In [13]:
# Remove the "Unnamed: 0" column (it mirrors the row index in the preview above).
# Rebinding `df` with errors="ignore" instead of an in-place drop keeps this cell
# idempotent: re-running it once the column is gone no longer raises KeyError.
df = df.drop(columns=["Unnamed: 0"], errors="ignore")

In [14]:
# Inspect per-column dtypes to see which features are categorical (object) and which are numeric.
df.dtypes

Area                              object
Item                              object
Year                               int64
hg/ha_yield                        int64
average_rain_fall_mm_per_year    float64
pesticides_tonnes                float64
avg_temp                         float64
dtype: object

In [15]:
import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from xgboost import XGBRegressor

# --- Features and target ---
# The target is the yield column; every other column of `df` is used as a feature.
X = df.drop("hg/ha_yield", axis=1)
y = df["hg/ha_yield"]

# --- Identify categorical and numerical features ---
cat_features = ["Area", "Item"]
num_features = ["average_rain_fall_mm_per_year", "pesticides_tonnes", "avg_temp", "Year"]

# The transformer below forwards every non-categorical column untouched
# (remainder='passthrough'), so a stray column in `df` -- for example the
# "Unnamed: 0" index column if the drop cell was skipped -- would silently
# become a model feature. Fail fast unless X holds exactly the declared features.
declared_features = set(cat_features) | set(num_features)
assert set(X.columns) == declared_features, (
    "Feature columns differ from cat_features + num_features: "
    f"{sorted(set(X.columns) ^ declared_features)}"
)

# --- Preprocessing: One-hot encode categorical variables ---
# handle_unknown="ignore" encodes categories not seen during fit as all zeros
# instead of raising at prediction time.
preprocessor = ColumnTransformer([
    ("cat", OneHotEncoder(handle_unknown="ignore"), cat_features)
], remainder='passthrough')

# --- Split data ---
# 80/20 random hold-out split with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# --- Random Forest Model ---
# Pipeline does not copy its steps, so handing the same `preprocessor` object to
# both pipelines would make them share a single encoder: fitting one pipeline
# would refit the encoder used by the other. Each pipeline therefore gets its own
# unfitted clone; the fitted transformer is available as
# `<pipeline>.named_steps["preprocessor"]`.
rf_model = Pipeline([
    ("preprocessor", clone(preprocessor)),
    ("regressor", RandomForestRegressor(n_estimators=200, random_state=42))
])

rf_model.fit(X_train, y_train)
rf_preds = rf_model.predict(X_test)

# --- XGBoost Model ---
xgb_model = Pipeline([
    ("preprocessor", clone(preprocessor)),
    ("regressor", XGBRegressor(n_estimators=300, learning_rate=0.1, random_state=42))
])

xgb_model.fit(X_train, y_train)
xgb_preds = xgb_model.predict(X_test)

# --- Evaluation ---
def evaluate(y_true, y_pred, model_name):
    """Print the R² score and RMSE of one model's predictions.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted values, aligned element-wise with ``y_true``.
        model_name: Label shown in the printed header.
    """
    r2 = r2_score(y_true, y_pred)
    # RMSE is the square root of the MSE, so it is expressed in the target's units.
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))

    # The header emoji and the superscript two were stored as mojibake
    # (UTF-8 bytes decoded as a single-byte codepage); restored here.
    print(f"\n📈 {model_name} Results:")
    print(f"R² Score: {r2:.4f}")
    print(f"RMSE: {rmse:.2f}")

# Report hold-out metrics for each fitted model, in the order they were trained.
evaluate(y_true=y_test, y_pred=rf_preds, model_name="Random Forest")
evaluate(y_true=y_test, y_pred=xgb_preds, model_name="XGBoost")



📈 Random Forest Results:
R² Score: 0.9877
RMSE: 9453.84

📈 XGBoost Results:
R² Score: 0.9741
RMSE: 13714.82


In [None]:
!pip install tensorflow

In [None]:
import numpy as np
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split

# --- CNN: Crop Disease Detection ---
# Local import so this cell stays self-contained. set_random_seed seeds Python's
# `random`, NumPy and the Keras backend in one call, which makes the synthetic
# data and the weight initialisation below repeatable across runs (some GPU
# kernels may still introduce small nondeterminism).
from keras.utils import set_random_seed

set_random_seed(42)

# Placeholder data: 100 random 64x64 RGB arrays with random binary labels.
# This only exercises the model end to end; nothing can be learned from noise.
X_img, y_img = np.random.rand(100, 64, 64, 3), np.random.randint(2, size=100)

# Small binary classifier: one conv + pool stage, then a dense head with a
# sigmoid output, so predictions are scores in [0, 1].
cnn = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(64, 64, 3)),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid')
])
cnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
cnn.fit(X_img, y_img, epochs=3, batch_size=10)

# --- Random Forest: Yield Prediction ---
# Synthetic stand-in data: 100 rows of 3 random features and a random target in [0, 100).
# Dedicated names (X_sim / y_sim) are used so the real dataset's `X` and `y` from the
# yield-prediction cell are not overwritten, and the data comes from a seeded
# generator so that random_state=42 below actually yields a repeatable model.
sim_rng = np.random.default_rng(42)
X_sim, y_sim = sim_rng.random((100, 3)), sim_rng.random(100) * 100
Xtr, Xte, ytr, yte = train_test_split(X_sim, y_sim, test_size=0.2, random_state=42)
rf = RandomForestRegressor(n_estimators=100, random_state=42).fit(Xtr, ytr)

# --- Recommendation ---
def recommend(d_pred, y_pred, disease_threshold=0.5, low_yield_threshold=50):
    """Turn a disease score and a yield estimate into a one-line recommendation.

    Disease takes priority: the yield check is only reached when no disease
    is flagged.

    Args:
        d_pred: Disease score; values at or above ``disease_threshold`` are
            treated as diseased.
        y_pred: Predicted yield; values below ``low_yield_threshold`` are
            treated as low.
        disease_threshold: Cut-off for flagging disease (default 0.5).
        low_yield_threshold: Cut-off below which yield is considered low
            (default 50).

    Returns:
        The recommendation message as a string.
    """
    if d_pred >= disease_threshold:
        return "Disease detected! Apply pesticide."
    if y_pred < low_yield_threshold:
        return "Low yield! Improve irrigation."
    return "Crop healthy, yield optimal."

# --- Test with Simulated Inputs ---
# One random 64x64 RGB array and one hand-picked feature row stand in for real inputs.
test_img = np.random.rand(1, 64, 64, 3)

# Both models return a batch of predictions; take the single value for the one sample.
d_pred = cnn.predict(test_img)[0, 0]
y_pred = rf.predict([[0.8, 0.6, 0.7]])[0]

print(
    f"Disease Prediction: {d_pred:.4f} (0:Healthy,1:Diseased)",
    f"Yield Prediction: {y_pred:.2f} units",
    f"Recommendation: {recommend(d_pred, y_pred)}",
    sep="\n",
)
