In [26]:
import geopandas as gpd
import pandas as pd
import geemap
import ee


In [27]:
cropfile = 'pabbi_crop.geojson'
gdf = gpd.read_file(cropfile)
gdf.head()

Unnamed: 0,Mouza_Name,Landuse_Ma,Area_Acre,FFID,Parcel_ID,Crop_Type,geometry
0,Khushmaqam,Agriculture,0.101785,1,668.0,,"MULTIPOLYGON (((753923.077 3769111.141, 753894..."
1,Khushmaqam,Built up,0.036718,2,670.0,,"MULTIPOLYGON (((753959.894 3769126.291, 753959..."
2,Khushmaqam,Agriculture,0.315557,3,632.0,,"MULTIPOLYGON (((753839.267 3769129.693, 753841..."
3,Khushmaqam,Agriculture,0.187644,4,669.0,,"MULTIPOLYGON (((753952.852 3769131.737, 753946..."
4,Khushmaqam,Agriculture,0.161568,5,693.0,,"MULTIPOLYGON (((754361.202 3769166.424, 754269..."


In [28]:
# Filtering the composite labeling
#step 1: Remove rows with '/' in 'Crop_Type'
gdf_filtered = gdf[~gdf['Crop_Type'].fillna('').str.contains('/')]

#Step 2: Remove the nocrop type from the filtered data
nocrop = ['No crop' ,'Mix', 'Trees', 'Buitup' ,'Water body', 'Water Channel','Other','Builtup','Barren','Barley','Garlic','Egg Plant','Potato','Indian Squash' ]
gdf_filtered = gdf_filtered[~gdf_filtered['Crop_Type'].isin(nocrop)]

#Step 3: Final filtered data
gdf_filtered['Crop_Type'].value_counts()
print(gdf_filtered['Crop_Type'].value_counts())
print ('Mean of Crop_Type',gdf_filtered['Crop_Type'].value_counts().mean())

# 0) Drop any rows with missing Crop_Type
gdf_filtered = gdf_filtered.dropna(subset=['Crop_Type'])

Crop_Type
Tomato            94
Wheat             81
Persian Clover    57
Sugarcane         52
Corn              16
Orchard            6
Eucalyptus         5
Lady Finger        5
Name: count, dtype: int64
Mean of Crop_Type 39.5


In [29]:
major_crop = ['Tomato', 'Wheat','Persian Clover','Sugarcane','Corn']
minor_crop = [m for m in gdf_filtered['Crop_Type'].unique() if m not in major_crop]
print('Major Crop:',major_crop)
print('Minor Crop:',minor_crop)

Major Crop: ['Tomato', 'Wheat', 'Persian Clover', 'Sugarcane', 'Corn']
Minor Crop: ['Orchard', 'Eucalyptus', 'Lady Finger']


In [30]:
# 2) Compute target sample count for minor classes
counts = gdf_filtered['Crop_Type'].value_counts()
mean_imp = counts[major_crop].mean()      # average size of the major classes
target_min = int(mean_imp * 0.6)          # e.g. 60% of that
print(f"Major mean = {mean_imp:.1f}, so target for minor = {target_min}")


Major mean = 60.0, so target for minor = 36


In [31]:
# 3) Build the new stratified‐balanced GeoDataFrame
balanced_parts = []
for crop, group in gdf_filtered.groupby('Crop_Type'):
    n = len(group)
    if crop in major_crop:
        # keep all major‐crop samples
        balanced_parts.append(group)
    else:
        # oversample minor up to target_min
        if n < target_min:
            sampled = group.sample(n=target_min, replace=True, random_state=42)
        else:
            sampled = group
        balanced_parts.append(sampled)

gdf_strat = pd.concat(balanced_parts, ignore_index=True)

# 4) Verify the new distribution
print(gdf_strat['Crop_Type'].value_counts())

Crop_Type
Tomato            94
Wheat             81
Persian Clover    57
Sugarcane         52
Lady Finger       36
Orchard           36
Eucalyptus        36
Corn              16
Name: count, dtype: int64


In [32]:
ee.Authenticate()

True

In [33]:
ee.Initialize()

In [34]:
# Converting the gdf_balanced to ee feature collection
crop_fc = geemap.gdf_to_ee(gdf_strat)

In [45]:
# Define a function to get imagery an apply filter
def get_imagery (start_date,end_date):
    collection = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
    filtered_collection = collection.filterDate(start_date, end_date).filterBounds(crop_fc).filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', 10))
    median_image = collection.median().clip(crop_fc)
    # Select the first image from the filtered collection
    median_image = filtered_collection.median().clip(crop_fc)
    return median_image


In [46]:
# Seperate for Rabi and Kharif Season of the current year


def get_ndvindwi(median_image):
    # Calculate NDVI
    ndvi = median_image.normalizedDifference(['B8', 'B4']).rename('NDVI')
    # McFeeters NDWI = (Green - NIR) / (Green + NIR)
    ndwi = median_image.normalizedDifference(['B3', 'B8']).rename('NDWI')
    return ndvi, ndwi

def get_otherbands(median_image):
    # Adding other bans and calculating their mean
    bands = ['B2', 'B3', 'B4', 'B8', 'B11', 'B12']
    band_img = median_image.select(bands).rename([f'{b}_mean' for b in bands])
    return band_img

def get_texture(median_image):
    # Calculating the texture
    nir_int = median_image.select('B8').toInt32()
    texture = nir_int.glcmTexture(size=3)
    contrast = texture.select('B8_contrast').rename('contrast')
    return contrast

def get_features(ndvi, ndwi, band_img, contrast):
    # Combine all features into a single image
    features_img = band_img.addBands([ndvi, ndwi])
    features_img = features_img.addBands(contrast)
    return features_img


In [47]:
#---------------------------
# Plan reduciton

def get_reduction(fc):
    # Step 2: Calculate mean NDVI for each agri polygon
    pcs = fc.reduceRegions(
    collection=crop_fc,
    reducer=ee.Reducer.mean(),
    scale=10,)
    # Convert the result to dataframe
    cf= geemap.ee_to_gdf(pcs)
    return cf



In [48]:
#----------------------------------
# Now calculate area, perimeter etc
def get_area(cropgdf):
# We need to calculate the area of each polygon in square meters
# Therefore we will convert the geometry to a projected coordinate system (EPSG:32643)
    cropgdf=cropgdf.to_crs(epsg=32643)
# Calculating the area , perimetere and compactness
    cropgdf['Area_m2'] = cropgdf.geometry.area
    cropgdf['Perimeter_m'] = cropgdf.geometry.length
    cropgdf['Compactness'] = (4 * 3.14 * cropgdf['Area_m2']) / (cropgdf['Perimeter_m'] ** 2)

# Switch back to lat/lon if needed for mapping
    cropgdf = cropgdf.to_crs(epsg=4326)

#Inspect the new columns
#gdfcrop_features[['Area_m2','Perimeter_m','Compactness']].head()
    return cropgdf



In [None]:
# Getting imagery and features
early_Kharif=get_imagery('2025-04-01','2025-05-03')
rabi = get_imagery('2024-10-01','2025-03-31')
full_year = get_imagery('2024-09-01','2025-05-03')

ndvi_kharif, ndwi_kharif = get_ndvindwi(early_Kharif)
ndvi_rabi, ndwi_rabi = get_ndvindwi(rabi)
ndvi_full, ndwi_full = get_ndvindwi(full_year)

band_kharif = get_otherbands(early_Kharif)
band_rabi = get_otherbands(rabi)
band_full = get_otherbands(full_year)

texture_kharif = get_texture(early_Kharif)
texture_rabi = get_texture(rabi)    
texture_full = get_texture(full_year)

kharif_feats = get_features(ndvi_kharif, ndwi_kharif, band_kharif, texture_kharif)
rabi_feats = get_features(ndvi_rabi, ndwi_rabi, band_rabi, texture_rabi)    
full_feats = get_features(ndvi_full, ndwi_full, band_full, texture_full)    
composite_features = band_kharif.addBands([ndvi_kharif, ndwi_kharif, texture_kharif,ndvi_rabi, ndwi_rabi,band_rabi, texture_rabi,ndvi_full, ndwi_full,band_full, texture_full])




In [50]:
# Get reduction gdf for  Kharif_feas,rabi_feats and composite_features
kharif_reduced=get_reduction(kharif_feats)
rabi_reduced=get_reduction(rabi_feats)
full_reduced=get_reduction(full_feats)
composite_reduced=get_reduction(composite_features)


In [51]:
#Get area, perimetetr and compactness for kharif_reduced,rabi_reduced and composite_reduced
kharif_reduced=get_area(kharif_reduced)
rabi_reduced=get_area(rabi_reduced)
composite_reduced=get_area(composite_reduced)
full_reduced=get_area(full_reduced)

In [52]:
# Check the result
#print(gdf_with_ndvi[['Crop_Type', 'NDVI', 'NDWI']].head())
# Combining both NDVI and NDWI into single feature
feature_cols = ['NDVI', 'NDWI', 'B2_mean', 'B3_mean', 'B4_mean', 'B8_mean', 'B11_mean', 'B12_mean','Area_m2','Perimeter_m','Compactness','contrast']



In [62]:
# Checking one by one

#gdfcrop_features= kharif_reduced # Checking the Kharif Season
#gdfcrop_features= rabi_reduced # Checking the rabi Season
#gdfcrop_features= full_reduced # Checking the full Season
gdfcrop_features= composite_reduced # Checking the composite Season

print(gdfcrop_features[['Crop_Type'] + feature_cols ].head())


  Crop_Type      NDVI      NDWI     B2_mean     B3_mean      B4_mean  \
0      Corn  0.437319 -0.474599  659.888320  964.210453  1063.545672   
1      Corn  0.571453 -0.573919  418.263079  708.661012   715.092322   
2      Corn  0.411469 -0.463134  640.955983  939.710167  1068.994007   
3      Corn  0.375096 -0.431819  662.408324  975.366342  1136.211438   
4      Corn  0.388800 -0.460386  648.473067  942.079897  1148.008218   

       B8_mean     B11_mean     B12_mean      Area_m2  Perimeter_m  \
0  2695.504821  2099.207058  1609.481764  9899.429387   530.225196   
1  2624.495936  1797.125069  1248.404009  4117.205775   308.880640   
2  2557.671523  2120.781980  1571.890473  1563.555794   203.633454   
3  2448.961729  1987.405923  1610.514042  2233.372236   243.888865   
4  2543.340775  1964.578425  1593.682690  3415.636358   274.419225   

   Compactness       contrast  
0     0.442261   98602.724982  
1     0.542014  110709.681993  
2     0.473592   27397.113337  
3     0.471592   4

In [None]:
from sklearn.model_selection import train_test_split
# Splitting the data into training and testing sets

#Step 1 Define features (NDVI mean) and target label (Crop_Type)
X = gdfcrop_features[feature_cols].values
y = gdfcrop_features['Crop_Type'].values

# Train-test Split 80 % train and 20% test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42,stratify=y)



In [64]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Initialize the model
rf_model = RandomForestClassifier(n_estimators=200,
    max_depth=15,
    class_weight='balanced_subsample',
    random_state=42)

# Train the model
rf_model.fit(X_train, y_train)

# Predict on the test set
y_pred = rf_model.predict(X_test)

# Evaluate the model
print("Classification Report:")
print(classification_report(y_test, y_pred,zero_division=0))

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


Classification Report:
                precision    recall  f1-score   support

          Corn       0.00      0.00      0.00         3
    Eucalyptus       0.88      1.00      0.93         7
   Lady Finger       0.88      1.00      0.93         7
       Orchard       1.00      1.00      1.00         7
Persian Clover       0.50      0.50      0.50        12
     Sugarcane       0.00      0.00      0.00        11
        Tomato       0.33      0.47      0.39        19
         Wheat       0.43      0.38      0.40        16

      accuracy                           0.51        82
     macro avg       0.50      0.54      0.52        82
  weighted avg       0.47      0.51      0.49        82

Confusion Matrix:
[[0 0 0 0 0 0 2 1]
 [0 7 0 0 0 0 0 0]
 [0 0 7 0 0 0 0 0]
 [0 0 0 7 0 0 0 0]
 [0 0 0 0 6 0 4 2]
 [1 0 0 0 3 0 6 1]
 [1 1 1 0 1 2 9 4]
 [0 0 0 0 2 2 6 6]]


In [65]:
# 1. Build full‑year feature image & reduce → gdf_full
gdf_full = full_reduced  
# yields columns: NDVI, NDWI, B*_mean, contrast, area, perimeter, compactness

# 2. Build early‑Kharif NDVI image & reduce → gdf_early
ndvi_early, _ = get_ndvindwi(early_Kharif)  # use only NDVI
gdf_early = geemap.ee_to_gdf(
    ndvi_early.addBands(crop_fc)
     .reduceRegions(collection=crop_fc, reducer=ee.Reducer.mean(), scale=10)
)

# 3. Merge the two GeoDataFrames on parcel ID
gdf_feat = gdf_full.merge(
    gdf_early[['Parcel_ID','NDVI_mean']].rename(columns={'NDVI_mean':'NDVI_early'}),
    on='Parcel_ID'
)

# 4. Create delta feature
gdf_feat['delta_NDVI'] = gdf_feat['NDVI_early'] - gdf_feat['NDVI']

# 5. Final feature columns
feature_cols = [
    'NDVI','NDWI','B2_mean','B3_mean','B4_mean','B8_mean','B11_mean','B12_mean',
    'contrast','area_m2','perimeter_m','compactness',
    'delta_NDVI'
]

# 6. Train/test split & RF
X = gdf_feat[feature_cols].values
y = gdf_feat['Crop_Type'].values
X_train, X_test, y_train, y_test = train_test_split(
    X,y,test_size=0.2,stratify=y,random_state=42
)
rf = RandomForestClassifier(
    n_estimators=300, max_depth=15,
    class_weight='balanced_subsample', random_state=42
)
rf.fit(X_train,y_train)
y_pred = rf.predict(X_test)
print(classification_report(y_test,y_pred,zero_division=0))


Exception: Image.addBands, argument 'srcImg': Invalid type.
Expected type: Image<unknown bands>.
Actual type: FeatureCollection.