In [21]:
import geopandas as gpd
import pandas as pd
import numpy as np
import joblib
import geemap
import ee 

In [2]:
# Authenticate this session with Earth Engine (the cell output reports the saved token).
ee.Authenticate()


Successfully saved authorization token.


In [10]:
# Initialize the Earth Engine client before any ee.* objects are built below.
ee.Initialize()

In [9]:
# Load the parcel layer; each row is one parcel with land-use and crop attributes.
cropfile = 'pabbi_crop.geojson'
gdf = gpd.read_file(cropfile)
# Preview the first rows to check the columns that were read.
gdf.head()

Unnamed: 0,Mouza_Name,Landuse_Ma,Area_Acre,FFID,Parcel_ID,Crop_Type,geometry
0,Khushmaqam,Agriculture,0.101785,1,668.0,,"MULTIPOLYGON (((753923.077 3769111.141, 753894..."
1,Khushmaqam,Built up,0.036718,2,670.0,,"MULTIPOLYGON (((753959.894 3769126.291, 753959..."
2,Khushmaqam,Agriculture,0.315557,3,632.0,,"MULTIPOLYGON (((753839.267 3769129.693, 753841..."
3,Khushmaqam,Agriculture,0.187644,4,669.0,,"MULTIPOLYGON (((753952.852 3769131.737, 753946..."
4,Khushmaqam,Agriculture,0.161568,5,693.0,,"MULTIPOLYGON (((754361.202 3769166.424, 754269..."


# Select the rows that have no Crop_Type and whose Landuse_Ma is 'Agriculture'.
testinggdf = gdf.loc[gdf['Crop_Type'].isna() & (gdf['Landuse_Ma'] == 'Agriculture')]
testinggdf

# Fill Crop_Type with the placeholder 'a' before saving. This step was described
# here but never executed, so the saved file kept empty labels. .assign() returns
# a new frame, so the filtered slice of gdf is not written to in place.
testinggdf = testinggdf.assign(Crop_Type='a')
testinggdf.to_file('real_crop_testing.geojson', driver='GeoJSON')

In [4]:
# The filtered parcels are stored in real_crop_testing.geojson; reload them from disk.
file = 'real_crop_testing.geojson'
cropgdf = gpd.read_file(file)


In [None]:
# Preview the first rows and report how many parcels were loaded.
print(cropgdf.head())
print(len(cropgdf))

5288


In [11]:
# Convert both GeoDataFrames to Earth Engine feature collections.
gdf_fc = geemap.gdf_to_ee(gdf) # all parcels; used to filter and clip the imagery
cropfeat=geemap.gdf_to_ee(cropgdf) # filtered parcels; used for the per-parcel reduction

In [14]:
# Build a cloud-filtered Sentinel-2 median composite for one date window.
def get_imagery(start_date, end_date, region=None, max_cloud_pct=10):
    """Return a median composite of 'COPERNICUS/S2_SR_HARMONIZED' clipped to a region.

    Args:
        start_date: Start of the date window, passed to ``filterDate``.
        end_date: End of the date window, passed to ``filterDate``.
            NOTE(review): the Earth Engine docs describe the end date as
            exclusive, so the end day itself is not included - confirm
            that this is intended.
        region: Feature collection used both for ``filterBounds`` and, through
            ``region.geometry()``, for clipping. Defaults to the notebook-level
            ``gdf_fc`` so existing two-argument calls behave as before.
        max_cloud_pct: Scenes are kept only when their
            'CLOUDY_PIXEL_PERCENTAGE' property is below this value.

    Returns:
        The clipped median image of the filtered collection.
    """
    if region is None:
        # Resolved at call time so the function follows the current notebook state.
        region = gdf_fc
    collection = (
        ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
        .filterDate(start_date, end_date)
        .filterBounds(region)
        .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', max_cloud_pct))
    )
    return collection.median().clip(region.geometry())


In [15]:
# Separate for the Rabi and Kharif seasons of the current year


def get_ndvindwi(median_image):
    """Return the NDVI and NDWI layers of a composite as an ``(ndvi, ndwi)`` pair.

    NDVI is the normalized difference of B8 and B4. NDWI follows the McFeeters
    form, (Green - NIR) / (Green + NIR), i.e. the normalized difference of B3
    and B8. Each layer is renamed to its index name.
    """
    index_pairs = {'NDVI': ['B8', 'B4'], 'NDWI': ['B3', 'B8']}
    layers = [
        median_image.normalizedDifference(pair).rename(label)
        for label, pair in index_pairs.items()
    ]
    return tuple(layers)

def get_mndwi(median_image):
    """Return the MNDWI layer: the normalized difference of B3 and B11, renamed 'MNDWI'."""
    band_pair = ['B3', 'B11']
    return median_image.normalizedDifference(band_pair).rename('MNDWI')

def get_savi(median_image):
    """Return the Soil Adjusted Vegetation Index (SAVI) layer, renamed 'SAVI'.

    The expression is (NIR - RED) / (NIR + RED + L) * (1 + L) with NIR = B8,
    RED = B4 and L = 0.5.

    NOTE(review): the feature table printed later in this notebook shows band
    means in the thousands and SAVI close to 1.5 x NDVI, which suggests L = 0.5
    has almost no effect at this value scale. Confirm how the saved model's
    training features were computed before rescaling anything here.
    """
    nir_band = median_image.select('B8')
    red_band = median_image.select('B4')
    savi_inputs = {'NIR': nir_band, 'RED': red_band, 'L': 0.5}
    formula = '(NIR - RED) / (NIR + RED + L) * (1 + L)'
    return median_image.expression(formula, savi_inputs).rename('SAVI')
def get_arvi(median_image):
    """Return the Atmospherically Resistant Vegetation Index (ARVI) layer, renamed 'ARVI'.

    The expression is (NIR - (2 * RED - BLUE)) / (NIR + (2 * RED - BLUE)) with
    NIR = B8, RED = B4 and BLUE = B2.
    """
    nir_band = median_image.select('B8')
    red_band = median_image.select('B4')
    blue_band = median_image.select('B2')
    arvi_inputs = {'NIR': nir_band, 'RED': red_band, 'BLUE': blue_band}
    formula = '(NIR - (2 * RED - BLUE)) / (NIR + (2 * RED - BLUE))'
    return median_image.expression(formula, arvi_inputs).rename('ARVI')

def get_otherbands(median_image):
    """Select six raw bands and rename each one to '<band>_mean'.

    No averaging happens here; only the band names change. The '_mean' suffix
    is presumably chosen to match the per-parcel mean reduction applied later.
    """
    renamed = {
        band: f'{band}_mean'
        for band in ('B2', 'B3', 'B4', 'B8', 'B11', 'B12')
    }
    return median_image.select(list(renamed)).rename(list(renamed.values()))

def get_texture(median_image):
    """Return ``(contrast, entropy)`` texture layers derived from band B8.

    B8 is cast with ``toInt32()`` (presumably because ``glcmTexture`` expects
    integer-valued input - confirm), then ``glcmTexture(size=3)`` is applied.
    'B8_contrast' is renamed 'contrast' and 'B8_ent' is renamed 'entropy'.
    """
    glcm = median_image.select('B8').toInt32().glcmTexture(size=3)
    renames = (('B8_contrast', 'contrast'), ('B8_ent', 'entropy'))
    contrast, entropy = (glcm.select(src).rename(dst) for src, dst in renames)
    return contrast, entropy

def get_features(ndvi, ndwi, band_img, contrast, mndwi,entropy,savi,arvi):
    """Stack every feature layer onto ``band_img`` and return the combined image.

    The index layers are appended in the order ndvi, ndwi, mndwi, entropy,
    savi, arvi; ``contrast`` is appended last. The parameter order differs
    from the stacking order, so keyword arguments are the safest way to call
    this function.
    """
    index_layers = [ndvi, ndwi, mndwi, entropy, savi, arvi]
    return band_img.addBands(index_layers).addBands(contrast)


In [16]:
#---------------------------
# Per-parcel reduction

def get_reduction(fc, regions=None, scale=10):
    """Average every band of an image over each polygon and return a GeoDataFrame.

    Args:
        fc: Despite the name, this is the object being reduced; the notebook
            passes the stacked feature image. It only needs to provide
            ``reduceRegions``.
        regions: Feature collection of polygons to reduce over. Defaults to the
            notebook-level ``cropfeat`` so existing one-argument calls behave
            as before.
        scale: Value passed as ``scale`` to ``reduceRegions`` (10 by default).

    Returns:
        The result of ``geemap.ee_to_gdf`` applied to the reduced collection.
        The columns listed later in this notebook show one column per band
        next to the original parcel attributes.
    """
    if regions is None:
        # Resolved at call time so the function follows the current notebook state.
        regions = cropfeat
    per_parcel = fc.reduceRegions(
        collection=regions,
        reducer=ee.Reducer.mean(),
        scale=scale,
    )
    # Convert the reduced collection to a GeoDataFrame.
    return geemap.ee_to_gdf(per_parcel)



In [17]:
#----------------------------------
# Parcel shape metrics: area, perimeter and compactness
def get_area(cropgdf, epsg=32643):
    """Add 'Area_m2', 'Perimeter_m' and 'Compactness' columns and return the frame in EPSG:4326.

    Args:
        cropgdf: GeoDataFrame of parcels. It is reprojected with ``to_crs``
            first, and the new columns are written to that reprojected frame.
        epsg: EPSG code of the projected CRS used for the measurements.
            Defaults to 32643, the value that was previously hard-coded.
            NOTE(review): the raw parcel coordinates shown at the top of the
            notebook look like projected metres; confirm which zone best
            fits the study area.

    Returns:
        The frame with the three extra columns, reprojected to EPSG:4326.
    """
    projected = cropgdf.to_crs(epsg=epsg)
    projected['Area_m2'] = projected.geometry.area
    projected['Perimeter_m'] = projected.geometry.length
    # Compactness = 4 * pi * area / perimeter**2.
    # NOTE(review): pi is approximated as 3.14. It is kept unchanged so the
    # values stay comparable with whatever the saved model was trained on -
    # confirm before switching to math.pi.
    projected['Compactness'] = (4 * 3.14 * projected['Area_m2']) / (projected['Perimeter_m'] ** 2)
    # Switch back to lat/lon for mapping.
    return projected.to_crs(epsg=4326)



In [18]:
# Build the Rabi composite and derive every feature layer from it.
rabi = get_imagery('2024-10-01','2025-03-31')

ndvi_rabi, ndwi_rabi = get_ndvindwi(rabi)

savi_rabi = get_savi(rabi)

arvi_rabi = get_arvi(rabi)

mndwi_rabi = get_mndwi(rabi)

band_rabi = get_otherbands(rabi)

contrast_rabi, entropy_rabi = get_texture(rabi)

# Layers are passed by keyword because get_features expects
# (..., contrast, mndwi, entropy, ...), an order that is easy to mix up
# when the arguments are given positionally.
rabi_feats = get_features(
    ndvi=ndvi_rabi,
    ndwi=ndwi_rabi,
    band_img=band_rabi,
    contrast=contrast_rabi,
    mndwi=mndwi_rabi,
    entropy=entropy_rabi,
    savi=savi_rabi,
    arvi=arvi_rabi,
)



In [20]:
# Reduce the Rabi feature image to per-parcel values (one row per parcel).
rabi_reduced=get_reduction(rabi_feats)

In [22]:
# Add the area, perimeter and compactness columns to rabi_reduced.
rabi_reduced=get_area(rabi_reduced)

In [23]:
# List the columns available after the reduction and shape-metric steps.
rabi_reduced.columns

Index(['geometry', 'ARVI', 'Area_Acre', 'B11_mean', 'B12_mean', 'B2_mean',
       'B3_mean', 'B4_mean', 'B8_mean', 'Crop_Type', 'FFID', 'Landuse_Ma',
       'MNDWI', 'Mouza_Name', 'NDVI', 'NDWI', 'Parcel_ID', 'SAVI', 'contrast',
       'entropy', 'Area_m2', 'Perimeter_m', 'Compactness'],
      dtype='object')

In [24]:
# Count the Crop_Type values; at this point only the placeholder label is present.
rabi_reduced['Crop_Type'].value_counts()

Crop_Type
a    5288
Name: count, dtype: int64

In [25]:
# Feature columns expected in rabi_reduced.
# NOTE: this list is replaced by pipeline['feature_cols'] once the saved
# pipeline is loaded in the prediction cell.
feature_cols = [
    # per-parcel values of the raw bands, spectral indices and texture layers
    'B2_mean', 'B3_mean', 'B4_mean', 'B8_mean', 'B11_mean', 'B12_mean','ARVI',
    'SAVI','NDVI', 'NDWI', 'contrast','MNDWI','entropy',

    # shape metrics
    'Area_m2','Perimeter_m','Compactness'

]


In [None]:
# Confirm the target class counts and inspect the feature columns.
# NOTE(review): value_counts() over the float feature columns counts distinct
# rows, so the output lists rows with a count of 1; describe() would likely be
# a more useful summary here.

print(rabi_reduced['Crop_Type'].value_counts().to_frame('count'))
print(rabi_reduced[feature_cols].value_counts())

           count
Crop_Type       
a           5288
                                                                                                                                                                                   count
B2_mean     B3_mean     B4_mean     B8_mean     B11_mean    B12_mean    ARVI      SAVI     NDVI     NDWI      contrast     MNDWI     entropy  Area_m2     Perimeter_m Compactness       
1497.155556 1735.107801 1990.675177 2342.493617 2475.573050 2441.695745 -0.029045 0.122933 0.081965 -0.150985 66090.655616 -0.178059 4.352179 229.589408  61.077188   0.773007         1
342.239309  539.461349  455.721217  2151.990954 1243.381579 773.943257   0.585195 0.977162 0.651566 -0.598909 49302.192725 -0.395061 4.338324 396.694491  108.454549  0.423594         1
347.981897  486.023676  400.400206  1892.511564 1158.293970 680.916731   0.615701 0.976124 0.650893 -0.590743 57975.827657 -0.407705 4.314263 1680.113701 264.864248  0.300803         1
357.128555  593.677017  

In [36]:
# Revised prediction: label a parcel 'Unknown' when the model is not confident enough.
# joblib.load unpickles the file, so only load pipelines from a trusted source.
pipeline = joblib.load('crop_rf_pipeline.pkl')
rf_model = pipeline['model']
# Use the feature list stored with the model; this replaces any earlier feature_cols.
feature_cols = pipeline['feature_cols']

# Missing feature values are replaced with 0 - TODO confirm this matches training.
X = rabi_reduced[feature_cols].fillna(0).values

# Compute the class probabilities and keep the highest one per parcel.
probs = rf_model.predict_proba(X)
max_probs = probs.max(axis=1)

# Class label that corresponds to the highest probability of each row.
pred_class = rf_model.classes_[probs.argmax(axis=1)]
# Apply the threshold: label 'Unknown' when the top probability is below 0.6.
threshold = 0.60

# Overwrites the existing Crop_Type column of rabi_reduced in place.
rabi_reduced['Crop_Type'] = np.where(max_probs >= threshold, pred_class, 'Unknown')

# (Optional) Summarize the counts per class.
counts = rabi_reduced['Crop_Type'].value_counts().to_frame('Count')
print(counts)



                Count
Crop_Type            
Unknown          3086
Wheat            1575
Persian Clover    533
Orchard            52
Eucalyptus         28
Potato             14


In [None]:
# Print the number of parcels per predicted Crop_Type (class counts, not probabilities).
print(rabi_reduced['Crop_Type'].value_counts().to_frame('count'))

                count
Crop_Type            
Unknown          3086
Wheat            1575
Persian Clover    533
Orchard            52
Eucalyptus         28
Potato             14


# Baseline prediction without the probability threshold.
# The labels are written to a separate frame, so a thresholded Crop_Type column
# already stored in rabi_reduced is not overwritten by this cell.

# 1. Load the saved pipeline.
# joblib.load unpickles the file, so only load pipelines from a trusted source.
pipeline = joblib.load('crop_rf_pipeline.pkl')
rf_model = pipeline['model']
feature_cols = pipeline['feature_cols']

# 2. Prepare the feature matrix (NaNs become 0 - TODO confirm this matches training).
X = rabi_reduced[feature_cols].fillna(0).values

# 3. Predict into a copy of the frame; .assign() leaves rabi_reduced untouched.
rabi_raw_pred = rabi_reduced.assign(Crop_Type=rf_model.predict(X))

# 4. Save the un-thresholded predictions to their own file.
rabi_raw_pred.to_file("new_Prediction.geojson", driver="GeoJSON")

print("Prediction complete — GeoJSON with predicted Crop_Type saved.")


In [40]:
# Save the current Crop_Type predictions held in rabi_reduced.
# to_file() writes the file and returns None, so its result is not bound to a name.
rabi_reduced.to_file("new_Prediction_V1.geojson", driver='GeoJSON')


# Results and Conclusion
The results are not good enough because built-up parcels that were initially recorded as agriculture are labelled as wheat.
Way forward: first split the parcels based on Landuse_Ma, then use only the clean parcels for crop detection.