<a href="https://colab.research.google.com/github/ced-sys/AI---ML/blob/main/Untitled63.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install catboost

In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
import xgboost as xgb
import lightgbm as lgb
from catboost import CatBoostClassifier
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

In [None]:
class CroplandMapper:
  """Load cropland training data and derive per-sample satellite features.

  The class reads two training shapefiles and three CSV tables from
  ``data_path`` (``load_data``), summarises them (``explore_data``) and builds
  a wide feature table from the Sentinel-1 / Sentinel-2 tables
  (``create_features``).

  Attributes set by ``__init__``:
    data_path: root directory containing ``Train/``, ``Sentinel1.csv``,
      ``Sentinel2.csv`` and ``Test.csv``.
    models, scaler, feature_names, best_model: empty placeholders; nothing in
      this class populates them (presumably filled by later training code).

  Attributes set by the other methods:
    train_fergana, train_orenburg, sentinel1_data, sentinel2_data, test_data
      (``load_data``); combined_train (``explore_data``); features_df
      (``create_features``).
  """

  # Substrings that mark a column as a Sentinel-1 backscatter band.
  _S1_BAND_CODES = ('VV', 'VH')
  # Substrings that mark a column as a Sentinel-2 reflectance band.
  # NOTE(review): the original list repeated 'B11'; if 'B12' was intended,
  # add it here.
  _S2_BAND_CODES = ('B02', 'B03', 'B04', 'B08', 'B11')

  def __init__(self, data_path):
    """Store the data directory and initialise empty model bookkeeping."""
    self.data_path = data_path
    self.models = {}
    self.scaler = {}
    self.feature_names = []
    self.best_model = None

  def load_data(self):
    """Read the training shapefiles and the satellite/test CSV tables.

    The two expected shapefiles are tried first; if either fails, the first
    two ``.shp`` files (sorted by name) found in ``Train/`` are used instead.

    Returns:
      True when both training sets and all three CSV files were loaded,
      False otherwise (the reason is printed).
    """
    train_dir = f"{self.data_path}/Train"
    print("Loading data files...")

    # Broad catch on purpose: this is a best-effort loader reporting success
    # through its return value, and the reader backends raise varied types.
    try:
      self.train_fergana = gpd.read_file(f"{train_dir}/fergana_train.shp")
      self.train_orenburg = gpd.read_file(f"{train_dir}/orenburg_train.shp")
    except Exception as e:
      print(f"Error loading shapefiles: {e}")
      if not self._load_first_two_shapefiles(train_dir):
        return False
    else:
      print(f"Loaded Fergana training data: {len(self.train_fergana)} samples")
      print(f"Loaded Orenburg training data: {len(self.train_orenburg)} samples")

    try:
      self.sentinel1_data = pd.read_csv(f"{self.data_path}/Sentinel1.csv")
      self.sentinel2_data = pd.read_csv(f"{self.data_path}/Sentinel2.csv")
      self.test_data = pd.read_csv(f"{self.data_path}/Test.csv")
    except Exception as e:
      print(f"Error loading CSV files: {e}")
      return False

    return True

  def _load_first_two_shapefiles(self, train_dir):
    """Fallback: load the first two ``.shp`` files in *train_dir*; return success."""
    import os  # local import: the file's import cell does not import os

    try:
      # Sorted so that the file-to-attribute assignment is deterministic;
      # os.listdir returns entries in arbitrary order.
      shp_files = sorted(f for f in os.listdir(train_dir) if f.endswith('.shp'))
      print(f"Found shapefiles: {shp_files}")

      if len(shp_files) < 2:
        # Without two training sets the later steps cannot run, so report
        # failure instead of silently continuing.
        print(f"Failed to load shapefiles: expected at least 2 in {train_dir}, found {len(shp_files)}")
        return False

      self.train_fergana = gpd.read_file(f"{train_dir}/{shp_files[0]}")
      self.train_orenburg = gpd.read_file(f"{train_dir}/{shp_files[1]}")
    except Exception as e2:
      print(f"Failed to load shapefiles: {e2}")
      return False

    print("Loaded trainin data successfully")
    return True

  def explore_data(self):
    """Combine both training sets (without geometry) and print a summary.

    Requires ``load_data`` to have populated the training and Sentinel tables.

    Returns:
      The combined training DataFrame, also stored as ``self.combined_train``.
    """
    self.combined_train = pd.concat([
        self.train_fergana.drop(columns='geometry', errors='ignore'),
        self.train_orenburg.drop(columns='geometry', errors='ignore')
    ], ignore_index=True)

    print(f"\nCombined training data shape: {self.combined_train.shape}")
    print(f"\nColumns in training data: {list(self.combined_train.columns)}")

    if 'Target' in self.combined_train.columns:
      target_dist = self.combined_train['Target'].value_counts()
      total = len(self.combined_train)
      cropland = target_dist.get(1, 0)
      non_cropland = target_dist.get(0, 0)
      # Guard the percentages against an empty training table.
      cropland_pct = cropland / total * 100 if total else 0.0
      non_cropland_pct = non_cropland / total * 100 if total else 0.0
      print("\nTarget distribution:")
      print(f"Cropland (1): {cropland} ({cropland_pct:.1f}%)")
      print(f"Non-cropland (0): {non_cropland} ({non_cropland_pct:.1f}%)")

    print(f"\nSentinale-1 columns: {list(self.sentinel1_data.columns)}")
    print(f"Sentinel-2 columns: {list(self.sentinel2_data.columns)}")

    print(f"\nMissing values in Sentinel-1: {self.sentinel1_data.isnull().sum().sum()}")
    print(f"\nMissing values in Sentinel-2: {self.sentinel2_data.isnull().sum().sum()}")

    return self.combined_train

  def create_features(self):
    """Merge the Sentinel tables on ``ID`` and derive summary features.

    Steps: outer-merge both tables, fill missing numeric values with the
    column median, then add per-row band statistics (``S1_*`` / ``S2_*``),
    NDVI columns with their aggregates, and VV/VH ratio columns.

    Returns:
      The feature DataFrame, also stored as ``self.features_df``.
    """
    merged_data = pd.merge(self.sentinel1_data, self.sentinel2_data, on='ID', how='outer')
    print(f"Merged satellite data shape: {merged_data.shape}")

    numeric_cols = [
        col for col in merged_data.columns
        if col != 'ID' and pd.api.types.is_numeric_dtype(merged_data[col])
    ]
    for col in numeric_cols:
      if merged_data[col].isna().any():
        merged_data[col] = merged_data[col].fillna(merged_data[col].median())

    self.features_df = merged_data.copy()

    # Only numeric columns can feed the row-wise statistics below.
    s1_bands = [col for col in numeric_cols if any(code in col for code in self._S1_BAND_CODES)]
    s2_bands = [col for col in numeric_cols if any(code in col for code in self._S2_BAND_CODES)]

    print(f"Sentinel-1bands found: {len(s1_bands)}")
    print(f"Sentinel-2 bands found: {len(s2_bands)}")

    for band_group, band_name in ((s1_bands, 'S1'), (s2_bands, 'S2')):
      if band_group:
        self._add_band_statistics(merged_data[band_group], band_name)

    self._add_ndvi_features(merged_data, s2_bands)
    self._add_backscatter_ratios(merged_data, s1_bands)

    print(f"Total features created: {len(self.features_df.columns)}")
    return self.features_df

  def _add_band_statistics(self, band_data, band_name):
    """Add row-wise mean/std/min/max/range/cv/trend columns for one band group."""
    features = self.features_df
    features[f'{band_name}_mean'] = band_data.mean(axis=1)
    features[f'{band_name}_std'] = band_data.std(axis=1)
    features[f'{band_name}_min'] = band_data.min(axis=1)
    features[f'{band_name}_max'] = band_data.max(axis=1)
    features[f'{band_name}_range'] = features[f'{band_name}_max'] - features[f'{band_name}_min']
    # The small epsilon avoids division by zero when the mean is exactly 0.
    features[f'{band_name}_cv'] = features[f'{band_name}_std'] / (features[f'{band_name}_mean'] + 1e-8)
    features[f'{band_name}_trend'] = self._row_trends(band_data.to_numpy(dtype=float))

  @staticmethod
  def _row_trends(values):
    """Return the least-squares slope of each row of a 2-D float array.

    NaNs are dropped per row and the remaining values are regressed against
    0, 1, 2, ... (the same line ``np.polyfit(range(k), row, 1)`` would fit).
    Rows with fewer than two valid values get a slope of 0.
    """
    values = np.asarray(values, dtype=float)
    valid = ~np.isnan(values)
    counts = valid.sum(axis=1)

    # Rank of each valid value among the valid values of its row.
    positions = np.where(valid, np.cumsum(valid, axis=1) - 1, 0).astype(float)
    mean_pos = positions.sum(axis=1) / np.maximum(counts, 1)
    centred = np.where(valid, positions - mean_pos[:, None], 0.0)

    # slope = sum((x - mean_x) * y) / sum((x - mean_x) ** 2)
    numer = (centred * np.where(valid, values, 0.0)).sum(axis=1)
    denom = (centred ** 2).sum(axis=1)

    slopes = np.zeros(len(values))
    np.divide(numer, denom, out=slopes, where=denom > 0)
    return slopes

  def _add_ndvi_features(self, merged_data, s2_bands):
    """Add one NDVI column per (B08, B04) column pair plus NDVI aggregates."""
    nir_cols = [col for col in s2_bands if 'B08' in col]
    red_cols = [col for col in s2_bands if 'B04' in col]
    if not (nir_cols and red_cols):
      return

    # Columns are paired by position; this assumes both lists follow the same
    # acquisition order -- TODO confirm against the CSV layout.
    for nir_col, red_col in zip(nir_cols, red_cols):
      date_suffix = nir_col.split('_')[-1] if '_' in nir_col else ''
      ndvi_col = f'NDVI_{date_suffix}' if date_suffix else f'NDVI_{nir_col[-8:]}'

      nir_vals = merged_data[nir_col]
      red_vals = merged_data[red_col]
      self.features_df[ndvi_col] = (nir_vals - red_vals) / (nir_vals + red_vals + 1e-8)

    # Collected before the aggregates are added so they never feed themselves.
    ndvi_cols = [col for col in self.features_df.columns if 'NDVI' in col]
    if ndvi_cols:
      ndvi_data = self.features_df[ndvi_cols]
      self.features_df['NDVI_mean'] = ndvi_data.mean(axis=1)
      self.features_df['NDVI_std'] = ndvi_data.std(axis=1)
      self.features_df['NDVI_min'] = ndvi_data.min(axis=1)
      self.features_df['NDVI_max'] = ndvi_data.max(axis=1)
      self.features_df['NDVI_range'] = self.features_df['NDVI_max'] - self.features_df['NDVI_min']

  def _add_backscatter_ratios(self, merged_data, s1_bands):
    """Add one VV/VH ratio column per (VV, VH) column pair."""
    vv_cols = [col for col in s1_bands if 'VV' in col]
    vh_cols = [col for col in s1_bands if 'VH' in col]

    # Paired by position, like the NDVI columns.
    for vv_col, vh_col in zip(vv_cols, vh_cols):
      date_suffix = vv_col.split('_')[-1] if '_' in vv_col else ''
      ratio_col = f'VV_VH_ratio_{date_suffix}' if date_suffix else f'VV_VH_ratio_{vv_col[-8:]}'

      vv_vals = merged_data[vv_col]
      vh_vals = merged_data[vh_col]
      self.features_df[ratio_col] = vv_vals / (vh_vals + 1e-8)




