In [15]:
# Standard Library Imports
from concurrent.futures import ThreadPoolExecutor, as_completed
from joblib import Parallel, delayed
from tqdm import tqdm
from datetime import datetime
from datetime import timedelta
from pathlib import Path
import os
import random

from tqdm.notebook import tqdm
import time

# Third-Party Imports
import ee
import geemap
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import rasterio
from lightgbm import LGBMClassifier
from shapely.affinity import scale, translate
from skimage import exposure
from sklearn.metrics import classification_report, f1_score
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.preprocessing import LabelEncoder

from shapely import wkt
import geopandas as gpd

import warnings
warnings.filterwarnings('ignore')

import logging
# Set up a logger to capture Rasterio warnings
logging.getLogger("rasterio._env").setLevel(logging.ERROR)

In [16]:
# Project root as a relative path: the parent of the current working directory.
# Every data path below is built by joining onto this.
root_path: Path = Path("..")

In [17]:
# Initialize Earth Engine against a specific Cloud project.
# To use a different project without editing this cell, set the EE_PROJECT
# environment variable before starting the kernel; when it is unset or empty
# the original project ID below is used.
# NOTE(review): uncomment the next line if this machine has no stored
# Earth Engine credentials yet.
#ee.Authenticate()
ee.Initialize(project=os.environ.get("EE_PROJECT") or "ee-crop-health-telangana")

In [18]:
# Read each split from CSV, parse its WKT 'geometry' column into geometry
# objects, and wrap the result in a GeoDataFrame with CRS 'epsg:4326'.
train = gpd.GeoDataFrame(
    pd.read_csv(root_path / 'data/train.csv').assign(
        geometry=lambda frame: frame['geometry'].apply(wkt.loads)
    ),
    crs='epsg:4326',
)
test = gpd.GeoDataFrame(
    pd.read_csv(root_path / 'data/test.csv').assign(
        geometry=lambda frame: frame['geometry'].apply(wkt.loads)
    ),
    crs='epsg:4326',
)

# Stack both splits into one table so they can be processed together.
# The added 'dataset' column records which split each row came from, and the
# index is renumbered from 0 after stacking.
data = pd.concat(
    [
        split_frame.assign(dataset=split_name)
        for split_name, split_frame in (('train', train), ('test', test))
    ]
).reset_index(drop=True)


In [21]:
# Load the table that records the downloaded Sentinel-2 files.
enriched_data = pd.read_csv(
    root_path / 'data/sentinel-2-all/sentinel_downloads.csv'
)
# Show the first row's 'tif_paths' value. In the recorded output it is a single
# string holding the textual form of a list of .tif paths, not a real list.
enriched_data['tif_paths'].iloc[0]

"['../data/sentinel-2-all/S2_1326576_20240330.tif', '../data/sentinel-2-all/S2_1326576_20240404.tif', '../data/sentinel-2-all/S2_1326576_20240414.tif', '../data/sentinel-2-all/S2_1326576_20240424.tif']"