In [None]:
import os, sys
import rasterio
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
import fiona
import folium
import pandas as pd
import rasterio.mask
import rasterio
import utils
import requests
import subprocess
import plotly.express as px
import plotly.graph_objs as go
from concurrent.futures import ThreadPoolExecutor
from pathlib import Path

In [None]:
# Path of the hyperspectral image that is handed to utils.view_image_rgb_interactive below
# (presumably an AVIRIS-NG reflectance product, judging by the file name — confirm).
# NOTE(review): hard-coded, machine-specific absolute path — point it at your local copy before running.
hsi_img_path = r'C:\Users\brunolopez\Downloads\ang20150501t000352_rfl_v1f\ang20150501t000352_corr_v1f_img'

Let us view how this image looks in RGB

In [None]:
# Display the image at hsi_img_path through the project-local `utils` helper
# (presumably an interactive RGB composite, judging by the helper's name — confirm in utils).
utils.view_image_rgb_interactive(hsi_img_path)

Extracting KML files from the NASA AVIRIS website

In [None]:
class AVIRISDataProcessor:
    """Download KML outlines listed in an Excel sheet and convert them to Shapefiles.

    The Excel file is read once at construction time. Its rows must expose a
    ``Name`` column (used as the output file stem) and a ``link_kml_outline``
    column (the URL of the KML file; may be empty).

    Attributes:
        excel_file: Path of the Excel workbook describing the flight lines.
        kml_dir: Directory that receives the downloaded ``.kml`` files.
        shapefile_dir: Directory that receives the converted ``.shp`` files.
        aviris_path_df: DataFrame loaded from ``excel_file``.
    """

    def __init__(self, excel_file, kml_dir, shapefile_dir):
        self.excel_file = excel_file
        self.kml_dir = kml_dir
        self.shapefile_dir = shapefile_dir
        self.aviris_path_df = pd.read_excel(self.excel_file)

    def download_aviris_kml(self, timeout=30, max_workers=5):
        """Download every KML outline referenced in ``aviris_path_df`` in parallel.

        Rows without a link are skipped. HTTP, network and file-write failures
        are reported on stdout and do not stop the remaining downloads.

        Args:
            timeout: Seconds to wait for the server before giving up on a
                request. Without a timeout a stalled connection would block
                its worker thread indefinitely.
            max_workers: Number of concurrent download threads.

        Returns:
            list[Path]: Paths of the KML files that were written successfully.

        Raises:
            Exception: Any unexpected error raised inside a worker (for
                example a missing ``link_kml_outline`` column) is re-raised
                here instead of being silently dropped.
        """
        kml_root = Path(self.kml_dir)
        # Ensure the output directory exists
        kml_root.mkdir(parents=True, exist_ok=True)

        def download_kml(row):
            kml_link = row.link_kml_outline
            if pd.isna(kml_link):
                print(f"Skipping {row.Name} due to missing KML link")
                return None  # Skip if the link is NaN
            output_name = f"{row.Name}.kml"
            final_output_path = kml_root / output_name

            try:
                response = requests.get(kml_link, timeout=timeout)
                response.raise_for_status()  # Will raise an exception for HTTP errors
                final_output_path.write_bytes(response.content)
            except requests.exceptions.HTTPError as err:
                print(f"HTTP Error for {kml_link}: {err}")
                return None
            except requests.exceptions.RequestException as err:
                # Also covers timeouts and connection errors
                print(f"Request Exception for {kml_link}: {err}")
                return None
            except OSError as err:
                print(f"Could not write {final_output_path}: {err}")
                return None

            print(f"Downloaded: {output_name}")
            return final_output_path

        # Using ThreadPoolExecutor to download files in parallel. The results
        # are materialised with list() because Executor.map only re-raises a
        # worker's exception when its result is retrieved.
        with ThreadPoolExecutor(max_workers=max_workers) as executor:
            results = list(
                executor.map(download_kml, self.aviris_path_df.itertuples(index=False))
            )

        return [path for path in results if path is not None]

    def convert_kml_to_shapefiles_gdal(self):
        """Convert every ``*.kml`` file in ``kml_dir`` to an ESRI Shapefile.

        Each file is converted by invoking the external ``ogr2ogr`` command,
        which must be available on ``PATH``. A conversion that exits with a
        non-zero status is reported on stdout and the loop continues with the
        next file.
        """
        shapefile_root = Path(self.shapefile_dir)
        # Ensure the output directory exists
        shapefile_root.mkdir(parents=True, exist_ok=True)

        # List all KML files in the specified directory (sorted for stable logs)
        kml_files = sorted(Path(self.kml_dir).glob('*.kml'))
        print(f"Found {len(kml_files)} KML files.")

        for kml_file in kml_files:
            print(f"Converting {kml_file} to Shapefile...")
            shapefile_path = shapefile_root / (kml_file.stem + '.shp')

            # Construct the ogr2ogr command (argument list, so no shell quoting issues)
            cmd = [
                "ogr2ogr",
                "-f", "ESRI Shapefile",
                str(shapefile_path),
                str(kml_file)
            ]

            try:
                subprocess.run(cmd, check=True)
            except subprocess.CalledProcessError as e:
                print(f"Failed to convert {kml_file}: {e}")
            else:
                print(f"Saved Shapefile: {shapefile_path}")


Here we label each record based on its crop code from the i15 crop shapefile. The code block below maps every class and subclass code to a human-readable description.

In [None]:
#Code for labelling subclasses based on their crop type using the i15 crop shapefile

class ShapefileProcessor:
    """Attach human-readable class/subclass descriptions to a crop shapefile.

    The shapefile is read eagerly at construction time.

    Attributes:
        shapefile_path: Path of the input shapefile.
        load_shapefile: GeoDataFrame read from ``shapefile_path`` (the name is
            kept for backward compatibility even though it holds data).
        class_mapping: Nested dict returned by ``get_class_mapping``.
        output_shapefile_path: Destination of the labelled shapefile.
    """

    def __init__(self, shapefile_path, output_shapefile_path):
        self.shapefile_path = shapefile_path
        self.load_shapefile = gpd.read_file(self.shapefile_path)
        self.class_mapping = self.get_class_mapping()
        self.output_shapefile_path = output_shapefile_path

    def get_class_mapping(self):
        """Return the class-code -> description lookup table.

        Returns:
            dict: ``{class_code: {'description': str, 'subclasses': {code: str}}}``.
            Subclass codes are strings; the ``'**'`` key of every class holds
            the general description used when a record has no subclass.
        """
        return {
            'G': {
                'description': 'Grain and hay crops',
                'subclasses': {
                    '1': 'Barley',
                    '2': 'Wheat',
                    '3': 'Oats',
                    '6': 'Miscellaneous grain and hay',
                    '7': 'Mixed grain and hay',
                    '**': 'General grain and hay crops'
                }
            },
            'R': {
                'description': 'Rice',
                'subclasses': {
                    '1': 'Rice',
                    '2': 'Wild Rice',
                    '**': 'General rice'
                }
            },
            'F': {
                'description': 'Field crops',
                'subclasses': {
                    '1': 'Cotton',
                    '2': 'Safflower',
                    '3': 'Flax',
                    '4': 'Hops',
                    '5': 'Sugar beets',
                    '6': 'Corn (field & sweet)',
                    '7': 'Grain sorghum',
                    '8': 'Sudan',
                    '9': 'Castor beans',
                    '10': 'Beans (dry)',
                    '11': 'Miscellaneous field',
                    '12': 'Sunflowers',
                    '13': 'Hybrid sorghum/sudan',
                    '14': 'Millet',
                    '15': 'Sugar cane',
                    '16': 'Corn, Sorghum, or Sudan grouped for remote sensing only',
                    '**': 'General field crops'
                }
            },
            'P': {
                'description': 'Pasture',
                'subclasses': {
                    '1': 'Alfalfa & alfalfa mixtures',
                    '2': 'Clover',
                    '3': 'Mixed pasture',
                    '4': 'Native pasture',
                    '5': 'Induced high water table native pasture',
                    '6': 'Miscellaneous grasses',
                    '7': 'Turf farms',
                    '8': 'Bermuda grass',
                    '9': 'Rye grass',
                    '10': 'Klein grass',
                    '**': 'General pasture'
                }
            },
            'T': {
                'description': 'Truck, nursery & berry crops',
                'subclasses': {
                    '1': 'Artichokes',
                    '2': 'Asparagus',
                    '3': 'Beans (green)',
                    '4': 'Cole crops (mixture of 22-25)',
                    '6': 'Carrots',
                    '7': 'Celery',
                    '8': 'Lettuce (all types)',
                    '9': 'Melons, squash, and cucumbers (all types)',
                    '10': 'Onions & garlic',
                    '11': 'Peas',
                    '12': 'Potatoes',
                    '13': 'Sweet potatoes',
                    '14': 'Spinach',
                    '15': 'Tomatoes (processing)',
                    '16': 'Flowers, nursery & Christmas tree farms',
                    '17': 'Mixed (four or more)',
                    '18': 'Miscellaneous truck',
                    '19': 'Bush berries',
                    '20': 'Strawberries',
                    '21': 'Peppers (chili, bell, etc.)',
                    '22': 'Broccoli',
                    '23': 'Cabbage',
                    '24': 'Cauliflower',
                    '25': 'Brussels sprouts',
                    '26': 'Tomatoes (market)',
                    '27': 'Greenhouse',
                    '28': 'Blueberries',
                    '29': 'Asian leafy vegetables',
                    '30': 'Lettuce or Leafy Greens grouped for remote sensing only',
                    '31': 'Potato or Sweet potato grouped for remote sensing only',
                    '**': 'General truck, nursery & berry crops'
                }
            },
            'D': {
                'description': 'Deciduous fruits and nuts',
                'subclasses': {
                    '1': 'Apples',
                    '2': 'Apricots',
                    '3': 'Cherries',
                    '5': 'Peaches and nectarines',
                    '6': 'Pears',
                    '7': 'Plums',
                    '8': 'Prunes',
                    '9': 'Figs',
                    '10': 'Miscellaneous deciduous',
                    '11': 'Mixed deciduous',
                    '12': 'Almonds',
                    '13': 'Walnuts',
                    '14': 'Pistachios',
                    '15': 'Pomegranates',
                    '16': 'Plums, Prunes, or Apricots grouped for remote sensing only',
                    '**': 'General deciduous fruits and nuts'
                }
            },
            'C': {
                'description': 'Citrus and subtropical',
                'subclasses': {
                    '1': 'Grapefruit',
                    '2': 'Lemons',
                    '3': 'Oranges',
                    '4': 'Dates',
                    '5': 'Avocados',
                    '6': 'Olives',
                    '7': 'Miscellaneous subtropical fruit',
                    '8': 'Kiwis',
                    '9': 'Jojoba',
                    '10': 'Eucalyptus',
                    '11': 'Mixed subtropical fruits',
                    '**': 'General citrus and subtropical'
                }
            },
            'V': {
                'description': 'Vineyards',
                'subclasses': {
                    '1': 'Table grapes',
                    '2': 'Wine grapes',
                    '3': 'Raisin grapes',
                    '**': 'General vineyards'
                }
            },
            'YP': {
                'description': 'YP - no subclass',
                'subclasses': {
                    '**': 'General YP'
                }
            },
            'I': {
                'description': 'Idle',
                'subclasses': {
                    '1': 'Land not cropped the current or previous crop season, but cropped within the past three years',
                    '2': 'New lands being prepared for crop production',
                    '4': 'Long term, land consistently idle for four or more years',
                    '**': 'General idle'
                }
            },
            'X': {
                'description': 'Not cropped or unclassified, no subclass',
                'subclasses': {
                    '**': 'General not cropped or unclassified'
                }
            },
            'S': {
                'description': 'Semi-agricultural & incidental to agriculture',
                'subclasses': {
                    '1': 'Farmsteads (includes a farm residence)',
                    '2': 'Livestock feed lot operations',
                    '3': 'Dairies',
                    '4': 'Poultry farms',
                    '5': 'Farmsteads (without a farm residence)',
                    '6': 'Miscellaneous semi-agricultural (small roads, ditches, non-planted areas of cropped fields)',
                    '**': 'General semi-agricultural & incidental to agriculture'
                }
            },
            'U': {
                'description': 'Urban - generic nomenclature with no subclass',
                'subclasses': {
                    '**': 'General urban'
                }
            },
            'UR': {
                'description': 'Urban residential',
                'subclasses': {
                    '1': 'Single family dwellings with lot sizes greater than 1 acre up to 5 acres (ranchettes, etc.)',
                    '2': 'Single family dwellings with a density of 1 unit/acre up to 8+ units per acre',
                    '3': 'Multiple family (apartments, condominiums, townhouses, barracks, bungalows, duplexes, etc.)',
                    '4': 'Trailer courts',
                    '11': '0% to 25% area irrigated',
                    '12': '26% to 50% area irrigated',
                    '13': '51% to 75% area irrigated',
                    '14': '76% to 100% area irrigated',
                    '21': '0% to 25% area irrigated',
                    '22': '26% to 50% area irrigated',
                    '23': '51% to 75% area irrigated',
                    '24': '76% to 100% area irrigated',
                    '31': '0% to 25% area irrigated',
                    '32': '26% to 50% area irrigated',
                    '33': '51% to 75% area irrigated',
                    '34': '76% to 100% area irrigated',
                    '41': '0% to 25% area irrigated',
                    '42': '26% to 50% area irrigated',
                    '43': '51% to 75% area irrigated',
                    '44': '76% to 100% area irrigated',
                    '**': 'General urban residential'
                }
            },
            'UC': {
                'description': 'Commercial',
                'subclasses': {
                    '1': 'Offices, retailers, etc.',
                    '2': 'Hotels',
                    '3': 'Motels',
                    '4': 'Recreation vehicle parking, camp sites',
                    '5': 'Institutions (hospitals, prisons, reformatories, asylums, etc., having a reasonably constant 24-hour resident population)',
                    '6': 'Schools (yards to be mapped separately if large enough)',
                    '7': 'Municipal auditoriums, theaters, churches, buildings and stands associated with race tracks, football stadiums, baseball parks, rodeo arenas, amusement parks, etc.',
                    '8': 'Miscellaneous highwater use (to be used to indicate a high water use condition not covered by the above categories)',
                    '**': 'General commercial'
                }
            },
            'UI': {
                'description': 'Industrial',
                'subclasses': {
                    '1': 'Manufacturing, assembling, and general processing',
                    '2': 'Extractive industries (oil fields, rock quarries, gravel pits, rock and gravel processing plants, etc.)',
                    '3': 'Storage and distribution (warehouses, substations, railroad marshalling yards, tank farms, etc.)',
                    '6': 'Saw mills',
                    '7': 'Oil refineries',
                    '8': 'Paper mills',
                    '9': 'Meat packing plants',
                    '10': 'Steel and aluminum mills',
                    '11': 'Fruit and vegetable canneries and general food processing',
                    '12': 'Miscellaneous highwater use (to be used to indicate a high water use condition not covered by other categories)',
                    '13': 'Sewage treatment plant including ponds',
                    '14': 'Waste accumulation sites (public dumps, sewage sludge sites, landfill and hazardous waste sites, etc.)',
                    '15': 'Wind farms, solar collector farms, etc.',
                    '**': 'General industrial'
                }
            },
            'UL': {
                'description': 'Urban Landscape',
                'subclasses': {
                    '1': 'Lawn area – irrigated',
                    '2': 'Golf course – irrigated',
                    '3': 'Ornamental landscape (excluding lawns) – irrigated',
                    '4': 'Cemeteries – irrigated',
                    '5': 'Cemeteries - not irrigated',
                    '**': 'General UL'
                }
            },
            'UV': {
                'description': 'Vacant',
                'subclasses': {
                    '1': 'Unpaved areas (vacant lots, graveled surfaces, play yards, developable open lands within urban areas, etc.)',
                    '3': 'Railroad right of way',
                    '4': 'Paved areas (parking lots, paved roads, oiled surfaces, flood control channels, tennis court areas, auto sales lots, etc.)',
                    '6': 'Airport runways',
                    '7': 'Land in urban area that is not developable',
                    '**': 'General vacant'
                }
            },
            'NC': {
                'description': 'Native class - generic nomenclature with no subclass',
                'subclasses': {
                    '**': 'General native class'
                }
            },
            'NV': {
                'description': 'Native vegetation',
                'subclasses': {
                    '1': 'Grassland',
                    '2': 'Light brush',
                    '3': 'Medium brush',
                    '4': 'Heavy brush',
                    '5': 'Brush and timber',
                    '6': 'Forest',
                    '7': 'Oak woodland',
                    '**': 'General native vegetation'
                }
            },
            'NR': {
                'description': 'Riparian vegetation',
                'subclasses': {
                    '1': 'Marsh lands, tules and sedges',
                    '2': 'Natural highwater table meadow',
                    '3': 'Trees, shrubs or other larger stream side or watercourse vegetation',
                    '4': 'Seasonal duck marsh, dry or only partially wet during summer',
                    '5': 'Permanent duck marsh, flooded during summer',
                    '**': 'General riparian vegetation'
                }
            },
            'NW': {
                'description': 'Water Surface',
                'subclasses': {
                    '1': 'River or stream (natural fresh water channels)',
                    '2': 'Water channel (all sizes - ditches and canals - delivering water for irrigation and urban use - e.g. State Water Project, Central Valley Project, water district canals, etc.)',
                    '3': 'Water channel (all sizes - ditches and canals - for removing on-farm drainage water - surface runoff and subsurface drainage - e.g. Colusa drain, drainage ditches in Imperial)',
                    '4': 'Freshwater lake, reservoir, or pond (all sizes, includes ponds for stock, recreation, groundwater recharge, managed wetlands, on-farm storage, etc.)',
                    '5': 'Brackish and saline water (includes areas in estuaries, inland water bodies, the ocean, etc.)',
                    '6': 'Wastewater pond (dairy, sewage, cannery, winery, etc.)',
                    '7': 'Paved water conveyance channels within urban areas (mainly for flood control)',
                    '**': 'General water surface'
                }
            },
            'NB': {
                'description': 'Barren and Wasteland',
                'subclasses': {
                    '1': 'Dry stream channels',
                    '2': 'Mine tailings',
                    '3': 'Barren land',
                    '4': 'Salt flats',
                    '5': 'Sand dunes',
                    '**': 'General barren and wasteland'
                }
            },
            'E': {
                'description': 'Entry denied - no subclass',
                'subclasses': {
                    '**': 'General entry denied'
                }
            },
            'Z': {
                'description': 'Outside of study area - no subclass',
                'subclasses': {
                    '**': 'General outside of study area'
                }
            }
        }

    def label_dataset(self, df, class_col, subclass_col):
        """Add ``CLASS_DESCRIPTION`` and ``SUBCLASS_DESCRIPTION`` columns to ``df``.

        Args:
            df: Frame to label; it is modified in place and also returned.
            class_col: Name of the column holding class codes (e.g. ``'G'``).
            subclass_col: Name of the column holding subclass codes as strings
                (e.g. ``'2'``). A missing value (None/NaN) means "no subclass"
                and resolves to the class's general ``'**'`` entry.

        Returns:
            The same ``df`` with the two description columns set. Codes that
            are not in the mapping are labelled ``'Unknown'``.
        """
        def get_description(class_code, subclass_code):
            class_info = self.class_mapping.get(class_code, {})
            class_desc = class_info.get('description', 'Unknown')
            # Class-only codes carry no subclass; without this fallback the
            # '**' entries of the mapping could never be reached.
            if pd.isna(subclass_code):
                subclass_code = '**'
            subclass_desc = class_info.get('subclasses', {}).get(subclass_code, 'Unknown')
            return class_desc, subclass_desc

        # Plain iteration over the two columns also handles an empty frame,
        # where unpacking zip(*df.apply(..., axis=1)) would fail.
        descriptions = [
            get_description(class_code, subclass_code)
            for class_code, subclass_code in zip(df[class_col], df[subclass_col])
        ]
        df['CLASS_DESCRIPTION'] = [class_desc for class_desc, _ in descriptions]
        df['SUBCLASS_DESCRIPTION'] = [subclass_desc for _, subclass_desc in descriptions]
        return df

    def process_shapefile(self):
        """Label the single-use records of the shapefile and write the result.

        Keeps the rows whose ``MULTIUSE`` value is ``'S'``, splits ``MAIN_CROP``
        into ``CLASS_CODE`` (leading letters) and ``SUBCLASS_CODE`` (first run
        of digits), labels them and saves the frame as an ESRI Shapefile at
        ``output_shapefile_path``.

        Returns:
            The labelled copy of the filtered frame.
        """
        single_crops = self.load_shapefile[self.load_shapefile['MULTIUSE'] == 'S']
        df = single_crops.copy()

        # Split MAIN_CROP into CLASS and SUBCLASS for the labeling
        # (expand=False makes str.extract return a Series rather than a frame)
        df['CLASS_CODE'] = df['MAIN_CROP'].str.extract(r'([A-Za-z]+)', expand=False)
        df['SUBCLASS_CODE'] = df['MAIN_CROP'].str.extract(r'(\d+)', expand=False)
        # Label the dataset
        df = self.label_dataset(df, 'CLASS_CODE', 'SUBCLASS_CODE')

        # Ensure the output directory exists before writing
        Path(self.output_shapefile_path).parent.mkdir(parents=True, exist_ok=True)
        df.to_file(self.output_shapefile_path, driver='ESRI Shapefile')
        return df

Class for splitting the large labeled shapefile into individual shapefiles, one per crop type

In [None]:
class ShapefileSplitter:
    '''
    Class for separating the big i15 labeled shapefile into individual ones.

    The labeled shapefile is read at construction time and the output
    directory is created if it does not exist yet.
    '''

    # Characters replaced by '_' when a subclass label is turned into a file
    # name: the space (as before) plus path separators and the characters that
    # are not allowed in Windows file names.
    _UNSAFE_FILENAME_CHARS = ' <>:"/\\|?*'

    def __init__(self, processed_shapefile_path, output_dir):
        self.processed_shapefile_path = processed_shapefile_path
        self.output_dir = output_dir
        self.shapefile_df = gpd.read_file(self.processed_shapefile_path)
        Path(self.output_dir).mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _safe_filename(label):
        '''Return ``label`` as a string with file-name-hostile characters replaced by "_".'''
        table = str.maketrans({ch: '_' for ch in ShapefileSplitter._UNSAFE_FILENAME_CHARS})
        return str(label).translate(table)

    def split_shapefile_by_subclass(self, subclass_col='SUBCLASS_D'):
        '''
        Write one shapefile per distinct value of ``subclass_col``.

        Rows whose subclass value is missing are skipped. Each output file is
        named after the subclass label; a label such as
        "Hybrid sorghum/sudan" would otherwise point into a sub-directory
        that does not exist, so separators are replaced as well as spaces.

        Args:
            subclass_col: Column to split on. Defaults to ``'SUBCLASS_D'``.

        Returns:
            list[Path]: The paths of the shapefiles that were written.
        '''
        written_paths = []
        # dropna(): a missing label cannot be turned into a file name
        unique_subclasses = self.shapefile_df[subclass_col].dropna().unique()

        for subclass in unique_subclasses:
            subclass_df = self.shapefile_df[self.shapefile_df[subclass_col] == subclass]
            subclass_shapefile_name = f"{self._safe_filename(subclass)}.shp"
            subclass_shapefile_path = Path(self.output_dir) / subclass_shapefile_name
            subclass_df.to_file(subclass_shapefile_path)
            written_paths.append(subclass_shapefile_path)

        return written_paths

In [None]:
class ShapefileToMap:
    """Render a shapefile as an interactive Folium map.

    Typical use: ``load_shapefile()`` -> ``create_map()`` -> ``save_map()`` /
    ``show_map()``.

    Attributes:
        shapefile_path: ``Path`` of the shapefile to display.
        gdf: GeoDataFrame set by ``load_shapefile`` (``None`` until then).
        map: ``folium.Map`` set by ``create_map`` (``None`` until then).
    """

    def __init__(self, shapefile_path):
        self.shapefile_path = Path(shapefile_path)
        self.gdf = None
        self.map = None

    def load_shapefile(self):
        """
        Load the shapefile into a GeoDataFrame.

        Raises:
            FileNotFoundError: If ``shapefile_path`` does not exist.
        """
        if not self.shapefile_path.exists():
            raise FileNotFoundError(f"Shapefile {self.shapefile_path} not found.")

        self.gdf = gpd.read_file(self.shapefile_path)
        print(f"Shapefile {self.shapefile_path} loaded successfully.")

    def _default_location(self):
        """Return ``[lat, lon]`` of the mean feature centroid in WGS84 degrees."""
        if self.gdf.empty:
            raise ValueError("Shapefile contains no features; pass an explicit location.")

        # Centroids are taken once, in the layer's own CRS, and only then
        # converted to EPSG:4326. Folium expects latitude/longitude, whereas
        # a projected layer stores coordinates in metres or feet.
        centroids = self.gdf.geometry.centroid
        if self.gdf.crs is not None:
            centroids = centroids.to_crs(epsg=4326)
        return [centroids.y.mean(), centroids.x.mean()]

    def create_map(self, location=None, zoom_start=10):
        """
        Create a Folium map and add the shapefile data to it.

        Args:
            location: ``[lat, lon]`` of the initial map centre. When omitted,
                the mean centroid of the loaded features (in WGS84) is used.
            zoom_start: Initial zoom level passed to ``folium.Map``.

        Raises:
            ValueError: If no shapefile has been loaded, or if the loaded
                layer has no features and no ``location`` was given.
        """
        if self.gdf is None:
            raise ValueError("Geodataframe is empty. Load a shapefile first.")

        # Set the map's initial location and zoom level
        if location is None:
            location = self._default_location()

        self.map = folium.Map(location=location, zoom_start=zoom_start)

        # Add the shapefile data to the map
        folium.GeoJson(self.gdf).add_to(self.map)
        print("Map created with shapefile data.")

    def save_map(self, output_path):
        """
        Save the Folium map to an HTML file.

        Raises:
            ValueError: If ``create_map`` has not been called yet.
        """
        if self.map is None:
            raise ValueError("Map has not been created. Create a map first.")

        self.map.save(output_path)
        print(f"Map saved to {output_path}.")

    def show_map(self):
        """
        Return the Folium map so that a notebook cell can display it.

        Raises:
            ValueError: If ``create_map`` has not been called yet.
        """
        if self.map is None:
            raise ValueError("Map has not been created. Create a map first.")

        return self.map

# Example usage: render one of the per-crop shapefiles as an interactive Folium map.

# NOTE(review): hard-coded, machine-specific absolute path — point it at your own split shapefile.
shapefile_path = r'C:\Users\brunolopez\mldata\crops_split_shp\Almonds.shp'
output_html = 'shapefile_map.html'
    
map_creator = ShapefileToMap(shapefile_path)
map_creator.load_shapefile()
map_creator.create_map()
map_creator.save_map(output_html)
    
# As the last expression of the cell, this displays the map inline in a Jupyter Notebook.
map_creator.show_map()
