In [None]:
import os
import requests
import zipfile
from pathlib import Path
import geopandas as gpd
from tqdm import tqdm

Downloading the county shapefiles (U.S. Census TIGER/Line)

In [None]:
def download_tiger_counties(year: int, output_dir: str = "./dataset/county_shapefiles"):
    """
    Downloads TIGER/Line U.S. county shapefile for a specific year and unzips it.

    Both steps are cached on disk: the archive is fetched only if the zip is
    missing, and extracted only if the target directory is missing. Each step
    writes to a temporary sibling path and is renamed into place on success, so
    an interrupted run never leaves a truncated zip or a half-filled directory
    that a later run would mistake for a finished one.

    Args:
        year: TIGER/Line vintage, used to build both the URL and the file names.
        output_dir: Directory that receives the zip and the extracted folder.
            Created if it does not exist.

    Returns:
        Path to the directory containing the extracted shapefile components.

    Raises:
        Exception: If the server answers with a status code other than 200.
        requests.exceptions.RequestException: On connection errors or timeouts.
        zipfile.BadZipFile: If the archive is corrupt.
    """
    # Function-scope import keeps this cell runnable without touching the import cell.
    import shutil

    base_url = f"https://www2.census.gov/geo/tiger/TIGER{year}/COUNTY/"
    filename = f"tl_{year}_us_county.zip"
    download_url = base_url + filename

    # Create directory if not exists
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    zip_path = os.path.join(output_dir, filename)
    extract_path = os.path.join(output_dir, f"tl_{year}_us_county")

    if not os.path.exists(zip_path):
        print(f"Downloading {filename}...")
        partial_zip = zip_path + ".part"
        try:
            # (connect, read) timeouts: fail instead of hanging forever on a stalled server.
            with requests.get(download_url, stream=True, timeout=(10, 60)) as response:
                if response.status_code != 200:
                    raise Exception(f"Failed to download: {download_url} (Status code: {response.status_code})")
                with open(partial_zip, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=1024 * 1024):
                        if chunk:
                            f.write(chunk)
            # Publish the archive only once it has been written completely.
            os.replace(partial_zip, zip_path)
        finally:
            # Still present only when the download failed part-way.
            if os.path.exists(partial_zip):
                os.remove(partial_zip)
        print("Downloaded.")

    # Unzip if not already extracted
    if not os.path.exists(extract_path):
        print(f"Unzipping to {extract_path}...")
        staging_path = extract_path + ".partial"
        # Discard leftovers from a previous run that was killed mid-extraction.
        shutil.rmtree(staging_path, ignore_errors=True)
        try:
            os.makedirs(staging_path)
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(staging_path)
            os.rename(staging_path, extract_path)
        except BaseException:
            # Never leave a partial directory that would pass the existence check above.
            shutil.rmtree(staging_path, ignore_errors=True)
            raise
        print("Unzipped.")
    else:
        print(f"Already unzipped: {extract_path}")

    return extract_path

def load_county_shapefile(year: int, output_dir: str = "./dataset/county_shapefiles") -> gpd.GeoDataFrame:
    """
    Loads the unzipped shapefile for the specified year into a GeoDataFrame.

    The archive is downloaded and extracted first if it is not already on disk
    (see download_tiger_counties).

    Args:
        year: TIGER/Line vintage to load.
        output_dir: Directory holding (or receiving) the downloaded data.

    Returns:
        The contents of the extracted .shp file as read by geopandas.

    Raises:
        FileNotFoundError: If the extracted directory contains no .shp file.
    """
    shapefile_dir = download_tiger_counties(year, output_dir)

    # Sort so the choice is deterministic; os.listdir order is arbitrary.
    shp_files = sorted(f for f in os.listdir(shapefile_dir) if f.lower().endswith(".shp"))
    if not shp_files:
        raise FileNotFoundError(f"No .shp file found in {shapefile_dir}")
    shp_path = os.path.join(shapefile_dir, shp_files[0])

    gdf = gpd.read_file(shp_path)
    print(f"Loaded shapefile for {year}. Columns: {list(gdf.columns)}")
    return gdf

# Example usage: download (if not already cached) and load the county
# boundaries for one year, then print the first rows as a sanity check.
# `year` and `counties_gdf` are bound at module level by this block.
if __name__ == "__main__":
    year = 2022  # You can loop over [2017, 2018, ..., 2022]
    counties_gdf = load_county_shapefile(year)
    print(counties_gdf.head())


Downloading the CDL (Cropland Data Layer) data from USDA NASS

In [None]:
def download_national_cdl(years, out_dir='./dataset/cdl_data'):
    """
    Downloads and extracts the national 30 m CDL archive for each requested year.

    For every year the function skips work that is already done: a year whose
    extraction directory exists is skipped entirely, and an existing zip is not
    downloaded again. Downloads and extractions are staged under temporary
    names and renamed into place on success, so an interrupted run cannot leave
    a truncated zip or a partial directory that later runs would treat as
    complete.

    Args:
        years: Iterable of years; each is formatted into the archive name
            "<year>_30m_cdls.zip" and into the extraction directory name.
        out_dir: Directory that receives the zips and the per-year folders.
            Created if it does not exist.

    Returns:
        None. Progress is reported on stdout.

    Raises:
        requests.exceptions.RequestException: On connection errors or timeouts.
        zipfile.BadZipFile: If an archive is corrupt.

    A non-200 response is reported and that year is skipped; it does not raise.
    """
    # Function-scope import keeps this cell runnable without touching the import cell.
    import shutil

    base_url = "https://www.nass.usda.gov/Research_and_Science/Cropland/Release/datasets/"
    os.makedirs(out_dir, exist_ok=True)

    for year in years:
        filename = f"{year}_30m_cdls.zip"
        url = base_url + filename
        zip_path = os.path.join(out_dir, filename)
        extract_path = os.path.join(out_dir, str(year))

        if os.path.exists(extract_path):
            print(f"[✓] Already extracted: {extract_path}")
            continue

        if not os.path.exists(zip_path):
            print(f"Downloading {filename}...")
            partial_zip = zip_path + ".part"
            # (connect, read) timeouts: fail instead of hanging forever on a stalled server.
            with requests.get(url, stream=True, timeout=(10, 120)) as response:
                if response.status_code != 200:
                    print(f"[!] Failed to download: {filename}")
                    continue

                total = int(response.headers.get('content-length', 0))
                try:
                    with open(partial_zip, 'wb') as file, tqdm(
                        desc=filename, total=total, unit='iB', unit_scale=True, unit_divisor=1024
                    ) as bar:
                        # 1 MiB chunks: the archives are large, 1 KiB chunks waste time in Python.
                        for chunk in response.iter_content(chunk_size=1024 * 1024):
                            file.write(chunk)
                            bar.update(len(chunk))
                    # Publish the archive only once it has been written completely.
                    os.replace(partial_zip, zip_path)
                finally:
                    # Still present only when the download failed part-way.
                    if os.path.exists(partial_zip):
                        os.remove(partial_zip)

            print(f"[✓] Download complete: {zip_path}")
        else:
            print(f"[✓] Already downloaded: {zip_path}")

        # Unzip the file
        print(f"Unzipping {filename}...")
        staging_path = extract_path + ".partial"
        # Discard leftovers from a previous run that was killed mid-extraction.
        shutil.rmtree(staging_path, ignore_errors=True)
        try:
            os.makedirs(staging_path)
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(staging_path)
            os.rename(staging_path, extract_path)
        except BaseException:
            # A partial directory would make the "already extracted" check skip this year forever.
            shutil.rmtree(staging_path, ignore_errors=True)
            raise
        print(f"[✓] Unzipped to: {extract_path}")

# Download the national CDL archive(s). range(2022, 2023) yields only 2022;
# widen it to range(2017, 2023) to fetch 2017–2022.
years_to_download = list(range(2022, 2023))
download_national_cdl(years_to_download)


In [None]:
# Download USDA data through the cropnet package for each crop listed below.
from cropnet.data_downloader import DataDownloader

# Initialize the downloader with your target directory
downloader = DataDownloader(target_dir="./dataset")

# Years are passed as strings; crop names are passed to download_USDA one at a time.
years = ["2022"]
crops = ['Corn', 'Cotton', 'Soybean', 'WinterWheat']



for crop in crops:
    # NOTE(review): fips_codes=None presumably means "all counties" — confirm against the cropnet docs.
    downloader.download_USDA(crop, fips_codes=None, years=years)
    
# Sentinel-2 NDVI and HRRR data were downloaded from an online drive linked from a Colab notebook on CropNet's official Hugging Face page.

In [None]:
# NOTE(review): this cell imports DataDownloader from `cropnet.data` and builds it with
# output_dir/states/crops/years, whereas the earlier cell imports it from
# `cropnet.data_downloader` and builds it with target_dir. Confirm which form matches
# the installed cropnet version; this import also rebinds the name `DataDownloader`.
from cropnet.data import DataDownloader

# Set parameters
state = "IA"             # Iowa
county_fips = 19109      # Kossuth County  (NOTE(review): assigned but never used in this cell)
year = 2020              # Choose year
crop = "corn"            # Crop type (needed even if you only want Sentinel NDVI)

# Initialize downloader
# This rebinds `downloader` and `year`, which earlier cells also define.
downloader = DataDownloader(
    output_dir="./cropnet_data", 
    states=[state],
    crops=[crop],
    years=[year]
)

# Download data
downloader.pull()

print("Download complete. Check './cropnet_data'")