In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from skimpy import clean_columns


In [9]:
# Groups of land-cover class names, keyed by landscape category.
# cdl_processing() sums the columns whose names appear in each list, so every
# entry must match a class name exactly as it appears in the metadata table.
# NOTE(review): "Barren" is listed under both "nat" and "dev"; if both
# aggregates are ever computed, that class will be counted twice — confirm
# which category it belongs to.
landscape_cat = {
    "ag": [
        "Corn",
        "Cotton",
        "Rice",
        "Sorghum",
        "Soybeans",
        "Sunflower",
        "Peanuts",
        "Tobacco",
        "Sweet Corn",
        "Pop or Orn Corn",
        "Mint",
        "Barley",
        "Durum Wheat",
        "Spring Wheat",
        "Winter Wheat",
        "Other Small Grains",
        "Dbl Crop WinWht/Soybeans",
        "Rye",
        "Oats",
        "Millet",
        "Speltz",
        "Canola",
        "Flaxseed",
        "Safflower",
        "Rape Seed",
        "Mustard",
        "Alfalfa",
        # Correct spelling of the hay class; without it the column never
        # matches and hay is left out of the "ag" total.
        "Other Hay/Non Alfalfa",
        # Historical misspelling, kept in case a metadata table carries it.
        "Other Hay/Non Alflafa",
        "Camelina",
        "Buckwheat",
        "Sugarbeets",
        "Dry Beans",
        "Potatoes",
        "Other Crops",
        "Sugarcane",
        "Sweet Potatoes",
        "Misc Vegs & Fruits",
        "Watermelons",
        "Onions",
        "Cucumbers",
        "Chick Peas",
        "Lentils",
        "Peas",
        "Tomatoes",
        "Caneberries",
        "Hops",
        "Herbs",
        "Clover/Wildflowers",
        "Sod/Grass Seed",
        "Switchgrass",
        "Cherries",
        "Peaches",
        "Apples",
        "Grapes",
        "Christmas Trees",
        "Other Tree Crops",
        "Citrus",
        "Pecans",
        "Almonds",
        "Walnuts",
        "Pears",
        "Pistachios",
        "Triticale",
        "Carrots",
        "Asparagus",
        "Garlic",
        "Cantaloupes",
        "Prunes",
        "Olives",
        "Oranges",
        "Honeydew Melons",
        "Broccoli",
        "Avocados",
        "Peppers",
        "Pomegranates",
        "Nectarines",
        "Greens",
        "Plums",
        "Strawberries",
        "Squash",
        "Apricots",
        "Vetch",
        "Dbl Crop WinWht/Corn",
        "Dbl Crop Oats/Corn",
        "Lettuce",
        "Dbl Crop Triticale/Corn",
        "Pumpkins",
        "Dbl Crop Lettuce/Durum Wht",
        "Dbl Crop Lettuce/Cantaloupe",
        "Dbl Crop Lettuce/Cotton",
        "Dbl Crop Lettuce/Barley",
        "Dbl Crop Durum Wht/Sorghum",
        "Dbl Crop Barley/Sorghum",
        "Dbl Crop WinWht/Sorghum",
        "Dbl Crop Barley/Corn",
        "Dbl Crop WinWht/Cotton",
        "Dbl Crop Soybeans/Cotton",
        "Dbl Crop Soybeans/Oats",
        "Dbl Crop Corn/Soybeans",
        "Blueberries",
        "Cabbage",
        "Cauliflower",
        "Celery",
        "Radishes",
        "Turnips",
        "Eggplants",
        "Gourds",
        "Cranberries",
        "Dbl Crop Barley/Soybeans",
        "Cropland",
    ],
    "nat": [
        "Forest",
        "Shrubland",
        "Barren",
        "Wetlands",
        "Nonag/Undefined",
        "Deciduous Forest",
        "Evergreen Forest",
        "Mixed Forest",
        "Woody Wetlands",
        "Herbaceous Wetlands",
    ],
    # Both spellings of the pasture class are accepted.
    "semi_nat": ["Grassland/Pasture", "Grass/Pasture"],
    "dev": [
        "Developed/Open Space",
        "Developed/Low Intensity",
        "Developed/Med Intensity",
        "Developed/High Intensity",
        "Barren",
    ],
    "other": ["Aquaculture", "Water", "Perennial Ice/Snow", "Open Water"],
}


In [34]:
def _class_values(frame, class_name):
    """Return the column for ``class_name``, or a column of zeros if the class is absent."""
    if class_name in frame.columns:
        return frame[class_name]
    return pd.Series(0, index=frame.index)


def _category_values(frame, class_names):
    """Row-wise sum of the columns of ``frame`` whose names appear in ``class_names``."""
    return frame[frame.columns.intersection(class_names)].sum(axis=1)


def cdl_processing(cdl_metadata, qgis_csv, categories=None):
    """Label per-class columns and derive landscape-composition proportions.

    Parameters
    ----------
    cdl_metadata : pandas.DataFrame
        Lookup table with a ``cdl_number`` column (class code) and a
        ``cdl_classification`` column (class name).
    qgis_csv : pandas.DataFrame
        One row per site, with per-class columns named ``VALUE_<code>``
        (presumably pixel counts from a zonal histogram — confirm).
    categories : dict, optional
        Mapping with the keys ``"ag"``, ``"nat"`` and ``"semi_nat"``, each a
        list of class names. Defaults to the module-level ``landscape_cat``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the ``VALUE_<code>`` columns renamed to class names,
        the KML attribute columns removed, and these columns appended:
        ``Total``, ``Ag``, ``Ag_Prop``, ``Nat``, ``Nat_Prop``, ``Semi_Nat``,
        ``Semi_Nat_Prop``, ``Corn_Prop``, ``Soybean_Prop``, ``Wheat_Prop``,
        ``Forest_Prop`` and ``Total_Nat_Prop``.

    Raises
    ------
    KeyError
        If no class column can be identified in ``qgis_csv``.

    Notes
    -----
    Neither input is modified, so the function can be called repeatedly with
    the same objects.
    """
    if categories is None:
        categories = landscape_cat

    # Build the VALUE_<code> -> class-name mapping without writing to the
    # metadata frame. Codes that already carry the prefix are left alone.
    codes = cdl_metadata["cdl_number"].astype("str")
    value_columns = codes.where(codes.str.startswith("VALUE_"), "VALUE_" + codes)
    rename_map = dict(zip(value_columns, cdl_metadata["cdl_classification"]))

    frame = qgis_csv.rename(columns=rename_map).drop(
        columns=[
            "descriptio",
            "timestamp",
            "begin",
            "end",
            "altitudeMo",
            "tessellate",
            "extrude",
            "visibility",
            "drawOrder",
            "icon",
        ],
        errors="ignore",  # tolerate inputs that lack some of these attributes
    )

    # Only class columns belong in the denominator; unrelated numeric columns
    # (e.g. a saved row index such as "Unnamed: 0") must not inflate it.
    class_names = set(rename_map.values())
    is_class_column = [
        name in class_names or str(name).startswith("VALUE_")
        for name in frame.columns
    ]
    if not any(is_class_column):
        raise KeyError(
            "no class columns found: expected columns named 'VALUE_<code>' "
            "or class names listed in cdl_metadata['cdl_classification']"
        )
    frame["Total"] = frame.loc[:, is_class_column].sum(axis=1, numeric_only=True)
    total = frame["Total"]

    frame["Ag"] = _category_values(frame, categories["ag"])
    frame["Ag_Prop"] = frame["Ag"] / total
    frame["Nat"] = _category_values(frame, categories["nat"])
    frame["Nat_Prop"] = frame["Nat"] / total
    frame["Semi_Nat"] = _category_values(frame, categories["semi_nat"])
    frame["Semi_Nat_Prop"] = frame["Semi_Nat"] / total
    # NOTE: the "dev" and "other" categories are not aggregated here.

    # A class that occurs in no row has no column at all; treat it as zero.
    frame["Corn_Prop"] = _class_values(frame, "Corn") / total
    frame["Soybean_Prop"] = _class_values(frame, "Soybeans") / total
    frame["Wheat_Prop"] = _class_values(frame, "Winter Wheat") / total
    # Every column whose name contains "Forest" counts toward forest cover.
    frame["Forest_Prop"] = frame.filter(like="Forest").sum(axis=1) / total
    frame["Total_Nat_Prop"] = frame["Nat_Prop"] + frame["Semi_Nat_Prop"]

    return frame


In [35]:
# Inputs for the 1000 m run: the class lookup table and the per-site table.
metadata = pd.read_csv("cdl_metadata_ny_2023.csv")
qgis_file = pd.read_csv("cdl_for_shea_1000m.csv")

In [38]:
# Hand copies to cdl_processing so the frames loaded above stay exactly as
# read from disk; this cell can then be re-run, or pointed at another input
# table, without depending on what an earlier call did to `metadata`.
cdl_processing(metadata.copy(), qgis_file.copy()).to_csv('landscape_data_for_shea_1000m.csv')

In [4]:
def calc_props(year, buffer):
    """Compute landscape proportions for one year/buffer and write them to CSV.

    Reads ``metadata/cdl_metadata_ny_<year>.csv`` and
    ``<year>_ny_cdl/<year>_ny_cdl_csv/2023_scm_<year>_cdl_<buffer>m.csv``,
    runs ``cdl_processing`` on them, adds a ``Buffer`` column holding
    ``buffer``, and writes the result to
    ``cdl_five_year_history/<year>/2023_scm_<year>_cdl_<buffer>m.csv``.

    Parameters
    ----------
    year : int
        Year used in the input and output paths.
    buffer : int
        Buffer size used in the file names and stored in the ``Buffer`` column.

    Returns
    -------
    None
    """
    file_name = f"2023_scm_{year}_cdl_{buffer}m.csv"
    metadata = pd.read_csv(f"metadata/cdl_metadata_ny_{year}.csv")
    cdl_df = pd.read_csv(f"{year}_ny_cdl/{year}_ny_cdl_csv/{file_name}")

    # Remove the saved row index *before* processing: it is numeric, so it
    # would otherwise be summed into "Total" and skew every proportion.
    cdl_df = cdl_df.drop(columns=["Unnamed: 0"], errors="ignore")

    cdl_df = cdl_processing(metadata, cdl_df)
    cdl_df["Buffer"] = buffer
    cdl_df.to_csv(f"cdl_five_year_history/{year}/{file_name}")

In [8]:
# Build the 2022 tables for every buffer size, smallest first.
for buffer_m in (500, 1000, 2000, 3000):
    calc_props(2022, buffer_m)


# Process CDL Data for Models

In [9]:
def _load_cdl_history(year, buffer):
    """Read one table from cdl_five_year_history/ and pass it through clean_columns.

    The file read is ``cdl_five_year_history/<year>/2023_scm_<year>_cdl_<buffer>m.csv``.
    """
    path = f"cdl_five_year_history/{year}/2023_scm_{year}_cdl_{buffer}m.csv"
    return clean_columns(pd.read_csv(path))


# One frame per (year, buffer) combination, under the names used further down.
cdl_2022_500m = _load_cdl_history(2022, 500)
cdl_2022_1000m = _load_cdl_history(2022, 1000)
cdl_2022_2000m = _load_cdl_history(2022, 2000)
cdl_2022_3000m = _load_cdl_history(2022, 3000)

cdl_2021_500m = _load_cdl_history(2021, 500)
cdl_2021_1000m = _load_cdl_history(2021, 1000)
cdl_2021_2000m = _load_cdl_history(2021, 2000)
cdl_2021_3000m = _load_cdl_history(2021, 3000)

cdl_2020_500m = _load_cdl_history(2020, 500)
cdl_2020_1000m = _load_cdl_history(2020, 1000)
cdl_2020_2000m = _load_cdl_history(2020, 2000)
cdl_2020_3000m = _load_cdl_history(2020, 3000)

cdl_2019_500m = _load_cdl_history(2019, 500)
cdl_2019_1000m = _load_cdl_history(2019, 1000)
cdl_2019_2000m = _load_cdl_history(2019, 2000)
cdl_2019_3000m = _load_cdl_history(2019, 3000)

cdl_2018_500m = _load_cdl_history(2018, 500)
cdl_2018_1000m = _load_cdl_history(2018, 1000)
cdl_2018_2000m = _load_cdl_history(2018, 2000)
cdl_2018_3000m = _load_cdl_history(2018, 3000)


In [10]:
def cdl_processing_for_models(cdl_data, year, buffer_range):
    """Keep the model columns and tag each with the year and buffer size.

    Selects ``name`` and the eight ``*_prop`` columns from ``cdl_data`` and
    appends ``_<year>_<buffer_range>`` to every selected column name,
    ``name`` included. The input frame is not modified.

    Raises ``KeyError`` if any of the selected columns is missing.
    """
    model_columns = [
        "name",
        "ag_prop",
        "nat_prop",
        "semi_nat_prop",
        "corn_prop",
        "soybean_prop",
        "wheat_prop",
        "forest_prop",
        "total_nat_prop",
    ]
    tag = f"_{year}_{buffer_range}"
    return cdl_data[model_columns].add_suffix(tag)

In [11]:
# Reduce every loaded table to its model columns, tagged with year and buffer.
# Each name is rebound to the reduced frame.
cdl_2022_500m = cdl_processing_for_models(cdl_2022_500m, year=2022, buffer_range=500)
cdl_2022_1000m = cdl_processing_for_models(cdl_2022_1000m, year=2022, buffer_range=1000)
cdl_2022_2000m = cdl_processing_for_models(cdl_2022_2000m, year=2022, buffer_range=2000)
cdl_2022_3000m = cdl_processing_for_models(cdl_2022_3000m, year=2022, buffer_range=3000)

cdl_2021_500m = cdl_processing_for_models(cdl_2021_500m, year=2021, buffer_range=500)
cdl_2021_1000m = cdl_processing_for_models(cdl_2021_1000m, year=2021, buffer_range=1000)
cdl_2021_2000m = cdl_processing_for_models(cdl_2021_2000m, year=2021, buffer_range=2000)
cdl_2021_3000m = cdl_processing_for_models(cdl_2021_3000m, year=2021, buffer_range=3000)

cdl_2020_500m = cdl_processing_for_models(cdl_2020_500m, year=2020, buffer_range=500)
cdl_2020_1000m = cdl_processing_for_models(cdl_2020_1000m, year=2020, buffer_range=1000)
cdl_2020_2000m = cdl_processing_for_models(cdl_2020_2000m, year=2020, buffer_range=2000)
cdl_2020_3000m = cdl_processing_for_models(cdl_2020_3000m, year=2020, buffer_range=3000)

cdl_2019_500m = cdl_processing_for_models(cdl_2019_500m, year=2019, buffer_range=500)
cdl_2019_1000m = cdl_processing_for_models(cdl_2019_1000m, year=2019, buffer_range=1000)
cdl_2019_2000m = cdl_processing_for_models(cdl_2019_2000m, year=2019, buffer_range=2000)
cdl_2019_3000m = cdl_processing_for_models(cdl_2019_3000m, year=2019, buffer_range=3000)

cdl_2018_500m = cdl_processing_for_models(cdl_2018_500m, year=2018, buffer_range=500)
cdl_2018_1000m = cdl_processing_for_models(cdl_2018_1000m, year=2018, buffer_range=1000)
cdl_2018_2000m = cdl_processing_for_models(cdl_2018_2000m, year=2018, buffer_range=2000)
cdl_2018_3000m = cdl_processing_for_models(cdl_2018_3000m, year=2018, buffer_range=3000)
