In [2]:
import pandas as pd
import numpy as np
from skimpy import clean_columns
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Landscape categories used by cdl_processing: each key maps to the CDL class
# names whose columns are summed into that category.
#
# Names are matched by exact string equality against the column labels that
# come from the metadata table's cdl_classification column, so the spellings
# below (including "Other Hay/Non Alflafa") must stay identical to that table.
#
# NOTE(review): a class that appears in none of these lists still contributes
# to "Total" in cdl_processing but to no category, so the category proportions
# need not sum to 1.
landscape_cat = {
    "ag": [
        "Corn",
        "Cotton",
        "Rice",
        "Sorghum",
        "Soybeans",
        "Sunflower",
        "Peanuts",
        "Tobacco",
        "Sweet Corn",
        "Pop or Orn Corn",
        "Mint",
        "Barley",
        "Durum Wheat",
        "Spring Wheat",
        "Winter Wheat",
        "Other Small Grains",
        "Dbl Crop WinWht/Soybeans",
        "Rye",
        "Oats",
        "Millet",
        "Speltz",
        "Canola",
        "Flaxseed",
        "Safflower",
        "Rape Seed",
        "Mustard",
        "Alfalfa",
        "Other Hay/Non Alflafa",
        "Camelina",
        "Buckwheat",
        "Sugarbeets",
        "Dry Beans",
        "Potatoes",
        "Other Crops",
        "Sugarcane",
        "Sweet Potatoes",
        "Misc Vegs & Fruits",
        "Watermelons",
        "Onions",
        "Cucumbers",
        "Chick Peas",
        "Lentils",
        "Peas",
        "Tomatoes",
        "Caneberries",
        "Hops",
        "Herbs",
        "Clover/Wildflowers",
        "Sod/Grass Seed",
        "Switchgrass",
        "Cherries",
        "Peaches",
        "Apples",
        "Grapes",
        "Christmas Trees",
        "Other Tree Crops",
        "Citrus",
        "Pecans",
        "Almonds",
        "Walnuts",
        "Pears",
        "Pistachios",
        "Triticale",
        "Carrots",
        "Asparagus",
        "Garlic",
        "Cantaloupes",
        "Prunes",
        "Olives",
        "Oranges",
        "Honeydew Melons",
        "Broccoli",
        "Avocados",
        "Peppers",
        "Pomegranates",
        "Nectarines",
        "Greens",
        "Plums",
        "Strawberries",
        "Squash",
        "Apricots",
        "Vetch",
        "Dbl Crop WinWht/Corn",
        "Dbl Crop Oats/Corn",
        "Lettuce",
        "Dbl Crop Triticale/Corn",
        "Pumpkins",
        "Dbl Crop Lettuce/Durum Wht",
        "Dbl Crop Lettuce/Cantaloupe",
        "Dbl Crop Lettuce/Cotton",
        "Dbl Crop Lettuce/Barley",
        # NOTE(review): two spaces after "Dbl" -- confirm this matches the
        # metadata label exactly, otherwise this class is never matched.
        "Dbl  Crop Durum Wht/Sorghum",
        "Dbl Crop Barley/Sorghum",
        "Dbl Crop WinWht/Sorghum",
        "Dbl Crop Barley/Corn",
        "Dbl Crop WinWht/Cotton",
        "Dbl Crop Soybeans/Cotton",
        "Dbl Crop Soybeans/Oats",
        "Dbl Crop Corn/Soybeans",
        "Blueberries",
        "Cabbage",
        "Cauliflower",
        "Celery",
        "Radishes",
        "Turnips",
        "Eggplants",
        "Gourds",
        "Cranberries",
        "Dbl Crop Barley/Soybeans",
        "Cropland",
    ],
    "nat": [
        "Forest",
        # Listed once: membership is a set-style lookup, so a repeated name
        # adds nothing.
        "Shrubland",
        # NOTE(review): "Barren" is also listed under "dev", so Barren pixels
        # are counted in both Nat and Dev -- confirm which one is intended.
        "Barren",
        "Wetlands",
        "Nonag/Undefined",
        "Deciduous Forest",
        "Evergreen Forest",
        "Mixed Forest",
        "Woody Wetlands",
        "Herbaceous Wetlands",
    ],
    "semi_nat": ["Grassland/Pasture"],
    "dev": [
        "Developed/Open Space",
        "Developed/Low Intensity",
        "Developed/Med Intensity",
        "Developed/High Intensity",
        "Barren",
    ],
    "other": [
        "Aquaculture",
        "Water",
        "Perennial Ice/Snow",
    ],
}


In [4]:
def cdl_processing(cdl_metadata, qgis_csv, categories=None):
    """Label CDL class columns and add landscape-category totals and proportions.

    Columns of ``qgis_csv`` named ``VALUE_<cdl_number>`` are renamed to the
    matching ``cdl_classification`` from ``cdl_metadata``. A ``Total`` column
    and, for each landscape category, a sum column plus a ``*_Prop`` column
    (sum / Total) are then appended, followed by ``Corn_Prop``.

    Neither input frame is modified, so the same metadata table can be passed
    to any number of calls.

    Parameters
    ----------
    cdl_metadata : pandas.DataFrame
        Lookup table with ``cdl_number`` and ``cdl_classification`` columns.
    qgis_csv : pandas.DataFrame
        One row per site, with class columns named ``VALUE_<cdl_number>``.
    categories : mapping, optional
        Maps the keys ``"ag"``, ``"nat"``, ``"semi_nat"``, ``"dev"`` and
        ``"other"`` to lists of class names. Defaults to the module-level
        ``landscape_cat``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the renamed class columns followed by ``Total``,
        ``Ag``/``Ag_Prop``, ``Nat``/``Nat_Prop``, ``Semi_Nat``/``Semi_Nat_Prop``,
        ``Dev``/``Dev_Prop``, ``Other``/``Other_Prop`` and ``Corn_Prop``.

    Raises
    ------
    KeyError
        If no column of ``qgis_csv`` matches a ``VALUE_<cdl_number>`` key
        built from ``cdl_metadata`` (for example when ``cdl_number`` already
        carries the ``VALUE_`` prefix).

    Notes
    -----
    ``Total`` sums every numeric column left after the KML columns are
    dropped, so classes that belong to no category still count toward it and
    the proportions need not sum to 1. Rows whose ``Total`` is 0 get NaN
    proportions.
    """
    if categories is None:
        categories = landscape_cat

    # Build the VALUE_<n> -> class-name lookup locally. Writing the prefixed
    # values back into cdl_metadata would make a second call with the same
    # table produce "VALUE_VALUE_<n>" keys that match nothing.
    value_keys = "VALUE_" + cdl_metadata["cdl_number"].astype("str")
    column_names = dict(zip(value_keys, cdl_metadata["cdl_classification"]))

    # Fail loudly rather than return a frame in which every category is 0.
    if not any(column in column_names for column in qgis_csv.columns):
        raise KeyError(
            "no qgis_csv column matches a VALUE_<cdl_number> key from cdl_metadata"
        )

    # These columns must go before Total is computed, because Total sums every
    # remaining numeric column. errors="ignore" tolerates inputs that lack
    # some of them.
    kml_columns = [
        "descriptio",
        "timestamp",
        "begin",
        "end",
        "altitudeMo",
        "tessellate",
        "extrude",
        "visibility",
        "drawOrder",
        "icon",
    ]
    labelled = qgis_csv.rename(columns=column_names).drop(
        columns=kml_columns, errors="ignore"
    )

    labelled["Total"] = labelled.sum(axis=1, numeric_only=True)

    # Only classes actually present as columns are summed; a category with no
    # matching column sums to 0.
    for key, label in (
        ("ag", "Ag"),
        ("nat", "Nat"),
        ("semi_nat", "Semi_Nat"),
        ("dev", "Dev"),
        ("other", "Other"),
    ):
        present = labelled.columns.intersection(categories[key])
        labelled[label] = labelled[present].sum(axis=1)
        labelled[label + "_Prop"] = labelled[label] / labelled["Total"]

    # Field corn plus sweet corn; either column may be absent from an export.
    corn_columns = labelled.columns.intersection(["Corn", "Sweet Corn"])
    labelled["Corn_Prop"] = labelled[corn_columns].sum(axis=1) / labelled["Total"]

    return labelled


In [9]:
# CDL class lookup table; cdl_processing reads its cdl_number and
# cdl_classification columns.
metadata_path = "Data/cdl_metadata_ny_2022.csv"
metadata = pd.read_csv(metadata_path)


In [6]:
# Raw per-site class table for the 500m file.
qgis_df_500m = pd.read_csv(
    "Data/scm_overwintering_500m.csv"
)

# Hand cdl_processing its own copy of the lookup table so `metadata` stays
# untouched and can be reused by the other buffer cells without re-reading it.
cdl_500m = cdl_processing(metadata.copy(), qgis_df_500m)
cdl_500m.head()

Unnamed: 0,Name,Corn,Soybeans,Sweet Corn,Winter Wheat,Oats,Alfalfa,Other Hay/Non Alflafa,Buckwheat,Dry Beans,...,Ag_Prop,Nat,Nat_Prop,Semi_Nat,Semi_Nat_Prop,Dev,Dev_Prop,Other,Other_Prop,Corn_Prop
0,GRN_ORC,116,245,4,98,5,313,113,0,4,...,0.589109,290,0.179455,310,0.191832,0,0.0,0.0,0.0,0.074257
1,GRS_ORC,50,66,4,34,0,254,73,0,0,...,0.360422,258,0.16005,647,0.401365,110,0.068238,0.0,0.0,0.033499
2,GRS_FIE,51,85,4,36,0,247,87,0,0,...,0.385378,209,0.129492,668,0.413879,102,0.063197,0.0,0.0,0.034077
3,PAT_FIE,917,43,6,6,0,83,91,0,0,...,0.709158,414,0.256188,35,0.021658,19,0.011757,0.0,0.0,0.571163
4,CUR_FIE,561,89,0,0,0,257,143,0,0,...,0.660389,171,0.107345,235,0.14752,135,0.084746,0.0,0.0,0.352166


In [8]:
# Raw per-site class table for the 1000m file.
qgis_df_1000m = pd.read_csv(
    "Data/scm_overwintering_1000m.csv"
)

# Hand cdl_processing its own copy of the lookup table so `metadata` stays
# untouched and can be reused by the other buffer cells without re-reading it.
cdl_1000m = cdl_processing(metadata.copy(), qgis_df_1000m)
cdl_1000m.head()

Unnamed: 0,Name,Corn,Sorghum,Soybeans,Sunflower,Sweet Corn,Barley,Winter Wheat,Rye,Oats,...,Ag_Prop,Nat,Nat_Prop,Semi_Nat,Semi_Nat_Prop,Dev,Dev_Prop,Other,Other_Prop,Corn_Prop
0,GRN_ORC,709,0,905,0,9,0,130,0,6,...,0.460037,1895,0.293525,1262,0.195477,248,0.038414,0.0,0.0,0.111214
1,GRS_ORC,737,0,787,0,7,0,105,0,2,...,0.447181,1303,0.201828,1527,0.236524,669,0.103625,0.0,0.0,0.115242
2,GRS_FIE,684,0,769,0,9,0,113,0,2,...,0.450759,1375,0.212914,1545,0.239238,559,0.086559,0.0,0.0,0.107309
3,PAT_FIE,2667,2,478,0,31,0,18,0,0,...,0.75766,951,0.147168,308,0.047663,303,0.04689,0.0,0.0,0.417518
4,CUR_FIE,2157,0,416,0,0,2,2,0,0,...,0.601978,1026,0.161093,939,0.147433,566,0.088868,0.0,0.0,0.338672


In [10]:
# Raw per-site class table for the 2000m file.
qgis_df_2000m = pd.read_csv(
    "Data/scm_overwintering_2000m.csv"
)

# Hand cdl_processing its own copy of the lookup table so `metadata` stays
# untouched and can be reused by the other buffer cells without re-reading it.
cdl_2000m = cdl_processing(metadata.copy(), qgis_df_2000m)
cdl_2000m.head()

Unnamed: 0,Name,Corn,Sorghum,Soybeans,Sunflower,Sweet Corn,Barley,Spring Wheat,Winter Wheat,Rye,...,Ag_Prop,Nat,Nat_Prop,Semi_Nat,Semi_Nat_Prop,Dev,Dev_Prop,Other,Other_Prop,Corn_Prop
0,GRN_ORC,4273,4,2398,0,12,0,0,236,1,...,0.440375,4814,0.186387,5470,0.211786,4039,0.156381,0.0,0.0,0.165905
1,GRS_ORC,3105,7,3338,0,12,0,0,602,3,...,0.42231,5795,0.224378,4724,0.182909,4267,0.165215,0.0,0.0,0.120688
2,GRS_FIE,3010,7,3248,0,12,0,0,583,3,...,0.419158,5718,0.221388,4892,0.189407,4258,0.16486,0.0,0.0,0.117005
3,PAT_FIE,8889,4,2616,0,57,0,0,375,4,...,0.715065,3762,0.145504,1820,0.070393,1494,0.057784,0.0,0.0,0.346007
4,CUR_FIE,5850,2,488,0,4,2,0,12,1,...,0.404414,8242,0.323673,4931,0.193646,1978,0.077678,0.0,0.0,0.229893


In [11]:
# Write each processed table to its own CSV with pandas' default options
# (the row index is therefore written as the first, unnamed column).
_cdl_exports = {
    'Data/scm_overwintering_cdl_500m.csv': cdl_500m,
    'Data/scm_overwintering_cdl_1000m.csv': cdl_1000m,
    'Data/scm_overwintering_cdl_2000m.csv': cdl_2000m,
}
for _csv_path, _cdl_table in _cdl_exports.items():
    _cdl_table.to_csv(_csv_path)