<a href="https://colab.research.google.com/github/dgg32/crop_area/blob/main/Google_Earth_Engine_USA_Crops_Area.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Run me first

First of all, run the following two cells to import the libraries and initialize the Earth Engine API. The output of the authentication cell will contain instructions on how to grant this notebook access to Earth Engine using your account.

In [None]:
import csv
from pathlib import Path

import ee
import pandas as pd

In [None]:

# Trigger the interactive authentication flow; follow the printed
# instructions to grant this notebook access to Earth Engine.
ee.Authenticate()

# Initialize the Earth Engine client library; must run before any other
# `ee.*` call in this notebook.
ee.Initialize()

In [None]:
# Google Drive folder name: used as the export target for the Earth Engine
# table exports and as the directory the result CSVs are read back from.
g_drive_folder = "crop_1e10_post_2008"

In [None]:
# Image in which every pixel holds its own ground area in square metres.
areaPerPixel = ee.Image.pixelArea()

# FAO GAUL 2015 second-level administrative units; filtered by their
# `ADM1_NAME` property to select the districts of one state.
provinces = ee.FeatureCollection("FAO/GAUL/2015/level2")

def calculate_area(state, crop_index, crop, startDate, endDate):
    """Build the Earth Engine computation for one crop's area in one state.

    The crop area (square metres from ``areaPerPixel`` divided by 1e6, i.e.
    km²) is summed per GAUL level-2 feature of the state and then summed
    across those features. Everything here is a deferred server-side
    expression; nothing is evaluated until the result is exported or fetched.

    Args:
        state: Value matched against the ``ADM1_NAME`` property of
            ``provinces``.
        crop_index: Pixel value of the crop in the CDL ``cropland`` band.
        crop: Property name under which each feature's area is stored before
            the final sum.
        startDate: Start of the date window used to pick the CDL image; also
            stored as ``system:time_start`` on the derived area image.
        endDate: End of the date window used to pick the CDL image.

    Returns:
        An Earth Engine computed object holding the summed area.
    """
    # The crop-area image does not depend on the individual feature, so it is
    # built once here rather than inside the mapped function.
    cdl = (
        ee.ImageCollection('USDA/NASS/CDL')
        .filter(ee.Filter.date(startDate, endDate))
        .select("cropland")
        .first()
    )
    # Keep only pixels of the requested class and replace each one with its
    # ground area in km².
    crop_area_image = (
        cdl.eq(crop_index)
        .selfMask()
        .multiply(areaPerPixel)
        .divide(1e6)
        .set('system:time_start', startDate)
    )

    def generate_collection(feature):
        # Sum the area image over this feature's geometry. maxPixels is raised
        # so the reduction is allowed to visit up to 1e10 pixels at scale 30.
        totals = crop_area_image.select('cropland').reduceRegion(
            reducer=ee.Reducer.sum(),
            geometry=feature.geometry(),
            scale=30,
            maxPixels=1e10,
        )
        return feature.set(crop, totals.get("cropland"))

    # NOTE(review): the filter matches on ADM1_NAME only, so any first-level
    # region elsewhere in the world sharing a US state name would match too.
    # Confirm whether an additional country filter is wanted.
    state_features = provinces.filter(ee.Filter.eq('ADM1_NAME', state))

    areas_by_feature = state_features.map(generate_collection)
    results = areas_by_feature.reduceColumns(ee.Reducer.sum(), [crop])

    return results.values(["sum"]).get(0)

# Export tasks started below; kept so their status can be polled afterwards.
tasks = []

# Class names and their pixel values are stored as parallel lists on the CDL
# images. Both are fetched so a crop name can be translated to the exact pixel
# value instead of assuming the values are contiguous and start at 1.
first_cdl_image = ee.ImageCollection('USDA/NASS/CDL').select("cropland").first()
name_list = first_cdl_image.get("cropland_class_names").getInfo()
class_values = first_cdl_image.get("cropland_class_values").getInfo()


crops = ["Corn", "Cotton", "Soybeans"]

# State names obtained from a separate state-listing script.
state_list = ee.List(["Washington", "Wisconsin", "Wyoming", "Maine", "Michigan", "Minnesota", "Montana", "New Hampshire",
                      "New York", "North Dakota", "South Dakota", "Vermont", "West Virginia", "Alabama", "Alaska", "Arizona",
                      "Arkansas", "California", "Colorado", "Georgia", "Hawaii", "Idaho", "Illinois", "Indiana", "Iowa", "Kansas",
                      "Kentucky", "Louisiana", "Maryland", "Mississippi", "Missouri", "Nebraska", "Nevada", "New Mexico", "Ohio",
                      "Oklahoma", "Oregon", "Pennsylvania", "Tennessee", "Texas", "Utah", "Virginia", "Connecticut", "Delaware",
                      "District of Columbia", "Florida", "Massachusetts", "New Jersey", "North Carolina", "Rhode Island", "South Carolina"])

for crop in crops:
    # The pixel value depends only on the crop, so resolve it once per crop.
    crop_index = class_values[name_list.index(crop)]

    # Before 2008 the CDL did not cover the whole USA.
    for year in range(2008, 2022):
        startDate = f'{year}-01-01'
        endDate = ee.Date(startDate).advance(1, "year")

        # Sum the per-state areas into a single nationwide figure. The lambda
        # is consumed by `map` immediately, so it sees this iteration's values.
        results = state_list.map(
            lambda state: calculate_area(state, crop_index, crop, startDate, endDate)
        ).reduce(ee.Reducer.sum())

        # One single-row table per (crop, year), written as CSV to Drive.
        task = ee.batch.Export.table.toDrive(
            collection=ee.FeatureCollection(
                [ee.Feature(None, {"time": startDate, "crop": crop, "area": results})]
            ),
            description=f'{crop}_{startDate}_python_crop',
            folder=g_drive_folder,
            fileFormat='CSV',
        )
        task.start()

        tasks.append(task)


In [None]:
# Print the current status of every export task collected in `tasks`.
# To abort a run instead, call `.cancel()` on each task.
for export_task in tasks:
    status = export_task.status()
    print(status)

In [None]:
# Colab-only helper for attaching Google Drive to the runtime's filesystem.
from google.colab import drive

# Mount Drive at /content/gdrive/ so the exported CSVs can be read as local
# files; force_remount=True is passed so an existing mount is remounted.
drive.mount('/content/gdrive/', force_remount=True)



Mounted at /content/gdrive/


In [None]:

# Absolute path under the Drive mount point, so the lookup does not depend on
# the notebook's current working directory.
pathlist = Path(f"/content/gdrive/MyDrive/{g_drive_folder}").glob('*.csv')

# Columns present in the exported files that carry no data of interest here.
ignored_columns = {"system:index", ".geo"}

rows_list = []

for file in pathlist:
    # `with` guarantees the handle is closed; the csv module handles quoted
    # fields and embedded commas that a plain split(",") would break on.
    with open(file, 'r', newline='') as handle:
        reader = csv.reader(handle)
        headers = next(reader, None)
        if headers is None:
            # Empty file: nothing to collect.
            continue
        for fields in reader:
            if not fields:
                # Skip blank lines rather than recording an empty row.
                continue
            rows_list.append({
                header: cell
                for header, cell in zip(headers, fields)
                if header not in ignored_columns
            })

# Values are kept as strings exactly as they appear in the files.
df = pd.DataFrame(rows_list)

df.head()

Unnamed: 0,area,crop,time
0,384368.0105334144,Corn,2012-01-01
1,341347.7255545258,Corn,2009-01-01
2,376609.3646369319,Corn,2013-01-01
3,351462.3076486588,Corn,2010-01-01
4,368034.9293616259,Corn,2011-01-01


In [None]:
# Parse the date strings, order the rows by crop and then chronologically,
# and write the combined table as a tab-separated file without the index.
parsed_time = pd.to_datetime(df["time"], format='%Y-%m-%d')
df = df.assign(time=parsed_time).sort_values(['crop', 'time'])
df.to_csv("crop_area.tsv", sep="\t", index=False)