# *eda-0B-create-crop-data.ipynb*

# Read in CDL, reproject, convert to ML format, and save

- Read in CDL
- Reproject and resample CDL to HLS projection (UTM)
- Convert to ML format
- Save down each vector

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import rasterio
import os

In [2]:
from rasterio.warp import reproject

## California 10SFH Almonds

Read in and reproject all CDLs
- specify years as range(2014,2023)
- the state is implied by the tile (there is no separate state argument): '10SFH' is the California tile and '15TVG' is the Iowa tile
- specify tile as '10SFH' or '15TVG'
- output is a dictionary of CDLs, with each year (as a string, e.g. '2014') as a key and the reprojected array for that year as the value
- for each state-year, need to read in the national CDL, then use the reproject function to reproject and clip it to the HLS tile

In [3]:
def read_cdl(years=range(2018,2023),
             tile='10SFH',
             also_return_affines=False):
    """Reproject the national CDL raster onto an HLS tile grid, one array per year.

    A single reference HLS scene (band B03) for the requested tile supplies the
    target CRS, affine transform, array shape and dtype. For every year the
    national CDL GeoTIFF is read and warped onto that grid.

    Parameters
    ----------
    years : iterable of int
        Years to process. Each year is read from
        ``../data/NationalCDL_{year}/{year}_30m_cdls.tif``.
    tile : str
        HLS tile identifier; must be ``'10SFH'`` or ``'15TVG'``.
    also_return_affines : bool
        When True, also return the affine transform reported by ``reproject``
        for each year.

    Returns
    -------
    dict, or (dict, dict)
        ``{str(year): array}``; when ``also_return_affines`` is True, a second
        dict ``{str(year): affine}`` is returned alongside it.

    Raises
    ------
    ValueError
        If ``tile`` is not one of the supported tile identifiers.
    """
    # Reference scene per supported tile: (directory fragment, scene name).
    hls_reference_scenes = {
        '10SFH': ('10/S/F/H', 'HLS.L30.T10SFH.2020007T183941.v2.0'),
        '15TVG': ('15/T/V/G', 'HLS.L30.T15TVG.2020007T170001.v2.0'),
    }
    if tile not in hls_reference_scenes:
        # Previously an unknown tile fell through and crashed later on an
        # unbound ``hls_path``; fail early with a clear message instead.
        raise ValueError(
            f'Unsupported tile {tile!r}; expected one of {sorted(hls_reference_scenes)}'
        )
    tile_dir, full = hls_reference_scenes[tile]
    hls_path = f'../data/hls_23feb23/L30/2020/{tile_dir}/{full}/{full}.B03.tif'

    # Only the grid definition of the reference scene is needed, so capture it
    # once and close the file straight away (it used to be left open).
    with rasterio.open(hls_path) as src_hls:
        dst_shape = (src_hls.height, src_hls.width)
        dst_dtype = src_hls.dtypes[0]
        dst_transform = src_hls.transform
        dst_crs = src_hls.crs

    dict_of_arrays = {}
    dict_of_affines = {}

    for year in years:
        print(f'Working on CDL from year {year}...')
        cdl_path = f'../data/NationalCDL_{year}/{year}_30m_cdls.tif'
        with rasterio.open(cdl_path) as src_cdl:
            # A fresh buffer per year is required because reproject writes into
            # (and returns) the destination array. It has the same shape and
            # dtype as band 1 of the reference scene, without re-reading that
            # band from disk each iteration.
            # No resampling method is passed, so rasterio's default applies
            # (nearest-neighbour per the rasterio docs, which keeps the
            # categorical crop codes intact).
            array, affine = reproject(
                source=src_cdl.read(1),
                destination=np.zeros(dst_shape, dtype=dst_dtype),
                src_transform=src_cdl.transform,
                src_crs=src_cdl.crs,
                dst_transform=dst_transform,
                dst_crs=dst_crs,
            )

        dict_of_arrays[str(year)] = array
        dict_of_affines[str(year)] = affine

    print('Finished with all CDLs.')

    if also_return_affines:
        return dict_of_arrays, dict_of_affines
    return dict_of_arrays

In [4]:
# California tile (10SFH): reproject the CDL for every year from 2014 through 2022.
dict_cdl_10SFH = read_cdl(tile='10SFH', years=range(2014, 2023))

Working on CDL from year 2014...
Working on CDL from year 2015...
Working on CDL from year 2016...
Working on CDL from year 2017...
Working on CDL from year 2018...
Working on CDL from year 2019...
Working on CDL from year 2020...
Working on CDL from year 2021...
Working on CDL from year 2022...
Finished with all CDLs.


In [18]:
# Flattening does not change the dtype, so inspect it directly instead of
# copying the whole array first.
dict_cdl_10SFH['2014'].dtype

dtype('int16')

In [15]:
# Sanity check: expect one string key per requested year.
dict_cdl_10SFH.keys()

dict_keys(['2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022'])

In [19]:
# Save one flattened vector of CDL crop codes per year for the California tile.
out_dir = '../data/processed-crop'
# np.save does not create missing directories, so make sure the target exists.
os.makedirs(out_dir, exist_ok=True)
for year, cdl_array in dict_cdl_10SFH.items():
    np.save(f'{out_dir}/Crop_10SFH_{year}.npy',
            cdl_array.flatten(),
            allow_pickle=False)

### Crop masks are not created here

There will be no masking in this notebook - the vectors we save down will be the crop codes for each pixel

Note for later: Corn is id 1 and Almonds is id 75

## Iowa 15TVG Corn

In [20]:
# Iowa tile (15TVG): reproject the CDL for every year from 2014 through 2022.
# The tile must be '15TVG' here; passing '10SFH' would silently reproduce the
# California arrays and save them under the Iowa file names.
dict_cdl_15TVG = read_cdl(years=range(2014,2023),
                     tile='15TVG')

Working on CDL from year 2014...
Working on CDL from year 2015...
Working on CDL from year 2016...
Working on CDL from year 2017...
Working on CDL from year 2018...
Working on CDL from year 2019...
Working on CDL from year 2020...
Working on CDL from year 2021...
Working on CDL from year 2022...
Finished with all CDLs.


In [21]:
# Flattening does not change the dtype, so inspect it directly instead of
# copying the whole array first.
dict_cdl_15TVG['2014'].dtype

dtype('int16')

In [23]:
# Sanity check: expect one string key per requested year.
dict_cdl_15TVG.keys()

dict_keys(['2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022'])

In [24]:
# Save one flattened vector of CDL crop codes per year for the Iowa tile.
out_dir = '../data/processed-crop'
# np.save does not create missing directories, so make sure the target exists.
os.makedirs(out_dir, exist_ok=True)
for year, cdl_array in dict_cdl_15TVG.items():
    np.save(f'{out_dir}/Crop_15TVG_{year}.npy',
            cdl_array.flatten(),
            allow_pickle=False)

Read in crop type codes:

In [8]:
# Load the CDL lookup table of crop codes, names and colours.
cdlcnc = pd.read_csv(
    '../data/CDL_codes_names_colors.csv',
    header=3,  # column names are taken from row index 3 of the file
)