In [3]:
import rasterio
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point
import numpy as np
from tqdm import tqdm
import os
pd.set_option('display.max_columns', None)

In [2]:
# Function to get raster info and pixel count
def get_raster_info(raster_path):
    with rasterio.open(raster_path) as src:
        total_pixels = src.width * src.height
        valid_pixels = np.sum(src.read(1) != src.nodata)
        return {
            "Total pixels": total_pixels,
            "Valid pixels": valid_pixels,
            "NoData pixels": total_pixels - valid_pixels,
            "Width": src.width,
            "Height": src.height,
        }

In [3]:
def create_base_gdf(raster_path, file_name):
    with rasterio.open(raster_path + file_name) as src:
        data = src.read(1)
        transform = src.transform
        crs = src.crs
        
        # Create a grid of all pixel centroids
        rows, cols = np.meshgrid(range(src.height), range(src.width), indexing='ij')
        xs, ys = rasterio.transform.xy(transform, rows.ravel(), cols.ravel(), offset='center')
        
        # Create points for non-NoData values
        mask = data.ravel() != src.nodata
        points = [Point(x, y) for x, y in zip(np.array(xs)[mask], np.array(ys)[mask])]
        values = data.ravel()[mask]
        
        food_name = file_name.split('_')[1]
        
    return gpd.GeoDataFrame({'geometry': points, food_name: values}, crs=crs)

In [4]:
def sample_raster(gdf, raster_path, file_name):
    food_name = file_name.split('_')[1]
    with rasterio.open(raster_path + file_name) as src:
        # Sample the raster at the GeoDataFrame's point locations
        coords = [(x, y) for x, y in zip(gdf.geometry.x, gdf.geometry.y)]
        gdf[food_name] = [x[0] for x in src.sample(coords)]
    return gdf

In [9]:
def process_geotiffs(raster_path, file_list):
    # Create base GeoDataFrame from the first file
    base_gdf = create_base_gdf(raster_path, file_list[0])
    print(f"Created base GeoDataFrame from {file_list[0]}")
    
    # Sample remaining files
    for file in tqdm(file_list[1:], desc="Processing files"):
        base_gdf = sample_raster(base_gdf, raster_path, file)
    
    return base_gdf

In [7]:
# use os to list all filenames  in /Volumes/Samsung 4TB/earthgenome/foodtwin/production-supply/2020_gadm_foodgrouprasters/fg2/
raster_path = '/Volumes/Samsung 4TB/earthgenome/foodtwin/production-supply/2020_gadm_foodgrouprasters/fg2/'
file_list = os.listdir(raster_path)
file_list = [x for x in file_list if not x.startswith('.')]
file_list

['2020_Vegetables_tonnage_gadm.tif',
 '2020_Sweetners_tonnage_gadm.tif',
 '2020_Vegetable-Oils_tonnage_gadm.tif',
 '2020_Treenuts_tonnage_gadm.tif',
 '2020_Stimulants_tonnage_gadm.tif',
 '2020_Spices_tonnage_gadm.tif',
 '2020_Sugar-Crops_tonnage_gadm.tif',
 '2020_Milk-Excluding-Butter_tonnage_gadm.tif',
 '2020_Oilcrops_tonnage_gadm.tif',
 '2020_Pulses_tonnage_gadm.tif',
 '2020_Cereals-Excluding-Beer_tonnage_gadm.tif',
 '2020_Aquatic-Products-Other_tonnage_gadm.tif',
 '2020_Offals_tonnage_gadm.tif',
 '2020_Fish-Seafood_tonnage_gadm.tif',
 '2020_Meat_tonnage_gadm.tif',
 '2020_Eggs_tonnage_gadm.tif',
 '2020_Alcoholic-Beverages_tonnage_gadm.tif',
 '2020_Fruits-Excluding-Wine_tonnage_gadm.tif',
 '2020_Starchy-Roots_tonnage_gadm.tif',
 '2020_Animal-Fats_tonnage_gadm.tif']

In [10]:
result_gdf = process_geotiffs(raster_path, file_list)

Created base GeoDataFrame from 2020_Vegetables_tonnage_gadm.tif


Processing files: 100%|██████████| 19/19 [47:03<00:00, 148.60s/it]


In [36]:
result_gdf = result_gdf.replace(0, np.nan)

In [37]:
non_geometry_columns = result_gdf.columns.drop('geometry')
# Keep rows where at least one non-geometry column is not NaN
gdf_cleaned = result_gdf[result_gdf[non_geometry_columns].notna().any(axis=1)]
print(f"Original shape: {result_gdf.shape}")
print(f"Cleaned shape: {gdf_cleaned.shape}")
print(f"Removed {result_gdf.shape[0] - gdf_cleaned.shape[0]} rows")

Original shape: (9331200, 21)
Cleaned shape: (1918519, 21)
Removed 7412681 rows


In [39]:
gdf_cleaned

Unnamed: 0,geometry,Vegetables,Sweetners,Vegetable-Oils,Treenuts,Stimulants,Spices,Sugar-Crops,Milk-Excluding-Butter,Oilcrops,Pulses,Cereals-Excluding-Beer,Aquatic-Products-Other,Offals,Fish-Seafood,Meat,Eggs,Alcoholic-Beverages,Fruits-Excluding-Wine,Starchy-Roots,Animal-Fats
364100,POINT (-78.29167 82.95833),,,,,,,,3.350658,,,,,0.068162,,1.525191,0.042130,,,,0.467383
368396,POINT (-80.29167 82.875),,,,,,,,0.835334,,,,,0.012665,,0.298146,0.008088,,,,0.115728
368404,POINT (-79.625 82.875),,,,,,,,4.513004,,,,,0.058110,,1.391083,0.027714,,,,0.623350
368406,POINT (-79.45833 82.875),,,,,,,,4.714489,,,,,0.084159,,1.900677,0.043892,,,,0.655487
368407,POINT (-79.375 82.875),,,,,,,,5.600649,,,,,0.106315,,2.385633,0.058007,,,,0.779842
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7548383,POINT (-68.04167 -55.625),,,,,,,,4.534473,,,,100.398872,0.057795,787.531677,2.815090,0.715552,,,,0.602658
7552709,POINT (-67.54167 -55.70833),,,,,,,,,,,,100.398872,,787.531677,,,,,,
7552710,POINT (-67.45833 -55.70833),,,,,,,,,,,,100.398872,,787.531677,,,,,,
7561348,POINT (-67.625 -55.875),,,,,,,,,,,,100.398872,,787.531677,,,,,,


In [40]:
gdf_cleaned.to_file('/Volumes/Samsung 4TB/earthgenome/foodtwin/production-supply/vectorized_production/food-group-2-full.geojson', driver='GeoJSON')

# Z-Score Normalization

In [None]:
# do z-score normalization on each of the food group columns and add the values as a new column with the znormal_ prefix
for column in gdf_cleaned.columns:
    if column != 'geometry':
        gdf_cleaned['znormal_' + column] = (gdf_cleaned[column] - gdf_cleaned[column].mean()) / gdf_cleaned[column].std()

In [42]:
gdf_cleaned

Unnamed: 0,geometry,Vegetables,Sweetners,Vegetable-Oils,Treenuts,Stimulants,Spices,Sugar-Crops,Milk-Excluding-Butter,Oilcrops,Pulses,Cereals-Excluding-Beer,Aquatic-Products-Other,Offals,Fish-Seafood,Meat,Eggs,Alcoholic-Beverages,Fruits-Excluding-Wine,Starchy-Roots,Animal-Fats,znormal_Vegetables,znormal_Sweetners,znormal_Vegetable-Oils,znormal_Treenuts,znormal_Stimulants,znormal_Spices,znormal_Sugar-Crops,znormal_Milk-Excluding-Butter,znormal_Oilcrops,znormal_Pulses,znormal_Cereals-Excluding-Beer,znormal_Aquatic-Products-Other,znormal_Offals,znormal_Fish-Seafood,znormal_Meat,znormal_Eggs,znormal_Alcoholic-Beverages,znormal_Fruits-Excluding-Wine,znormal_Starchy-Roots,znormal_Animal-Fats
364100,POINT (-78.29167 82.95833),,,,,,,,3.350658,,,,,0.068162,,1.525191,0.042130,,,,0.467383,,,,,,,,-0.282098,,,,,-0.349388,,-0.248993,-0.169072,,,,-0.275192
368396,POINT (-80.29167 82.875),,,,,,,,0.835334,,,,,0.012665,,0.298146,0.008088,,,,0.115728,,,,,,,,-0.283542,,,,,-0.351325,,-0.250708,-0.169186,,,,-0.279090
368404,POINT (-79.625 82.875),,,,,,,,4.513004,,,,,0.058110,,1.391083,0.027714,,,,0.623350,,,,,,,,-0.281430,,,,,-0.349739,,-0.249181,-0.169120,,,,-0.273463
368406,POINT (-79.45833 82.875),,,,,,,,4.714489,,,,,0.084159,,1.900677,0.043892,,,,0.655487,,,,,,,,-0.281315,,,,,-0.348830,,-0.248469,-0.169066,,,,-0.273107
368407,POINT (-79.375 82.875),,,,,,,,5.600649,,,,,0.106315,,2.385633,0.058007,,,,0.779842,,,,,,,,-0.280806,,,,,-0.348057,,-0.247791,-0.169019,,,,-0.271729
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7548383,POINT (-68.04167 -55.625),,,,,,,,4.534473,,,,100.398872,0.057795,787.531677,2.815090,0.715552,,,,0.602658,,,,,,,,-0.281418,,,,-0.205162,-0.349750,-0.14292,-0.247191,-0.166829,,,,-0.273692
7552709,POINT (-67.54167 -55.70833),,,,,,,,,,,,100.398872,,787.531677,,,,,,,,,,,,,,,,,,-0.205162,,-0.14292,,,,,,
7552710,POINT (-67.45833 -55.70833),,,,,,,,,,,,100.398872,,787.531677,,,,,,,,,,,,,,,,,,-0.205162,,-0.14292,,,,,,
7561348,POINT (-67.625 -55.875),,,,,,,,,,,,100.398872,,787.531677,,,,,,,,,,,,,,,,,,-0.205162,,-0.14292,,,,,,


In [None]:
gdf_cleaned.columns = gdf_cleaned.columns.str.lower()
# for all the columns that begin with znormal_ create a column that is the column name of the max value
max_columns = [col for col in gdf_cleaned.columns if 'znormal_' in col]
gdf_cleaned['max_food_group'] = gdf_cleaned[max_columns].idxmax(axis=1).str.replace('znormal_', '')

In [48]:
gdf_cleaned.to_file('/Volumes/Samsung 4TB/earthgenome/foodtwin/production-supply/vectorized_production/food-group-2-normal.geojson', driver='GeoJSON')

In [75]:
# drop any columns that are not znormal_ or max_food_group
gdf_normalized = gdf_cleaned.drop(columns=[col for col in gdf_cleaned.columns if 'znormal_' not in col and col != 'max_food_group' and col != 'geometry'])
gdf_normalized

Unnamed: 0,geometry,znormal_vegetables,znormal_sweetners,znormal_vegetable-oils,znormal_treenuts,znormal_stimulants,znormal_spices,znormal_sugar-crops,znormal_milk-excluding-butter,znormal_oilcrops,znormal_pulses,znormal_cereals-excluding-beer,znormal_aquatic-products-other,znormal_offals,znormal_fish-seafood,znormal_meat,znormal_eggs,znormal_alcoholic-beverages,znormal_fruits-excluding-wine,znormal_starchy-roots,znormal_animal-fats,max_food_group
364100,POINT (-78.29167 82.95833),,,,,,,,-0.282098,,,,,-0.349388,,-0.248993,-0.169072,,,,-0.275192,eggs
368396,POINT (-80.29167 82.875),,,,,,,,-0.283542,,,,,-0.351325,,-0.250708,-0.169186,,,,-0.279090,eggs
368404,POINT (-79.625 82.875),,,,,,,,-0.281430,,,,,-0.349739,,-0.249181,-0.169120,,,,-0.273463,eggs
368406,POINT (-79.45833 82.875),,,,,,,,-0.281315,,,,,-0.348830,,-0.248469,-0.169066,,,,-0.273107,eggs
368407,POINT (-79.375 82.875),,,,,,,,-0.280806,,,,,-0.348057,,-0.247791,-0.169019,,,,-0.271729,eggs
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7548383,POINT (-68.04167 -55.625),,,,,,,,-0.281418,,,,-0.205162,-0.349750,-0.14292,-0.247191,-0.166829,,,,-0.273692,fish-seafood
7552709,POINT (-67.54167 -55.70833),,,,,,,,,,,,-0.205162,,-0.14292,,,,,,,fish-seafood
7552710,POINT (-67.45833 -55.70833),,,,,,,,,,,,-0.205162,,-0.14292,,,,,,,fish-seafood
7561348,POINT (-67.625 -55.875),,,,,,,,,,,,-0.205162,,-0.14292,,,,,,,fish-seafood


In [76]:
gdf_normalized_shrunk = gdf_normalized.copy()
znormal_columns = [col for col in gdf_normalized_shrunk.columns if col.startswith('znormal')]
gdf_normalized_shrunk[znormal_columns] = gdf_normalized_shrunk[znormal_columns]*10000
gdf_normalized_shrunk[znormal_columns] = gdf_normalized_shrunk[znormal_columns].round(0)
gdf_normalized_shrunk

Unnamed: 0,geometry,znormal_vegetables,znormal_sweetners,znormal_vegetable-oils,znormal_treenuts,znormal_stimulants,znormal_spices,znormal_sugar-crops,znormal_milk-excluding-butter,znormal_oilcrops,znormal_pulses,znormal_cereals-excluding-beer,znormal_aquatic-products-other,znormal_offals,znormal_fish-seafood,znormal_meat,znormal_eggs,znormal_alcoholic-beverages,znormal_fruits-excluding-wine,znormal_starchy-roots,znormal_animal-fats,max_food_group
364100,POINT (-78.29167 82.95833),,,,,,,,-2821.0,,,,,-3494.0,,-2490.0,-1691.0,,,,-2752.0,eggs
368396,POINT (-80.29167 82.875),,,,,,,,-2835.0,,,,,-3513.0,,-2507.0,-1692.0,,,,-2791.0,eggs
368404,POINT (-79.625 82.875),,,,,,,,-2814.0,,,,,-3497.0,,-2492.0,-1691.0,,,,-2735.0,eggs
368406,POINT (-79.45833 82.875),,,,,,,,-2813.0,,,,,-3488.0,,-2485.0,-1691.0,,,,-2731.0,eggs
368407,POINT (-79.375 82.875),,,,,,,,-2808.0,,,,,-3481.0,,-2478.0,-1690.0,,,,-2717.0,eggs
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7548383,POINT (-68.04167 -55.625),,,,,,,,-2814.0,,,,-2052.0,-3497.0,-1429.0,-2472.0,-1668.0,,,,-2737.0,fish-seafood
7552709,POINT (-67.54167 -55.70833),,,,,,,,,,,,-2052.0,,-1429.0,,,,,,,fish-seafood
7552710,POINT (-67.45833 -55.70833),,,,,,,,,,,,-2052.0,,-1429.0,,,,,,,fish-seafood
7561348,POINT (-67.625 -55.875),,,,,,,,,,,,-2052.0,,-1429.0,,,,,,,fish-seafood


In [73]:
# convert the znormal columns to integers
gdf_normalized_shrunk.to_file('/Volumes/Samsung 4TB/earthgenome/foodtwin/production-supply/vectorized_production/food-group-2-normal-shrunk-int.geojson', driver='GeoJSON')

In [80]:
# whats the min max value of each column
gdf_normalized_shrunk.describe()

Unnamed: 0,znormal_vegetables,znormal_sweetners,znormal_vegetable-oils,znormal_treenuts,znormal_stimulants,znormal_spices,znormal_sugar-crops,znormal_milk-excluding-butter,znormal_oilcrops,znormal_pulses,znormal_cereals-excluding-beer,znormal_aquatic-products-other,znormal_offals,znormal_fish-seafood,znormal_meat,znormal_eggs,znormal_alcoholic-beverages,znormal_fruits-excluding-wine,znormal_starchy-roots,znormal_animal-fats,max_food_group_value
count,881049.0,0.0,857526.0,729730.0,423885.0,458933.0,551357.0,1839434.0,840266.0,799354.0,893108.0,109754.0,1794987.0,142122.0,1892583.0,1744120.0,658750.0,888467.0,847291.0,1864141.0,1918519.0
mean,0.008780442,,0.00842,0.001966481,0.01021,-0.003845877,0.015928,0.0009301775,0.004811571,0.01178452,-0.003332184,0.096179,0.003042919,0.078412,-0.004262957,0.0008267779,-0.002012903,0.005791999,0.002467865,0.0001485939,5795.376
std,9999.998,,9999.998047,10000.0,9999.998047,10000.0,9999.998047,10000.0,10000.0,9999.998,10000.0,9999.918945,9999.999,9999.976562,10000.0,10000.0,10000.0,9999.998,9999.999,10000.0,24336.8
min,-2258.0,,-2606.0,-1274.0,-3457.0,-1332.0,-1540.0,-2840.0,-2333.0,-2030.0,-4725.0,-2963.0,-3518.0,-3938.0,-2511.0,-1692.0,-1117.0,-2258.0,-2399.0,-2804.0,-3900.0
25%,-2218.0,,-2560.0,-1260.0,-3453.0,-1315.0,-1529.0,-2796.0,-2302.0,-1985.0,-4602.0,-2960.0,-3444.0,-3318.0,-2461.0,-1690.0,-1090.0,-2214.0,-2350.0,-2756.0,-1689.0
50%,-2041.0,,-2229.0,-1171.0,-3418.0,-1204.0,-1449.0,-2541.0,-2085.0,-1766.0,-3679.0,-2945.0,-2978.0,-2769.0,-2261.0,-1674.0,-930.0,-1965.0,-2071.0,-2552.0,-1206.0
75%,-1141.0,,-823.0,-564.0,-3154.0,-376.0,-1022.0,-1123.0,-1084.0,-891.0,16.0,-2621.0,-509.0,-1280.0,-1092.0,-1447.0,-309.0,-490.0,-939.0,-1136.0,3527.0
max,1420201.0,,570167.0,2790431.0,117351.0,4464816.0,643686.0,1088006.0,1119617.0,4428434.0,1514975.0,53567.0,1218791.0,438562.0,984677.0,1725096.0,4786238.0,5846762.0,1650443.0,2535966.0,5846762.0


In [77]:
# This doesn't run here, but adding for documentation on a possible way to remove null values from the geojson
# !jq ' .features |= map( .properties |= with_entries( select(.value != null) ) ) ' food-group-2-normal-shrunk-int.geojson > food-group-2-normal-shrunk-int-nonull.geojson

In [78]:
# create a column that is the value of the max znormal column
gdf_normalized_shrunk['max_food_group_value'] = gdf_normalized_shrunk[max_columns].max(axis=1)
gdf_normalized_tiny = gdf_normalized_shrunk.drop(columns=[col for col in gdf_normalized_shrunk.columns if 'znormal_' in col])
gdf_normalized_tiny

Unnamed: 0,geometry,max_food_group,max_food_group_value
364100,POINT (-78.29167 82.95833),eggs,-1691.0
368396,POINT (-80.29167 82.875),eggs,-1692.0
368404,POINT (-79.625 82.875),eggs,-1691.0
368406,POINT (-79.45833 82.875),eggs,-1691.0
368407,POINT (-79.375 82.875),eggs,-1690.0
...,...,...,...
7548383,POINT (-68.04167 -55.625),fish-seafood,-1429.0
7552709,POINT (-67.54167 -55.70833),fish-seafood,-1429.0
7552710,POINT (-67.45833 -55.70833),fish-seafood,-1429.0
7561348,POINT (-67.625 -55.875),fish-seafood,-1429.0


In [79]:
gdf_normalized_tiny.to_file('/Volumes/Samsung 4TB/earthgenome/foodtwin/production-supply/vectorized_production/food-group-2-max.geojson', driver='GeoJSON')

In [1]:
#Tiling Script:
!tippecanoe -zg -o z-score-tiles.mbtiles --drop-fraction-as-needed --extend-zooms-if-still-dropping /Volumes/Samsung 4TB/earthgenome/foodtwin/production-supply/vectorized_production/food-group-2-max.geojson

# Mass Based Normalization

In [4]:
# read '/Volumes/Samsung 4TB/earthgenome/foodtwin/production-supply/vectorized_production/food-group-2-normal.geojson' as geodataframe
all_crops = gpd.read_file('/Volumes/Samsung 4TB/earthgenome/foodtwin/production-supply/vectorized_production/food-group-2-normal.geojson')

In [5]:
all_crops

Unnamed: 0,vegetables,sweetners,vegetable-oils,treenuts,stimulants,spices,sugar-crops,milk-excluding-butter,oilcrops,pulses,cereals-excluding-beer,aquatic-products-other,offals,fish-seafood,meat,eggs,alcoholic-beverages,fruits-excluding-wine,starchy-roots,animal-fats,znormal_vegetables,znormal_sweetners,znormal_vegetable-oils,znormal_treenuts,znormal_stimulants,znormal_spices,znormal_sugar-crops,znormal_milk-excluding-butter,znormal_oilcrops,znormal_pulses,znormal_cereals-excluding-beer,znormal_aquatic-products-other,znormal_offals,znormal_fish-seafood,znormal_meat,znormal_eggs,znormal_alcoholic-beverages,znormal_fruits-excluding-wine,znormal_starchy-roots,znormal_animal-fats,max_food_group,geometry
0,,,,,,,,3.350658,,,,,0.068162,,1.525191,0.042130,,,,0.467383,,,,,,,,-0.282098,,,,,-0.349388,,-0.248993,-0.169072,,,,-0.275192,eggs,POINT (-78.29167 82.95833)
1,,,,,,,,0.835334,,,,,0.012665,,0.298146,0.008088,,,,0.115728,,,,,,,,-0.283542,,,,,-0.351325,,-0.250708,-0.169186,,,,-0.279090,eggs,POINT (-80.29167 82.875)
2,,,,,,,,4.513004,,,,,0.058110,,1.391083,0.027714,,,,0.623350,,,,,,,,-0.281430,,,,,-0.349739,,-0.249181,-0.169120,,,,-0.273463,eggs,POINT (-79.625 82.875)
3,,,,,,,,4.714489,,,,,0.084159,,1.900677,0.043892,,,,0.655487,,,,,,,,-0.281315,,,,,-0.348830,,-0.248469,-0.169066,,,,-0.273107,eggs,POINT (-79.45833 82.875)
4,,,,,,,,5.600649,,,,,0.106315,,2.385633,0.058007,,,,0.779842,,,,,,,,-0.280806,,,,,-0.348057,,-0.247791,-0.169019,,,,-0.271729,eggs,POINT (-79.375 82.875)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1918514,,,,,,,,4.534473,,,,100.39887,0.057795,787.53168,2.815090,0.715552,,,,0.602658,,,,,,,,-0.281418,,,,-0.205162,-0.349750,-0.14292,-0.247191,-0.166829,,,,-0.273692,fish-seafood,POINT (-68.04167 -55.625)
1918515,,,,,,,,,,,,100.39887,,787.53168,,,,,,,,,,,,,,,,,,-0.205162,,-0.14292,,,,,,,fish-seafood,POINT (-67.54167 -55.70833)
1918516,,,,,,,,,,,,100.39887,,787.53168,,,,,,,,,,,,,,,,,,-0.205162,,-0.14292,,,,,,,fish-seafood,POINT (-67.45833 -55.70833)
1918517,,,,,,,,,,,,100.39887,,787.53168,,,,,,,,,,,,,,,,,,-0.205162,,-0.14292,,,,,,,fish-seafood,POINT (-67.625 -55.875)


In [6]:
# create a max weight column that is the max value of the any column that is not geometry, max_food_group, or max_food_group_value, or starting with z normal
all_crops['max_weight'] = all_crops[[col for col in all_crops.columns if col not in ['geometry', 'max_food_group', 'max_food_group_value'] and not col.startswith('znormal')]].max(axis=1)
all_crops['max_weight_food_group'] = all_crops[[col for col in all_crops.columns if col not in ['geometry', 'max_food_group', 'max_food_group_value'] and not col.startswith('znormal')]].idxmax(axis=1)

In [7]:
max_weight_crops = all_crops.copy()
max_weight_crops = max_weight_crops.drop(columns=[col for col in all_crops.columns if col not in ['geometry', 'max_weight', 'max_weight_food_group']])

In [8]:
max_weight_crops

Unnamed: 0,geometry,max_weight,max_weight_food_group
0,POINT (-78.29167 82.95833),3.350658,milk-excluding-butter
1,POINT (-80.29167 82.875),0.835334,milk-excluding-butter
2,POINT (-79.625 82.875),4.513004,milk-excluding-butter
3,POINT (-79.45833 82.875),4.714489,milk-excluding-butter
4,POINT (-79.375 82.875),5.600649,milk-excluding-butter
...,...,...,...
1918514,POINT (-68.04167 -55.625),787.53168,fish-seafood
1918515,POINT (-67.54167 -55.70833),787.53168,fish-seafood
1918516,POINT (-67.45833 -55.70833),787.53168,fish-seafood
1918517,POINT (-67.625 -55.875),787.53168,fish-seafood


In [9]:
max_weight_crops['max_weight'].min()

4.5614863e-07

In [10]:
# write to geojson
max_weight_crops.to_file('/Volumes/Samsung 4TB/earthgenome/foodtwin/production-supply/vectorized_production/food-group-2-max-weight.geojson', driver='GeoJSON')