In [1]:
import os
from functools import partial
from multiprocessing import Pool

import geopandas as gpd
import numpy as np
import pandas as pd
import planetary_computer
import pystac
import pystac_client
import rasterio
import rioxarray
import xarray
from rasterio.plot import show
from rioxarray.merge import merge_arrays
from shapely.geometry import Polygon, shape
from shapely.ops import transform

In [None]:
# Define a function to process a chunk of data
def process_chunk(chunk, heat_data, outFileName):
    """Summarise one chunk of population records and write the result to CSV.

    Parameters
    ----------
    chunk : pandas.DataFrame or compatible
        Slice of the population table; it must provide a 'pop_data' column.
    heat_data : object
        Reference to the heat raster. The placeholder body below does not
        use it yet.
    outFileName : str
        Path of the one-row CSV file written for this chunk.

    Returns
    -------
    The sum of ``chunk['pop_data']``. Returning it lets a caller that
    dispatched this function through ``Pool.apply_async`` read the value
    back with ``AsyncResult.get()`` instead of re-parsing the CSV.
    """
    # TODO: replace the placeholder aggregation below with the real
    # per-chunk processing.
    chunk_sum = chunk['pop_data'].sum()

    # Persist the per-chunk result as a single-row, single-column CSV.
    chunk_result = pd.DataFrame({'Chunk_Sum': [chunk_sum]})
    chunk_result.to_csv(outFileName, index=False)
    print(f"Processed chunk and saved results to {outFileName}")

    return chunk_sum

In [None]:
# Define the directory and other variables
directory = os.path.join(os.getcwd(), 'data')

# `masked` is not an argument of rasterio.open (extra keywords are forwarded
# to the format driver as open options); masking is requested at read time
# instead, e.g. heat_data.read(1, masked=True).
# NOTE(review): this path is resolved against the working directory, not
# `directory` -- confirm that is where the heat raster lives.
heat_data = rasterio.open('global_mean_wbgt_30.tif')

countryCode = 'usa'
fileName = '_ppp_2020_1km_Aggregated_UNadj.tif'
filePath = os.path.join(directory, countryCode + fileName)

# Needs the top-level `import rioxarray`; importing only
# `rioxarray.merge.merge_arrays` does not bind the name `rioxarray`.
country_pop = rioxarray.open_rasterio(filePath, masked=True)

outFileName = os.path.join(directory, countryCode + '_heat_flood_extract.csv')

# Bounding box of the population raster.
country_box = country_pop.rio.bounds()

In [None]:
# Partition the population table into consecutive fixed-size slices.
# NOTE(review): `country_pop_gdf` is not defined in any cell shown here; it
# must be created (presumably from `country_pop`) before this cell can run.
chunk_size = 10000  # rows per chunk; lower this if memory is tight
chunks = [
    country_pop_gdf[start:start + chunk_size]
    for start in range(0, len(country_pop_gdf), chunk_size)
]


In [None]:
# Start the worker pool that will run the per-chunk jobs
num_processes = 4  # number of worker processes; adjust as needed
pool = Pool(processes=num_processes)


In [None]:
# Submit one asynchronous job per chunk.
# The workers receive the raster *path* (heat_data.name) rather than the open
# rasterio dataset: task arguments are pickled on their way to the worker
# processes, and an open dataset handle cannot be pickled. Code in
# process_chunk that needs the raster should reopen it from this path.
heat_data_path = heat_data.name
results = []
for i, chunk in enumerate(chunks):
    out_chunk_file = f"{countryCode}_chunk_{i}.csv"
    result = pool.apply_async(process_chunk, args=(chunk, heat_data_path, out_chunk_file))
    results.append(result)

In [None]:
# Wait for all processes to finish
pool.close()
pool.join()

# AsyncResult.get() re-raises any exception from a task (including a failure
# to pickle its arguments). Without this, failed chunks pass silently and only
# show up later as missing CSV files.
for async_result in results:
    async_result.get()


In [None]:
# Combine the per-chunk CSV files into a single dataframe
chunk_files = [f"{countryCode}_chunk_{i}.csv" for i in range(len(chunks))]
# ignore_index=True renumbers the rows 0..n-1; each chunk CSV holds one row,
# so without it every row of the combined frame would carry index label 0.
combined_df = pd.concat([pd.read_csv(path) for path in chunk_files], ignore_index=True)

# Save the combined dataframe to the final output file
outFileName = directory + '/' + countryCode + '_heat_flood_extract.csv'
combined_df.to_csv(outFileName, index=False)

print(f"Final results saved to {outFileName}")