In [2]:
!pip install --upgrade ipykernel



In [3]:
import ee

In [4]:
ee.Authenticate()

True

In [5]:
ee.Initialize()

In [6]:
!pip --version

pip 25.0.1 from /Users/caseydai/Desktop/GHP299/Nick_Codes/myenvironment/lib/python3.13/site-packages/pip (python 3.13)


In [7]:
import sys
sys.path.append('/Users/caseydai/Desktop/GHP299/Nick_Codes/myenvironment/lib/python3.13/site-packages')

import os, re
!pip install pandas
import pandas as pd
!pip install gee_subset
from gee_subset import gee_subset
!pip install geopandas
import geopandas as gpd
!pip install geemap
import geemap
from datetime import datetime, timezone



In [8]:
fc = ee.FeatureCollection('projects/ee-rvmscdai/assets/BAIRROS_NOISLAS')

In [774]:
#export in chunks, every 2 months  
start_date = '2024-10-29'
end_date = '2024-12-29'
variables = ['temperature_2m_min','temperature_2m_max','total_precipitation_sum','temperature_2m']

In [775]:
# Initialize lists to store extracted values
dates = []
bairros = []
variable_values = {variable: [] for variable in variables}

In [776]:
# Function to calculate the weighted average by area of overlapping pixels for each date
def calculate_weighted_average(image, feature, variable):
    # Calculate the area of the pixels
    pixel_area = ee.Image.pixelArea()

    # Calculate the area-weighted sum of the variable
    weighted_sum = image.multiply(pixel_area).reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=feature.geometry(),
        scale=10000,
        maxPixels=1e9
    )

    # Calculate the total area of the overlapping pixels
    total_area = pixel_area.reduceRegion(
        reducer=ee.Reducer.sum(),
        geometry=feature.geometry(),
        scale=10000,
        maxPixels=1e9
    )

    # Calculate the weighted average
    weighted_avg = ee.Number(weighted_sum.get(variable)).divide(total_area.get('area'))

    return image.set('weighted_avg', weighted_avg)

# Function to process each variable for a given feature
def process_variable(variable, feature):
    # Load ERA5 data for the variable
    variable_data = ee.ImageCollection('ECMWF/ERA5_LAND/DAILY_AGGR') \
        .filterDate(start_date, end_date) \
        .select(variable)

    # Clip the raster to the polygon and calculate the weighted average
    def map_over_images(image):
        return calculate_weighted_average(image, feature, variable)

    weighted_data = variable_data.map(map_over_images)

    # Extract the values and dates
    dates_ = weighted_data.aggregate_array('system:time_start')
    values_ = weighted_data.aggregate_array('weighted_avg')

    return ee.Feature(None, {
        'bairro': feature.get('CodBairro'),
        'dates': dates_,
        'values': values_,
        'variable': variable
    })

# Function to extract data for each polygon
def extract_data(feature):
    return ee.FeatureCollection([process_variable(variable, feature) for variable in variables])


In [777]:
# Apply the extraction function to each polygon in the shapefile
results = fc.map(extract_data).flatten()

# Get the results as a list
results_list = results.getInfo()

# Parse the results
for result in results_list['features']:
    properties = result['properties']
    bairro = properties['bairro']
    variable = properties['variable']
    dates_ = properties['dates']
    values_ = properties['values']
    
    for date, value in zip(dates_, values_):
        dates.append(datetime.utcfromtimestamp(date / 1000).strftime('%Y-%m-%d'))
        bairros.append(bairro)
        variable_values[variable].append(value)

# Check the lengths of the lists
print(f"Length of dates: {len(dates)}")
print(f"Length of bairros: {len(bairros)}")
for variable in variables:
    print(f"Length of {variable} values: {len(variable_values[variable])}")
    

Length of dates: 38308
Length of bairros: 38308
Length of temperature_2m_min values: 9577
Length of temperature_2m_max values: 9577
Length of total_precipitation_sum values: 9577
Length of temperature_2m values: 9577


  dates.append(datetime.utcfromtimestamp(date / 1000).strftime('%Y-%m-%d'))


In [778]:
# Initialize lists to store extracted values
dates = []
bairros = []  # Ensure this is initialized as a list
variable_values = {variable: [] for variable in variables}

# Loop through results and parse them correctly
for result in results_list['features']:
    properties = result['properties']
    bairro = properties['bairro']  # Assign the string `bairro`
    variable = properties['variable']
    dates_ = properties['dates']
    values_ = properties['values']

    for date, value in zip(dates_, values_):
        dates.append(datetime.fromtimestamp(date / 1000, tz=timezone.utc).strftime('%Y-%m-%d'))
        bairros.append(bairro)  # Append `bairro` string to `bairros` list
        variable_values[variable].append(value)

# Check the lengths of the lists
print(f"Length of dates: {len(dates)}")
print(f"Length of bairros: {len(bairros)}")  # Corrected print statement
for variable in variables:
    print(f"Length of {variable} values: {len(variable_values[variable])}")

Length of dates: 38308
Length of bairros: 38308
Length of temperature_2m_min values: 9577
Length of temperature_2m_max values: 9577
Length of total_precipitation_sum values: 9577
Length of temperature_2m values: 9577


In [779]:
# Create a DataFrame for dates and bairros
df_dates_bairros = pd.DataFrame({'date': dates, 'bairro': bairros})

# Get unique combinations of dates and bairros
df_unique_dates_bairros = df_dates_bairros.drop_duplicates().reset_index(drop=True)

# Start with this DataFrame
dfs = [df_unique_dates_bairros]

# Create DataFrames for each variable's values
for variable, values in variable_values.items():
    df_variable = pd.DataFrame({variable: values})
    dfs.append(df_variable)  # Ensure consistent indentation

# Concatenate all DataFrames along the columns (axis=1)
df_combined = pd.concat(dfs, axis=1)

# Check the final combined DataFrame
print(df_combined)

            date  bairro  temperature_2m_min  temperature_2m_max  \
0     2024-10-29       1            0.000000            0.000000   
1     2024-10-30       1            0.000000            0.000000   
2     2024-10-31       1            0.000000            0.000000   
3     2024-11-01       1            0.000000            0.000000   
4     2024-11-02       1            0.000000            0.000000   
...          ...     ...                 ...                 ...   
9572  2024-12-24     150           52.108918           52.743655   
9573  2024-12-25     150           52.089892           53.709785   
9574  2024-12-26     150           52.769047           53.452670   
9575  2024-12-27     150           52.512207           52.973246   
9576  2024-12-28     150           52.373624           53.373114   

      total_precipitation_sum  temperature_2m  
0                    0.000000        0.000000  
1                    0.000000        0.000000  
2                    0.000000        0.

In [780]:
# Create a DataFrame to store the results
df = pd.DataFrame(df_combined)

In [781]:
df

Unnamed: 0,date,bairro,temperature_2m_min,temperature_2m_max,total_precipitation_sum,temperature_2m
0,2024-10-29,1,0.000000,0.000000,0.000000,0.000000
1,2024-10-30,1,0.000000,0.000000,0.000000,0.000000
2,2024-10-31,1,0.000000,0.000000,0.000000,0.000000
3,2024-11-01,1,0.000000,0.000000,0.000000,0.000000
4,2024-11-02,1,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...
9572,2024-12-24,150,52.108918,52.743655,0.000665,52.362855
9573,2024-12-25,150,52.089892,53.709785,0.000053,52.830487
9574,2024-12-26,150,52.769047,53.452670,0.000903,52.987900
9575,2024-12-27,150,52.512207,52.973246,0.008544,52.739743


In [782]:
df.to_csv('RJ_clima_10_2024_to_12_2024.csv', index=False)