In [4]:
import cdsapi

def data_download(year, month, region):

    c = cdsapi.Client()
    c.retrieve(
        'satellite-fire-burned-area',
        {
            'origin': 'esa_cci',
            'sensor': 'modis',
            'variable': 'pixel_variables',
            'version': '5_1_1cds',
            'region': f'{region}',
            'year': f'{year}',
            'month': f'{str(month).zfill(2)}',
            'nominal_day': '01',
            'format': 'tgz',
        },
        f'../data/{region}_{year}_{str(month).zfill(2)}.tar.gz')
       
    return unpack_data()


In [5]:
def unpack_data():
    import os
    import glob
    data_dir = '../data'

# Find all .tar.gz files in the data directory
    tar_files = glob.glob(os.path.join(data_dir, '*.tar.gz'))

# Unpack and remove each .tar.gz file
    for tar_file in tar_files:
        os.system(f'tar -xf {tar_file} -C {data_dir}')
        os.remove(tar_file)

In [6]:
#just select date and region
#modifying forloop should not be necessary

start_year = 2001
end_year = 2001
start_month = 8
end_month = 8
region = 'europe'

for i in range(start_year, end_year + 1):
    for j in range(start_month, end_month + 1):
        data_download(i, j, region)

2023-06-07 11:19:24,218 INFO Welcome to the CDS
2023-06-07 11:19:24,220 INFO Sending request to https://cds.climate.copernicus.eu/api/v2/resources/satellite-fire-burned-area
2023-06-07 11:19:24,278 INFO Request is completed
2023-06-07 11:19:24,280 INFO Downloading https://download-0015-clone.copernicus-climate.eu/cache-compute-0015/cache/data6/dataset-satellite-fire-burned-area-13ece09a-4791-417c-a9bb-43f5d19dbb5f.tar.gz to ../data/europe_2001_08.tar.gz (47.8M)
2023-06-07 11:19:29,217 INFO Download rate 9.7M/s   


In [1]:
import xarray as xr
import pandas as pd
import numpy as np

pixel_ds = xr.open_dataset('../data/20010801-ESACCI-L3S_FIRE-BA-MODIS-AREA_3-fv5.1.1cds.nc', engine='netcdf4')
pixel_ds

In [2]:
#pixel_ds = pixel_ds.squeeze('time')
def round_to_nearest_025(x):
    return np.round(x * 4) / 4

pixel_ds['lat'] = xr.apply_ufunc(round_to_nearest_025, pixel_ds['lat'])
pixel_ds['lon'] = xr.apply_ufunc(round_to_nearest_025, pixel_ds['lon'])

pixel_ds


In [3]:
filtered_data = pixel_ds.squeeze('time')
filtered_data = filtered_data.sel(lat=slice(52, 33), lon=slice(-10, 50))
filtered_data = filtered_data[['CL',
                               'lat_bounds',
                               'lon_bounds'
                               ]]

filtered_data = filtered_data.drop_duplicates(dim=['lat','lon'])




filtered_data

In [4]:
pixel_df = filtered_data.to_dataframe().reset_index()
pixel_df = pixel_df.drop(['lat_bounds', 'lon_bounds', 'bounds'], axis = 1)

pixel_df

Unnamed: 0,lat,lon,CL,time
0,52.0,-10.00,1,2001-07-31
1,52.0,-10.00,1,2001-07-31
2,52.0,-9.75,0,2001-07-31
3,52.0,-9.75,0,2001-07-31
4,52.0,-9.50,1,2001-07-31
...,...,...,...,...
37109,33.0,49.50,1,2001-07-31
37110,33.0,49.75,1,2001-07-31
37111,33.0,49.75,1,2001-07-31
37112,33.0,50.00,1,2001-07-31


In [5]:
pixel_df['year'] = pixel_df['time'].dt.year
pixel_df['month'] = pixel_df['time'].dt.month
pixel_df = pixel_df.drop('time', axis= 1).dropna()
pixel_df

Unnamed: 0,lat,lon,CL,year,month
0,52.0,-10.00,1,2001,7
1,52.0,-10.00,1,2001,7
2,52.0,-9.75,0,2001,7
3,52.0,-9.75,0,2001,7
4,52.0,-9.50,1,2001,7
...,...,...,...,...,...
37109,33.0,49.50,1,2001,7
37110,33.0,49.75,1,2001,7
37111,33.0,49.75,1,2001,7
37112,33.0,50.00,1,2001,7


In [3]:
import xarray as xr
import pandas as pd
import numpy as np
import os


def round_to_nearest_025(x):
    return np.round(x * 4) / 4

def pixel_preprocessing(file_path):
    pixel_ds = xr.open_dataset(file_path, engine='netcdf4')

    pixel_ds['lat'] = xr.apply_ufunc(round_to_nearest_025, pixel_ds['lat'])
    pixel_ds['lon'] = xr.apply_ufunc(round_to_nearest_025, pixel_ds['lon'])

    filtered_data = pixel_ds.squeeze('time')
    filtered_data = filtered_data.sel(lat=slice(52, 33), lon=slice(-10, 50))
    filtered_data = filtered_data[['CL',
                                   'lat_bounds',
                                   'lon_bounds'
                                   ]]

    filtered_data = filtered_data.drop_duplicates(dim=['lat','lon'])

    pixel_df = filtered_data.to_dataframe().reset_index()
    pixel_df = pixel_df.drop(['lat_bounds', 'lon_bounds', 'bounds'], axis = 1)

    pixel_df['year'] = pixel_df['time'].dt.year
    pixel_df['month'] = pixel_df['time'].dt.month
    pixel_df = pixel_df.drop('time', axis= 1).dropna()
    return pixel_df

In [2]:
pixel_preprocessing('../data/20010801-ESACCI-L3S_FIRE-BA-MODIS-AREA_3-fv5.1.1cds.nc')

Unnamed: 0,lat,lon,CL,year,month
0,52.0,-10.00,1,2001,7
1,52.0,-10.00,1,2001,7
2,52.0,-9.75,0,2001,7
3,52.0,-9.75,0,2001,7
4,52.0,-9.50,1,2001,7
...,...,...,...,...,...
37109,33.0,49.50,1,2001,7
37110,33.0,49.75,1,2001,7
37111,33.0,49.75,1,2001,7
37112,33.0,50.00,1,2001,7


In [None]:
data_dir = '../data'
dfs_grid = []
dfs_pixel = []

for file_name in os.listdir(data_dir):
    if file_name.endswith('.nc') and 'FIRE' in file_name:
        if 'AREA' in file_name:
            file_path = os.path.join(data_dir, file_name)
            df_pixel = pixel_preprocessing(file_path)
            dfs_pixel.append(df_pixel)
        
        else:
            file_path = os.path.join(data_dir, file_name)
            df_grid = fire_preprocessing(file_path)            
            dfs_grid.append(df_grid)

big_pixel_df = pd.concat(dfs_pixel, ignore_index=True)
big_grid_df = pd.concat(dfs_grid, ignore_index=True)

final_df = big_pixel_df.merge(
    big_grid_df,
    how='left',
    left_on=['lat', 'lon', 'year', 'month'],
    right_on=['lat_bounds', 'lon_bounds', 'year', 'month']
    )

final_df.to_csv(os.path.join(data_dir, 'fire.csv'), index=False)