In [8]:
import xarray as xr
import netCDF4 as nc
import os
import pandas as pd

In [9]:
# Define the base directory where NetCDF files are located
base_directory = "/Users/gaylynruvere/Documents/Machine_Learning/Hackathons/UN_Hackathon_2023_Peat_Fires/"  # Modify this to the appropriate base directory

# Define the variables you want to extract
desired_variables = ["T2M_MIN", "T2M_MAX", "T2M", "PRECTOTCORR", "QV2M", "WS10M", "WD10M", "WS10M_MAX", "WS10M_MIN"]  # Replace with your desired variable names
min_latitude = -6.92276
max_latitude = 7.10208
min_longitude = 94.71447
max_longitude = 107.28015

# Define the years and months to loop through
years = ["2010", "2011", "2012", "2013", "2014", "2015", "2016", "2017", "2018", "2019", "2020", "2021", "2022", "2023"]
months = [str(i).zfill(2) for i in range(1, 3)]
days = [str(i).zfill(2) for i in range(1, 32)]

# Create an empty list to store DataFrame objects
dataframes = []

# Loop through the years, months, and days
for year in years:
    print(year)
    for month in months:
        for day in days:
            # Construct the directory path based on the year, month, and day
            directory_path = os.path.join(base_directory, year, month)
            print(directory_path)

            # Check if the directory exists
            if not os.path.exists(directory_path):
                continue

            # Construct the file name based on the year, month, and day
            file_name = f"power_901_daily_{year}{month}{day}_merra2_lst.nc"
            file_path = os.path.join(directory_path, file_name)
            # print(file_path)

            # Check if the file exists
            if not os.path.exists(file_path):
                continue  # File does not exist, continue to the next day

            # Open the NetCDF file
            dataset = xr.open_dataset(file_path)

            df = dataset.to_dataframe()
            #print(df)

            # Close the NetCDF file
            dataset.close()

            # Keep only the desired variables in the DataFrame
            df = df[desired_variables]
            df = df.reset_index()
            
            # Example of conditions for latitude and longitude
            #lat_condition = (df['lat'].between(-6.92276, 7.10208, inclusive=True))
            #long_condition = (df['long'].between(94.71447, 107.28015, inclusive=True))
            #if (lat_condition & long_condition).any():
                 #dataframes.append(df)
            df = df[(df['lat'] >= min_latitude) & (df['lat'] <= max_latitude) & (df['lon'] >= min_longitude) & (df['lon'] <= max_longitude)]
            
            print(df)
            
            dataframes.append(df)

# Concatenate all DataFrames into one
final_df = pd.concat(dataframes, ignore_index=True)

# Print the final DataFrame
#print(final_df)


2010
/Users/gaylynruvere/Documents/Machine_Learning/Hackathons/UN_Hackathon_2023_Peat_Fires/2010/01
            lon  lat       time     T2M_MIN     T2M_MAX         T2M  \
159007   95.000 -6.5 2010-01-01  301.351562  301.679688  301.507812   
159008   95.000 -6.0 2010-01-01  301.421875  301.757812  301.648438   
159009   95.000 -5.5 2010-01-01  301.515625  302.117188  301.828125   
159010   95.000 -5.0 2010-01-01  301.601562  302.359375  301.953125   
159011   95.000 -4.5 2010-01-01  301.695312  302.437500  302.085938   
...         ...  ...        ...         ...         ...         ...   
165889  106.875  5.0 2010-01-01  299.273438  299.656250  299.554688   
165890  106.875  5.5 2010-01-01  299.343750  299.578125  299.468750   
165891  106.875  6.0 2010-01-01  299.281250  299.507812  299.406250   
165892  106.875  6.5 2010-01-01  299.320312  299.453125  299.382812   
165893  106.875  7.0 2010-01-01  299.242188  299.437500  299.343750   

        PRECTOTCORR      QV2M      WS10M     WD

In [10]:
len(final_df)

464240

In [13]:
final_df.to_csv('Climate_2010_2023.csv')

In [29]:
# Merge 2000-2009 file from Raj with 2010-2023 file

df_2001_2009 = pd.read_csv('Climate_2001_2009.csv')
df_2010_2023 = final_df

print(f'df_2000_2009.shape: {df_2001_2009.shape}')
print(f'df_2010_2023.shape: {df_2010_2023.shape}')

df_2000_2009.shape: (298480, 13)
df_2010_2023.shape: (464240, 12)


In [30]:
df_2001_2009.head()

Unnamed: 0.1,Unnamed: 0,lon,lat,time,T2M_MIN,T2M_MAX,T2M,PRECTOTCORR,QV2M,WS10M,WD10M,WS10M_MAX,WS10M_MIN
0,0,95.0,-6.5,2001-01-01,299.523438,300.945312,300.359375,0.000305,0.019104,5.125,125.1875,7.476562,3.320312
1,1,95.0,-6.0,2001-01-01,299.476562,300.6875,300.132812,0.000183,0.01886,3.796875,129.1875,4.609375,2.78125
2,2,95.0,-5.5,2001-01-01,299.328125,300.351562,299.84375,0.000122,0.018494,2.570312,149.375,3.78125,0.171875
3,3,95.0,-5.0,2001-01-01,298.804688,299.929688,299.507812,6.1e-05,0.01825,2.703125,173.625,3.828125,1.539062
4,4,95.0,-4.5,2001-01-01,298.382812,299.507812,299.117188,6.1e-05,0.018127,2.84375,181.1875,4.46875,0.695312


In [31]:
df_2001_2009 = df_2001_2009.drop(columns=['Unnamed: 0'])
df_2001_2009.head()

Unnamed: 0,lon,lat,time,T2M_MIN,T2M_MAX,T2M,PRECTOTCORR,QV2M,WS10M,WD10M,WS10M_MAX,WS10M_MIN
0,95.0,-6.5,2001-01-01,299.523438,300.945312,300.359375,0.000305,0.019104,5.125,125.1875,7.476562,3.320312
1,95.0,-6.0,2001-01-01,299.476562,300.6875,300.132812,0.000183,0.01886,3.796875,129.1875,4.609375,2.78125
2,95.0,-5.5,2001-01-01,299.328125,300.351562,299.84375,0.000122,0.018494,2.570312,149.375,3.78125,0.171875
3,95.0,-5.0,2001-01-01,298.804688,299.929688,299.507812,6.1e-05,0.01825,2.703125,173.625,3.828125,1.539062
4,95.0,-4.5,2001-01-01,298.382812,299.507812,299.117188,6.1e-05,0.018127,2.84375,181.1875,4.46875,0.695312


In [32]:
df_2010_2023.head()

Unnamed: 0,lon,lat,time,T2M_MIN,T2M_MAX,T2M,PRECTOTCORR,QV2M,WS10M,WD10M,WS10M_MAX,WS10M_MIN
0,95.0,-6.5,2010-01-01,301.351562,301.679688,301.507812,0.000488,0.020325,10.0625,290.125,13.265625,8.179688
1,95.0,-6.0,2010-01-01,301.421875,301.757812,301.648438,0.000549,0.020081,10.648438,289.375,13.304688,8.867188
2,95.0,-5.5,2010-01-01,301.515625,302.117188,301.828125,0.00061,0.019897,10.890625,288.875,12.828125,9.765625
3,95.0,-5.0,2010-01-01,301.601562,302.359375,301.953125,0.000732,0.019653,10.8125,287.0,12.085938,10.265625
4,95.0,-4.5,2010-01-01,301.695312,302.4375,302.085938,0.000732,0.019592,10.601562,285.5625,11.398438,10.140625


In [33]:
df_2001_2023 = pd.concat([df_2001_2009, df_2010_2023], ignore_index=True)

print(f'df_2000_2009.shape: {df_2001_2009.shape}')
print(f'df_2010_2023.shape: {df_2010_2023.shape}')
print(f'df_2000_2023.shape: {df_2001_2023.shape}')

df_2000_2009.shape: (298480, 12)
df_2010_2023.shape: (464240, 12)
df_2000_2023.shape: (762720, 12)


In [34]:
df_2001_2023

Unnamed: 0,lon,lat,time,T2M_MIN,T2M_MAX,T2M,PRECTOTCORR,QV2M,WS10M,WD10M,WS10M_MAX,WS10M_MIN
0,95.000,-6.5,2001-01-01,299.523438,300.945312,300.359375,0.000305,0.019104,5.125000,125.1875,7.476562,3.320312
1,95.000,-6.0,2001-01-01,299.476562,300.687500,300.132812,0.000183,0.018860,3.796875,129.1875,4.609375,2.781250
2,95.000,-5.5,2001-01-01,299.328125,300.351562,299.843750,0.000122,0.018494,2.570312,149.3750,3.781250,0.171875
3,95.000,-5.0,2001-01-01,298.804688,299.929688,299.507812,0.000061,0.018250,2.703125,173.6250,3.828125,1.539062
4,95.000,-4.5,2001-01-01,298.382812,299.507812,299.117188,0.000061,0.018127,2.843750,181.1875,4.468750,0.695312
...,...,...,...,...,...,...,...,...,...,...,...,...
762715,106.875,5.0,2023-02-28 00:00:00,298.757812,299.429688,299.250000,0.000200,0.019043,10.585938,31.9375,13.703125,9.648438
762716,106.875,5.5,2023-02-28 00:00:00,298.710938,299.367188,299.164062,0.000247,0.018860,10.929688,33.6875,13.015625,9.929688
762717,106.875,6.0,2023-02-28 00:00:00,298.750000,299.304688,299.117188,0.000262,0.018677,11.195312,35.8750,12.460938,9.921875
762718,106.875,6.5,2023-02-28 00:00:00,298.781250,299.203125,299.046875,0.000221,0.018494,11.335938,37.8750,12.312500,9.914062


In [35]:
df_2001_2023.to_csv('Climate_2001_2023.csv')