In [9]:
import xarray as xr
import pandas as pd
import matplotlib.pyplot as plt


In [10]:
import glob

# Directory containing the daily WRF NetCDF files
data_dir = "E:/jupyter/DATA/wrf MAM/daily_output/"

# Destination of the merged file
output_path = "E:/jupyter/DATA/wrf MAM/merged_wrf_data.nc"

# glob returns matches in arbitrary, OS-dependent order; sort them so every
# run processes the files in the same sequence
file_paths = sorted(glob.glob(data_dir + "*.nc"))
if not file_paths:
    # Fail early with a clear message instead of an opaque error from xr.concat
    raise FileNotFoundError(f"No NetCDF files found in {data_dir}")

# Open every file, and guarantee the handles are closed again even when the
# merge fails or is interrupted part-way through
datasets = []
try:
    for file_path in file_paths:
        datasets.append(xr.open_dataset(file_path))

    # Concatenate along 'time', then put the time steps in ascending order
    merged_ds = xr.concat(datasets, dim='time')
    merged_ds = merged_ds.sortby('time')

    # The bounds below are the first and last timestamps, so this slice keeps
    # every time step; change start_date / end_date to export a sub-period
    start_date = merged_ds['time'][0].values
    end_date = merged_ds['time'][-1].values
    merged_ds = merged_ds.sel(time=slice(start_date, end_date))

    # Save the merged dataset to NetCDF format
    merged_ds.to_netcdf(output_path)
finally:
    for dataset in datasets:
        dataset.close()

print(f"Merged dataset saved to {output_path}")

KeyboardInterrupt: 

In [4]:
# Open the merged WRF daily-rainfall file and print a summary of its contents
merged_wrf_path = 'E:/jupyter/DATA/wrf MAM/merged_wrf_data.nc'
ds = xr.open_dataset(merged_wrf_path)

print(ds)

<xarray.Dataset> Size: 1GB
Dimensions:    (time: 142, lat: 1549, lon: 1379)
Coordinates:
  * lat        (lat) float32 6kB -13.5 -13.46 -13.42 -13.4 ... 24.42 24.46 24.5
  * lon        (lon) float32 6kB 20.5 20.54 20.58 20.6 ... 52.9 52.92 52.96 53.0
  * time       (time) datetime64[ns] 1kB 2024-02-28T06:00:00 ... 2024-06-11T0...
Data variables:
    dailyrain  (time, lat, lon) float32 1GB ...
Attributes:
    GCM_source:     NOAA NCEP CFSv2 
    creation_date:  Tue Feb 27 09:06:03 EAT 2024
    source_file:    /home/forecast/forecasts/wrf/weekly/  
    title:          WRF daily predicted rainfall 


In [5]:
# Pull the coordinate axes out of the dataset as plain arrays
latitudes = ds['lat'].values
longitudes = ds['lon'].values

# Grid spacing, taken as the gap between the first two points on each axis
lat_step = latitudes[1] - latitudes[0]
lon_step = longitudes[1] - longitudes[0]
lat_res = abs(lat_step)
lon_res = abs(lon_step)

# Report both spacings
print(f"Latitude Resolution: {lat_res} degrees")
print(f"Longitude Resolution: {lon_res} degrees")

Latitude Resolution: 0.03815269470214844 degrees
Longitude Resolution: 0.03973197937011719 degrees


In [8]:
# Keep only the time steps from 1 April 2024 through 30 April 2024.
# NOTE(review): the previous comment on this step described February 1st to
# May 30th, which the code never selected; confirm the intended period and
# adjust the two bounds if needed.
ds = ds.sel(time=slice('2024-04-01', '2024-04-30'))

# Approximate latitude and longitude boundaries of Kenya, in degrees
min_latitude = -4.0
max_latitude = 4.0
min_longitude = 34.0
max_longitude = 42.0

# Cut the rectangular box out with label-based slicing rather than
# where(..., drop=True), which first builds a boolean mask over the full grid.
# slice(min, max) includes both end points and relies on lat/lon being stored
# in ascending order.
kenya_data = ds.sel(lat=slice(min_latitude, max_latitude),
                    lon=slice(min_longitude, max_longitude))

# An empty selection (dates outside the file, or descending coordinates) would
# otherwise produce an empty CSV without any warning
if 0 in kenya_data.sizes.values():
    raise ValueError(
        f"Selection is empty (sizes: {dict(kenya_data.sizes)}); "
        "check the date range and the latitude/longitude bounds"
    )

# Flatten to a table with one row per (time, lat, lon) grid point; reset_index
# turns the coordinate index levels into ordinary columns
df = kenya_data.to_dataframe().reset_index()

# Only the data variable needs renaming; 'lon' and 'lat' already have the
# desired column names
df = df.rename(columns={'dailyrain': 'rain'})

# Save to a CSV file on the E drive
df.to_csv('E:/wrf_data1.csv', index=False)