In [1]:
import xarray as xr

import pandas as pd
import numpy as np

In [2]:
# Load the .nc file
wo = xr.open_dataset('/Users/arup/Documents/ISRO-Project/prediction/raw_data/wo.nc')

# View the data
wo

In [3]:
# Check lev (depth) metadata
print(wo.lev.attrs)
# Output should confirm:
#   units = "meters"
#   positive = "down" (standard for ocean depth)

{'standard_name': 'depth', 'long_name': 'ocean depth coordinate', 'units': 'm', 'positive': 'down', 'axis': 'Z', 'bounds': 'lev_bnds'}


In [4]:
# Convert the 4D data to 2D dataframe
wo = wo['wo'].to_dataframe().reset_index()

wo

Unnamed: 0,time,lev,j,i,longitude,latitude,wo
0,2025-01-16 12:00:00,0.0,137.0,10.0,87.209916,22.810536,
1,2025-01-16 12:00:00,0.0,137.0,11.0,87.644821,22.818800,
2,2025-01-16 12:00:00,0.0,137.0,12.0,88.079744,22.827879,
3,2025-01-16 12:00:00,0.0,137.0,13.0,88.514688,22.837772,
4,2025-01-16 12:00:00,0.0,137.0,14.0,88.949652,22.848474,
...,...,...,...,...,...,...,...
221395,2029-12-16 12:00:00,6020.0,146.0,14.0,89.022688,19.994032,
221396,2029-12-16 12:00:00,6020.0,146.0,15.0,89.463056,20.002688,
221397,2029-12-16 12:00:00,6020.0,146.0,16.0,89.903443,20.011949,
221398,2029-12-16 12:00:00,6020.0,146.0,17.0,90.343850,20.021812,


In [5]:
# Get the time range this dataset covers
time_range_start = wo['time'].min()
time_range_end = wo['time'].max()

print(f"Time range: {time_range_start} to {time_range_end}")

Time range: 2025-01-16 12:00:00 to 2029-12-16 12:00:00


In [6]:
# Get the time range this dataset covers
time_range_start = wo['lev'].min()
time_range_end = wo['lev'].max()

print(f"Lev range: {time_range_start} to {time_range_end}")

Lev range: 0.0 to 6020.0


In [7]:
# Filter the dataset to include only rows where lev = 0
wo = wo[wo['lev'] == 0.0]

# Select only required columns
wo = wo[['time', 'longitude', 'latitude', 'wo']]

# Remove NaN values from wo column
wo = wo.dropna(subset=['wo'])

# Round off latitude and longitude to 2 decimal places
wo['longitude'] = wo['longitude'].round(2)
wo['latitude'] = wo['latitude'].round(2)

# Convert time to datetime if not already
wo['time'] = pd.to_datetime(wo['time'])

# Create a new column with first day of each month
wo['time'] = wo['time'].dt.strftime('%Y-%m-01')

# Group by time, longitude, latitude and calculate mean of wo
wo = wo.groupby(['time', 'longitude', 'latitude'])['wo'].mean().reset_index()

# Convert time back to datetime
wo['time'] = pd.to_datetime(wo['time'])

# Sort by date
wo = wo.sort_values('time')

print("Shape after monthly aggregation:", wo.shape)

wo

Shape after monthly aggregation: (3240, 4)


Unnamed: 0,time,longitude,latitude,wo
0,2025-01-01,87.24,21.24,-2.178956e-08
29,2025-01-01,89.42,21.60,3.127674e-08
30,2025-01-01,89.43,21.28,3.544245e-08
31,2025-01-01,89.44,20.97,3.798734e-08
32,2025-01-01,89.45,20.65,3.986886e-08
...,...,...,...,...
3207,2029-12-01,88.58,19.99,4.652049e-08
3208,2029-12-01,88.58,20.31,5.307874e-08
3209,2029-12-01,88.98,21.59,4.555405e-08
3197,2029-12-01,88.11,21.57,-1.943153e-07


In [8]:
# Save the dataset as a CSV file in the processed_data folder
wo.to_csv('/Users/arup/Documents/ISRO-Project/prediction/processed_data/wo.csv', index=False)