In [1]:
import xarray as xr

import pandas as pd
import numpy as np

In [2]:
# Load the .nc file
no3 = xr.open_dataset('/Users/arup/Documents/ISRO-Project/prediction/raw_data/no3.nc')

# View the data
no3

In [3]:
# Check lev (depth) metadata
print(no3.lev.attrs)
# Output should confirm:
#   units = "meters"
#   positive = "down" (standard for ocean depth)

{'standard_name': 'depth', 'long_name': 'ocean depth coordinate', 'units': 'm', 'positive': 'down', 'axis': 'Z', 'bounds': 'lev_bnds'}


In [4]:
# Convert the 4D data to 2D dataframe
no3 = no3['no3'].to_dataframe().reset_index()

# Rename columns for clarity
no3 = no3.rename(columns={'no3': 'no3'})

no3

Unnamed: 0,time,lev,j,i,longitude,latitude,no3
0,2025-01-16 12:00:00,6.0,137.0,10.0,87.209916,22.810536,
1,2025-01-16 12:00:00,6.0,137.0,11.0,87.644821,22.818800,
2,2025-01-16 12:00:00,6.0,137.0,12.0,88.079744,22.827879,
3,2025-01-16 12:00:00,6.0,137.0,13.0,88.514688,22.837772,
4,2025-01-16 12:00:00,6.0,137.0,14.0,88.949652,22.848474,
...,...,...,...,...,...,...,...
215995,2029-12-16 12:00:00,5720.0,146.0,14.0,89.022688,19.994032,
215996,2029-12-16 12:00:00,5720.0,146.0,15.0,89.463056,20.002688,
215997,2029-12-16 12:00:00,5720.0,146.0,16.0,89.903443,20.011949,
215998,2029-12-16 12:00:00,5720.0,146.0,17.0,90.343850,20.021812,


In [5]:
# Get the time range this dataset covers
time_range_start = no3['time'].min()
time_range_end = no3['time'].max()

print(f"Time range: {time_range_start} to {time_range_end}")

Time range: 2025-01-16 12:00:00 to 2029-12-16 12:00:00


In [6]:
# Get the time range this dataset covers
time_range_start = no3['lev'].min()
time_range_end = no3['lev'].max()

print(f"Lev range: {time_range_start} to {time_range_end}")

Lev range: 6.0 to 5720.0


In [7]:
# Filter the dataset to include only rows where lev = 6
no3 = no3[no3['lev'] == 6.0]

# Select only required columns
no3 = no3[['time', 'longitude', 'latitude', 'no3']]

# Remove NaN values from no3 column
no3 = no3.dropna(subset=['no3'])

# Round off latitude and longitude to 2 decimal places
no3['longitude'] = no3['longitude'].round(2)
no3['latitude'] = no3['latitude'].round(2)

# Convert time to datetime if not already
no3['time'] = pd.to_datetime(no3['time'])

# Create a new column with first day of each month
no3['time'] = no3['time'].dt.strftime('%Y-%m-01')

# Group by time, longitude, latitude and calculate mean of no3
no3 = no3.groupby(['time', 'longitude', 'latitude'])['no3'].mean().reset_index()

# Convert time back to datetime
no3['time'] = pd.to_datetime(no3['time'])

# Sort by date
no3 = no3.sort_values('time')

print("Shape after monthly aggregation:", no3.shape)

no3

Shape after monthly aggregation: (3240, 4)


Unnamed: 0,time,longitude,latitude,no3
0,2025-01-01,87.24,21.24,0.000030
29,2025-01-01,89.42,21.60,0.000088
30,2025-01-01,89.43,21.28,0.000051
31,2025-01-01,89.44,20.97,0.000076
32,2025-01-01,89.45,20.65,0.000047
...,...,...,...,...
3207,2029-12-01,88.58,19.99,0.000121
3208,2029-12-01,88.58,20.31,0.000087
3209,2029-12-01,88.98,21.59,0.000036
3197,2029-12-01,88.11,21.57,0.000028


In [8]:
# Convert no3 from mol/m3 to mmol/m3
no3['no3'] = no3['no3'] * 1000

no3

Unnamed: 0,time,longitude,latitude,no3
0,2025-01-01,87.24,21.24,0.029837
29,2025-01-01,89.42,21.60,0.087528
30,2025-01-01,89.43,21.28,0.051119
31,2025-01-01,89.44,20.97,0.075775
32,2025-01-01,89.45,20.65,0.046936
...,...,...,...,...
3207,2029-12-01,88.58,19.99,0.121051
3208,2029-12-01,88.58,20.31,0.086660
3209,2029-12-01,88.98,21.59,0.036267
3197,2029-12-01,88.11,21.57,0.028287


In [9]:
# Save the dataset as a CSV file in the processed_data folder
no3.to_csv('/Users/arup/Documents/ISRO-Project/prediction/processed_data/no3.csv', index=False)