In [1]:
import xarray as xr

import pandas as pd
import numpy as np

In [2]:
# Load the .nc file
fe = xr.open_dataset('/Users/arup/Documents/ISRO-Project/prediction/raw_data/fe.nc')

# View the data
fe

In [3]:
# Check lev (depth) metadata
print(fe.lev.attrs)
# Output should confirm:
#   units = "meters"
#   positive = "down" (standard for ocean depth)

{'standard_name': 'depth', 'long_name': 'ocean depth coordinate', 'units': 'm', 'positive': 'down', 'axis': 'Z', 'bounds': 'lev_bnds'}


In [4]:
# Convert the 4D data to 2D dataframe
fe = fe['dfe'].to_dataframe().reset_index()

# Rename columns for clarity
fe = fe.rename(columns={'dfe': 'fe'})

fe

Unnamed: 0,time,lev,j,i,longitude,latitude,fe
0,2025-01-16 12:00:00,6.0,137.0,10.0,87.209916,22.810536,
1,2025-01-16 12:00:00,6.0,137.0,11.0,87.644821,22.818800,
2,2025-01-16 12:00:00,6.0,137.0,12.0,88.079744,22.827879,
3,2025-01-16 12:00:00,6.0,137.0,13.0,88.514688,22.837772,
4,2025-01-16 12:00:00,6.0,137.0,14.0,88.949652,22.848474,
...,...,...,...,...,...,...,...
215995,2029-12-16 12:00:00,5720.0,146.0,14.0,89.022688,19.994032,
215996,2029-12-16 12:00:00,5720.0,146.0,15.0,89.463056,20.002688,
215997,2029-12-16 12:00:00,5720.0,146.0,16.0,89.903443,20.011949,
215998,2029-12-16 12:00:00,5720.0,146.0,17.0,90.343850,20.021812,


In [5]:
# Get the time range this dataset covers
time_range_start = fe['time'].min()
time_range_end = fe['time'].max()

print(f"Time range: {time_range_start} to {time_range_end}")

Time range: 2025-01-16 12:00:00 to 2029-12-16 12:00:00


In [6]:
# Get the time range this dataset covers
time_range_start = fe['lev'].min()
time_range_end = fe['lev'].max()

print(f"Lev range: {time_range_start} to {time_range_end}")

Lev range: 6.0 to 5720.0


In [7]:
# Filter the dataset to include only rows where lev = 6
fe = fe[fe['lev'] == 6.0]

# Select only required columns
fe = fe[['time', 'longitude', 'latitude', 'fe']]

# Remove NaN values from fe column
fe = fe.dropna(subset=['fe'])

# Round off latitude and longitude to 2 decimal places
fe['longitude'] = fe['longitude'].round(2)
fe['latitude'] = fe['latitude'].round(2)

# Convert time to datetime if not already
fe['time'] = pd.to_datetime(fe['time'])

# Create a new column with first day of each month
fe['time'] = fe['time'].dt.strftime('%Y-%m-01')

# Group by time, longitude, latitude and calculate mean of fe
fe = fe.groupby(['time', 'longitude', 'latitude'])['fe'].mean().reset_index()

# Convert time back to datetime
fe['time'] = pd.to_datetime(fe['time'])

# Sort by date
fe = fe.sort_values('time')

print("Shape after monthly aggregation:", fe.shape)

fe

Shape after monthly aggregation: (3240, 4)


Unnamed: 0,time,longitude,latitude,fe
0,2025-01-01,87.24,21.24,0.000002
29,2025-01-01,89.42,21.60,0.000002
30,2025-01-01,89.43,21.28,0.000002
31,2025-01-01,89.44,20.97,0.000002
32,2025-01-01,89.45,20.65,0.000002
...,...,...,...,...
3207,2029-12-01,88.58,19.99,0.000002
3208,2029-12-01,88.58,20.31,0.000002
3209,2029-12-01,88.98,21.59,0.000002
3197,2029-12-01,88.11,21.57,0.000002


In [8]:
# Convert fe from mol/m3 to mmol/m3
fe['fe'] = fe['fe'] * 1000

fe

Unnamed: 0,time,longitude,latitude,fe
0,2025-01-01,87.24,21.24,0.001541
29,2025-01-01,89.42,21.60,0.001674
30,2025-01-01,89.43,21.28,0.001692
31,2025-01-01,89.44,20.97,0.001744
32,2025-01-01,89.45,20.65,0.001771
...,...,...,...,...
3207,2029-12-01,88.58,19.99,0.001633
3208,2029-12-01,88.58,20.31,0.001728
3209,2029-12-01,88.98,21.59,0.002267
3197,2029-12-01,88.11,21.57,0.001615


In [9]:
# Save the dataset as a CSV file in the processed_data folder
fe.to_csv('/Users/arup/Documents/ISRO-Project/prediction/processed_data/fe.csv', index=False)