# Figure 3 (a), (b)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import cmocean
import gsw
from scipy.ndimage import uniform_filter1d

# Purpose: load one Argo float CSV, restrict it to the study box and time
# window, and derive TEOS-10 quantities (SA, CT, sigma0) for the contour plots.

# Define the file path
file_path = '/mnt/d/Data/Argo/Argo/PR_PF_7900334.csv'

# Extract the Argo float number from the filename
# (third '_'-separated token with the extension stripped, e.g. '7900334')
file_name = os.path.basename(file_path)
argo_float_number = file_name.split('_')[2].split('.')[0]

# Load the CSV file
df = pd.read_csv(file_path)

# Ensure the correct data types; unparsable entries become NaN (errors='coerce').
# NOTE: the working pressure/temperature columns are overwritten here with the
# *_ADJUSTED values, so every later use of 'PRES (decibar)' and
# 'TEMP (degree_Celsius)' in this cell refers to adjusted data.
df['DATE (YYYY-MM-DDTHH:MI:SSZ)'] = pd.to_datetime(df['DATE (YYYY-MM-DDTHH:MI:SSZ)'])
df['PRES (decibar)'] = pd.to_numeric(df['PRES_ADJUSTED (decibar)'], errors='coerce')
df['TEMP (degree_Celsius)'] = pd.to_numeric(df['TEMP_ADJUSTED (degree_Celsius)'], errors='coerce')
df['latitude'] = pd.to_numeric(df['LATITUDE (degree_north)'], errors='coerce')
df['longitude'] = pd.to_numeric(df['LONGITUDE (degree_east)'], errors='coerce')
df['PSAL_ADJUSTED (psu)'] = pd.to_numeric(df['PSAL_ADJUSTED (psu)'], errors='coerce')

# Define the rectangle box coordinates (degrees east / degrees north)
rect_min_lon, rect_max_lon = 20, 70
rect_min_lat, rect_max_lat = -70, -60

# Ensure no missing values for tricontourf
df = df.dropna(subset=['PRES (decibar)', 'TEMP (degree_Celsius)', 'PSAL_ADJUSTED (psu)'])

# Filter the data for the defined rectangle (bounds inclusive; rows with NaN
# coordinates fail every comparison and are dropped as well)
df = df[(df['longitude'] >= rect_min_lon) &
        (df['longitude'] <= rect_max_lon) &
        (df['latitude'] >= rect_min_lat) &
        (df['latitude'] <= rect_max_lat)]

# Convert dates to numeric values (POSIX seconds) for plotting, then keep only
# samples up to 2017-03-30
df['timestamp'] = df['DATE (YYYY-MM-DDTHH:MI:SSZ)'].apply(lambda x: x.timestamp())
df = df[df['timestamp'] <= pd.Timestamp('2017-03-30').timestamp()]

# Extract unique profiles (timestamps) for marking on the top axis
unique_profiles = df['timestamp'].drop_duplicates()

# Calculate Absolute Salinity (SA), Conservative Temperature (CT), and Density
df['SA'] = gsw.SA_from_SP(df['PSAL_ADJUSTED (psu)'],
                          df['PRES (decibar)'],
                          df['longitude'],
                          df['latitude'])
df['CT'] = gsw.CT_from_t(df['SA'],
                         df['TEMP (degree_Celsius)'],
                         df['PRES (decibar)'])
df['density'] = gsw.sigma0(df['SA'], df['CT'])  # potential density anomaly

# --------------------------
# Calculate Mixed Layer Depth (MLD)
# --------------------------
def calculate_mld(profile):
    """Return the mixed layer depth (as pressure) of a single profile.

    The MLD is the pressure of the first row whose density exceeds the
    density of the first row (taken as the surface value) by more than
    0.03 kg/m^3.

    Parameters
    ----------
    profile : pandas.DataFrame
        Rows of one profile with 'density' and 'PRES (decibar)' columns,
        ordered from the surface downwards.

    Returns
    -------
    float
        Pressure at the first threshold crossing, or NaN when the profile is
        empty or never crosses the threshold.
    """
    if profile.empty:
        return np.nan

    # Get the surface density (first value in the profile)
    surface_density = profile['density'].iloc[0]
    exceeds = (profile['density'] > (surface_density + 0.03)).to_numpy()

    # idxmax()/argmax() on an all-False mask returns the FIRST row, which
    # would silently report the surface pressure as the MLD; test explicitly.
    if not exceeds.any():
        return np.nan

    # Positional lookup: independent of the index labels (a label of 0 is
    # falsy and duplicated labels would break a label-based lookup).
    return profile['PRES (decibar)'].iloc[int(exceeds.argmax())]

# Purpose: assign a profile id to every row, compute one MLD per profile, and
# prepare the vertical-line positions and tick locations used by the plots.

# Group profiles by lat-lon (a simple grouping): rows sharing the same
# latitude/longitude pair receive the same integer id.
# NOTE(review): two casts at an identical position would be merged into one
# profile - confirm this cannot happen for this float.
df['profile'] = df.groupby(['LATITUDE (degree_north)', 'LONGITUDE (degree_east)']).ngroup()
# One MLD per profile; reset_index replaces the profile-id index with a
# positional 0..n-1 index, which is what the .map() below looks up against.
mld_values = df.groupby('profile').apply(calculate_mld).reset_index(level=0, drop=True)

# Merge MLD values back to the DataFrame (using unique profile rows)
u_profiles = df.drop_duplicates(subset=['profile'])
u_profiles = u_profiles.copy()  # avoid SettingWithCopyWarning
u_profiles['mld'] = u_profiles['profile'].map(mld_values)

# For plotting MLD, get the timestamps and mld values per profile
timestamps = u_profiles['timestamp']
mld = u_profiles['mld']
# 5-point running mean of the MLD series; not referenced again in this cell
# (the plots below draw the unsmoothed `mld`).
mld_smoothed = uniform_filter1d(mld, size=5)

# --------------------------
# Define Vertical Lines and Date Ticks
# --------------------------
vertical_lines_dates = ['2016-08-08', '2016-09-02']
vertical_lines_timestamps = [pd.Timestamp(date).timestamp() for date in vertical_lines_dates]

# Generate ticks for months and years from the data timestamps
# NOTE(review): 'M' (month-end) is reported as a deprecated alias by recent
# pandas releases - confirm whether 'MS' (month-start) is the intended tick.
date_ticks_months = pd.date_range(start=pd.to_datetime(df['timestamp'], unit='s').min(), 
                                  end=pd.to_datetime(df['timestamp'], unit='s').max(), freq='M')
month_timestamps = date_ticks_months.map(lambda x: x.timestamp())

# Year-start ticks, labelled with the four-digit year
date_ticks_years = pd.date_range(start=pd.to_datetime(df['timestamp'], unit='s').min(), 
                                 end=pd.to_datetime(df['timestamp'], unit='s').max(), freq='YS')
year_timestamps = date_ticks_years.map(lambda x: x.timestamp())
year_labels = date_ticks_years.strftime('%Y')

# --------------------------
# Plot Temperature and Salinity Contours with MLD Overlay
# --------------------------
# Two stacked panels sharing the same layout: filled contours of the field
# against (time, pressure), the per-profile MLD as a white line, red dashed
# lines at the two event dates, year major ticks / month minor ticks on the
# bottom axis, and one unlabeled tick per profile on a twin top axis.
plt.figure(figsize=(16, 10))

# Temperature contour plot (subplot 1) - the plotted field is Conservative Temperature
plt.subplot(2, 1, 1)
l_temp = np.arange(-1.9, 2, 0.1)
contour_temp = plt.tricontourf(df['timestamp'], df['PRES (decibar)'], df['CT'], levels=l_temp, cmap=cmocean.cm.thermal)
plt.ylim(2000, 0)  # reversed limits: pressure increases downwards
cbar_temp = plt.colorbar(contour_temp)
cbar_temp.set_label('Temperature (°C)', fontsize=15)
cbar_temp.ax.tick_params(labelsize=14)
# markersize has no visible effect here because no marker is specified
plt.plot(timestamps, mld, color='white', linestyle='-', markersize=4, label='MLD')
plt.ylabel('Depth (dbar)', fontsize=15)
plt.yticks(np.arange(0, 2001, 500), fontsize=15)
for vline in vertical_lines_timestamps:
    plt.axvline(x=vline, color='red', linestyle='--', linewidth=1.5)
plt.xticks(year_timestamps, year_labels, fontsize=14, rotation=0)
plt.tick_params(axis='x', which='minor', direction='out', length=5, color='black')
ax = plt.gca()
ax.set_xticks(month_timestamps, minor=True)
# Twin top axis: same x-range as the bottom axis, one tick per profile time
ax2 = ax.twiny()
ax2.set_xlim(ax.get_xlim())
ax2.set_xticks(unique_profiles)
ax2.tick_params(axis='x', direction='out', length=5, color='black')
ax2.set_xticklabels([])

# Salinity contour plot (subplot 2)
plt.subplot(2, 1, 2)
# You can adjust levels; here we let tricontourf choose levels (or specify if needed)
# NOTE(review): the plotted field is Absolute Salinity (df['SA']) while the
# colorbar label below reads 'psu' - confirm which quantity/unit is intended.
contour_sal = plt.tricontourf(df['timestamp'], df['PRES (decibar)'], df['SA'], levels=50, cmap=cmocean.cm.haline)
plt.ylim(2000, 0)
cbar_sal = plt.colorbar(contour_sal)
cbar_sal.set_label('Salinity (psu)', fontsize=15)
cbar_sal.ax.tick_params(labelsize=14)
plt.plot(timestamps, mld, color='white', linestyle='-', label='MLD')
plt.ylabel('Depth (dbar)', fontsize=15)
plt.yticks(np.arange(0, 2001, 500), fontsize=15)
for vline in vertical_lines_timestamps:
    plt.axvline(x=vline, color='red', linestyle='--', linewidth=1.5)
plt.xticks(year_timestamps, year_labels, fontsize=14, rotation=0)
plt.tick_params(axis='x', which='minor', direction='out', length=5, color='black')
ax = plt.gca()
ax.set_xticks(month_timestamps, minor=True)
ax2 = ax.twiny()
ax2.set_xlim(ax.get_xlim())
ax2.set_xticks(unique_profiles)
ax2.tick_params(axis='x', direction='out', length=5, color='black')
ax2.set_xticklabels([])

# The current axes at this point is the twin (top) axis of the second panel,
# so this label is attached to it.
plt.xlabel('Date', fontsize=15)
plt.tight_layout()
# plt.savefig(f'/home/soumya/Backup/Plots/Argo/{argo_float_number}_temp_salinity.png', dpi=300)
plt.show()


# Figure 3 (c)

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.dates as mdates
import gsw

# Purpose: reload the float CSV, restrict it to the study box and time window,
# and derive SA, CT and sigma0 for the salinity-difference time series.

# Define the file path
file_path = '/mnt/d/Data/Argo/Argo/PR_PF_7900334.csv'

# Load the CSV file
df = pd.read_csv(file_path)

# Ensure the correct data types; unparsable entries become NaN (errors='coerce').
# NOTE(review): pressure is taken from the unadjusted 'PRES (decibar)' column
# here, while temperature comes from TEMP_ADJUSTED - confirm this mix is intended.
df['DATE (YYYY-MM-DDTHH:MI:SSZ)'] = pd.to_datetime(df['DATE (YYYY-MM-DDTHH:MI:SSZ)'])
df['PRES (decibar)'] = pd.to_numeric(df['PRES (decibar)'], errors='coerce')
df['TEMP (degree_Celsius)'] = pd.to_numeric(df['TEMP_ADJUSTED (degree_Celsius)'], errors='coerce')
df['latitude'] = pd.to_numeric(df['LATITUDE (degree_north)'], errors='coerce')
df['longitude'] = pd.to_numeric(df['LONGITUDE (degree_east)'], errors='coerce')
df['PSAL_ADJUSTED (psu)'] = pd.to_numeric(df['PSAL_ADJUSTED (psu)'], errors='coerce')

# Define the rectangle box coordinates (degrees east / degrees north)
rect_min_lon, rect_max_lon = 20, 70
rect_min_lat, rect_max_lat = -70, -60

# Filter the DataFrame based on the defined coordinates (bounds inclusive).
# Unlike the contour cell, rows with missing pressure/temperature/salinity
# are NOT dropped here.
df = df[
        (df['longitude'] >= rect_min_lon) & 
        (df['longitude'] <= rect_max_lon) & 
        (df['latitude'] >= rect_min_lat) & 
        (df['latitude'] <= rect_max_lat)
    ]

# Convert dates to numeric values (POSIX seconds) and keep samples up to 2017-03-30
df['timestamp'] = df['DATE (YYYY-MM-DDTHH:MI:SSZ)'].apply(lambda x: x.timestamp())
df = df[(df['timestamp'] <= pd.Timestamp('2017-03-30').timestamp())]

# Calculate Absolute Salinity (SA), Conservative Temperature (CT), and Density
df['SA'] = gsw.SA_from_SP(df['PSAL_ADJUSTED (psu)'], df['PRES (decibar)'], df['longitude'], df['latitude'])
df['CT'] = gsw.CT_from_t(df['SA'], df['TEMP (degree_Celsius)'], df['PRES (decibar)'])
df['density'] = gsw.sigma0(df['SA'], df['CT'])  # Calculate potential density anomaly

# Event dates marked on the plot; the POSIX-second form is computed but the
# plot below uses the date strings directly.
vertical_lines_dates = ['2016-08-08', '2016-09-02']
vertical_lines_timestamps = [pd.Timestamp(date).timestamp() for date in vertical_lines_dates]

# Function to calculate the Mixed Layer Depth (MLD) based on density
def calculate_mld(profile):
    """Return the mixed layer depth (as pressure) of a single profile.

    The MLD is the pressure of the first row whose density exceeds the
    density of the first row (taken as the surface value) by more than
    0.03 kg/m^3.

    Parameters
    ----------
    profile : pandas.DataFrame
        Rows of one profile with 'density' and 'PRES (decibar)' columns,
        ordered from the surface downwards.

    Returns
    -------
    float
        Pressure at the first threshold crossing, or NaN when the profile is
        empty, has a NaN surface density, or never crosses the threshold.
    """
    if profile.empty:
        return np.nan

    surface_density = profile['density'].iloc[0]  # Get surface density
    exceeds = (profile['density'] > (surface_density + 0.03)).to_numpy()

    # idxmax()/argmax() on an all-False mask returns the FIRST row, which
    # would silently report the surface pressure as the MLD; test explicitly.
    if not exceeds.any():
        return np.nan

    # Positional lookup so the result does not depend on index labels.
    return profile['PRES (decibar)'].iloc[int(exceeds.argmax())]

# Calculate MLD for each profile
df['profile'] = df.groupby(['LATITUDE (degree_north)', 'LONGITUDE (degree_east)']).ngroup()  # Group profiles by lat-lon
# One MLD per profile; reset_index swaps the profile-id index for a positional
# 0..n-1 index, which the merge below matches against the 'profile' ids.
mld_values = df.groupby('profile').apply(calculate_mld).reset_index(level=0, drop=True)

# Ensure MLD values are correctly mapped back to the profiles: every row gets
# the MLD of its profile in a new 'mld' column (left join keeps all rows).
df = df.merge(mld_values.rename('mld'), left_on='profile', right_index=True, how='left')

# Extract unique profiles for the line plot (first row of each profile)
u_profiles = df.drop_duplicates(subset=['profile'])
timestamps = pd.to_datetime(u_profiles['timestamp'], unit='s')
mld = u_profiles['mld']

# Calculate the salinity at the mixed layer and 250m depth
def get_mixed_layer_and_250m_salinity(profile):
    """Return the mixed-layer mean salinity and the salinity nearest 250 dbar.

    Parameters
    ----------
    profile : pandas.DataFrame
        Rows of one profile with 'PRES (decibar)', 'PSAL_ADJUSTED (psu)' and
        'mld' columns; 'mld' holds the same value on every row.

    Returns
    -------
    pandas.Series
        Two unnamed entries: ``[mixed_layer_sal, sal_250m]``.
        ``mixed_layer_sal`` is the mean salinity of all levels at or above the
        MLD (NaN when the MLD is NaN or no level qualifies). ``sal_250m`` is
        the salinity at the level whose pressure is closest to 250 dbar (NaN
        when the profile has no valid pressure).
    """
    if profile.empty:
        return pd.Series([np.nan, np.nan])

    pressure = profile['PRES (decibar)']
    salinity = profile['PSAL_ADJUSTED (psu)']

    # Mixed layer salinity: mean over every level with pressure <= MLD.
    # A NaN MLD makes the comparison False everywhere, yielding NaN.
    mld_value = profile['mld'].iloc[0]
    in_mixed_layer = pressure <= mld_value
    mixed_layer_sal = salinity[in_mixed_layer].mean() if in_mixed_layer.any() else np.nan

    # Salinity at 250 dbar: nearest level by pressure. Levels with missing
    # pressure are skipped; with none left, idxmin() has no valid answer, so
    # return NaN instead of failing. The lookup is positional so duplicated
    # index labels cannot turn the result into a DataFrame.
    offset = (pressure - 250).abs()
    has_pressure = offset.notna().to_numpy()
    if has_pressure.any():
        nearest = int(offset.to_numpy()[has_pressure].argmin())
        sal_250m = salinity[has_pressure].iloc[nearest]
    else:
        sal_250m = np.nan

    return pd.Series([mixed_layer_sal, sal_250m])

# Apply the function to extract mixed layer and 250m salinity values
# (one row per profile, two columns named just below)
salinity_values = df.groupby('profile').apply(get_mixed_layer_and_250m_salinity)
salinity_values.columns = ['mixed_layer_sal', 'sal_250m']

# Merge salinity values with profile data; `df` is rebound to one row per
# profile from here on.
df = pd.merge(u_profiles, salinity_values, left_on='profile', right_index=True)

# Calculate the salinity difference between MLD and 250m
df['salinity_difference'] = df['mixed_layer_sal'] - df['sal_250m']



# Plot the salinity difference
# NOTE(review): x comes from `timestamps` (built from u_profiles) and y from
# the merged `df`; this assumes the merge kept u_profiles' row order - confirm.
plt.figure(figsize=(16, 6))
plt.plot(timestamps, df['salinity_difference'], color='purple', marker='s', label='Salinity Difference (MLD - 250m)')
plt.ylabel('Salinity Difference (PSU)', fontsize=14)
# plt.xlabel('Date', fontsize=14)
# plt.legend()
# plt.grid(True)
# Add vertical lines for specific dates; only the first line carries a legend
# label (the legend call itself is commented out above)
for vline in vertical_lines_dates:
    plt.axvline(
        x=pd.Timestamp(vline),
        color='red',
        linestyle='--',
        linewidth=1.5,
        label=f'Event: {vline}' if vline == vertical_lines_dates[0] else None
    )
x_min, x_max = timestamps.min(), timestamps.max()
# Set x-axis limits to avoid extra spaces
plt.xlim([x_min, x_max])
# Format the x-axis to show year-month (one major tick per month)
plt.gca().xaxis.set_major_locator(mdates.MonthLocator())
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.tick_params(axis='both', which='major', labelsize=14)
plt.xticks(rotation=45)

plt.tight_layout()
plt.savefig('/home/soumya/Backup/Plots/Argo/mld_sal_250m_diff.png', dpi=400)
plt.show()


# Figure 3 (d), (e)

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import glob
import numpy as np

# Purpose: pool every Argo CSV in the data directory, keep JJAS samples of
# 2014-2016 inside the study box, and define the 20-dbar pressure bins.

# --- Set global font size ---
plt.rcParams.update({'font.size': 14})

# Load the Argo data from CSV files (all floats concatenated into one frame)
argo_files = glob.glob('/mnt/d/Data/Argo/Argo/*.csv')
argo_data = pd.concat((pd.read_csv(f) for f in argo_files), ignore_index=True)

# Convert necessary columns; unparsable coordinates become NaN
argo_data['time'] = pd.to_datetime(argo_data['DATE (YYYY-MM-DDTHH:MI:SSZ)'])
argo_data['latitude'] = pd.to_numeric(argo_data['LATITUDE (degree_north)'], errors='coerce')
argo_data['longitude'] = pd.to_numeric(argo_data['LONGITUDE (degree_east)'], errors='coerce')

# Geographic limits (note: the latitude band here is -70..-65, narrower than
# the -70..-60 box used in the other cells)
lon_min, lon_max = 20, 70
lat_min, lat_max = -70, -65

# Filter by geographic box (bounds inclusive)
argo_data_filtered = argo_data[
    (argo_data['longitude'] >= lon_min) & (argo_data['longitude'] <= lon_max) &
    (argo_data['latitude'] >= lat_min) & (argo_data['latitude'] <= lat_max)
]

# Keep only JJAS (June–September)
argo_data_filtered = argo_data_filtered[argo_data_filtered['time'].dt.month.isin([6, 7, 8, 9])]

# Only keep 2014–2016
years = [2014, 2015, 2016]
argo_data_filtered = argo_data_filtered[argo_data_filtered['time'].dt.year.isin(years)]

# Determine max depth in dataset and build 20-dbar bin edges starting at 0.
# NOTE(review): 'PRES (decibar)' is used without pd.to_numeric here - assumes
# read_csv already parsed it as numeric; confirm.
max_depth = int(np.nanmax(argo_data_filtered['PRES (decibar)']))
bin_edges = np.arange(0, max_depth + 20, 20)  

# Purpose: plot JJAS-mean temperature and salinity profiles, one line per
# year, averaged in the pressure bins defined by `bin_edges`.

# Initialize plot
fig, axs = plt.subplots(1, 2, figsize=(16, 6), sharey=True)

# Colors for 3 years
colors = ['blue', 'orange', 'green']

# True centre of every bin. The bins are 20 dbar wide, so labelling them with
# `edge + 0.5` would draw every point 9.5 dbar too shallow.
bin_midpoints = (bin_edges[:-1] + bin_edges[1:]) / 2

for year, color in zip(years, colors):
    # .copy() so the new column is written to an independent frame rather
    # than to a slice of argo_data_filtered (SettingWithCopyWarning).
    yearly_data = argo_data_filtered[argo_data_filtered['time'].dt.year == year].copy()

    # Assign bin mid-points
    yearly_data['PRES_BIN'] = pd.cut(yearly_data['PRES (decibar)'], bins=bin_edges, labels=bin_midpoints)

    # Compute bin averages; observed=True skips bins without samples, which
    # the dropna() below would discard anyway, and avoids depending on the
    # changing pandas default for categorical groupers.
    grouped = yearly_data.groupby('PRES_BIN', observed=True).agg({
        'TEMP_ADJUSTED (degree_Celsius)': 'mean',
        'PSAL_ADJUSTED (psu)': 'mean'
    }).reset_index()

    # Drop NaNs (bins whose samples were all missing)
    grouped = grouped.dropna()

    # Convert bin labels to numeric depth
    grouped['PRES_BIN'] = grouped['PRES_BIN'].astype(float)

    # Plot
    axs[0].plot(grouped['TEMP_ADJUSTED (degree_Celsius)'], grouped['PRES_BIN'], label=str(year), color=color)
    axs[1].plot(grouped['PSAL_ADJUSTED (psu)'], grouped['PRES_BIN'], label=str(year), color=color)

# Formatting
# NOTE(review): the y values are pressures in dbar although the axis label
# says meters - confirm the intended label.
for ax, xlabel in zip(axs, ['Temperature (°C)', 'Salinity (PSU)']):
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Depth (meter)')
    # Reversed limits already put the surface at the top; a separate
    # invert_yaxis() on the shared axis is redundant.
    ax.set_ylim(500, 0)
    ax.grid(True)
    ax.legend()

plt.tight_layout()
plt.savefig('/home/soumya/Backup/Plots/Cosmonaut Polynya/line_plot.png', dpi=300)
plt.show()


# Figure S3

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import cmocean
import gsw
import sys

# Purpose: load the float CSV and keep the samples of 2016-01-01 .. 2016-03-30
# inside the study box, with SA and CT derived for the T-S diagram.

# --- Set global font size ---
plt.rcParams.update({'font.size': 14})

# Add the MATLAB engine path so that `matlab.engine` can be imported
sys.path.insert(0, r'C:\Program Files\MATLAB\R2024b\extern\engines\python')
import matlab.engine

# Define the file path
file_path = r'D:\Data\Argo\Argo\PR_PF_7900334.csv'

# Extract the Argo float number from the filename
# (third '_'-separated token with the extension stripped)
file_name = os.path.basename(file_path)
argo_float_number = file_name.split('_')[2].split('.')[0]

# Load the CSV file
df = pd.read_csv(file_path)

# Ensure the correct data types; unparsable entries become NaN (errors='coerce')
df['DATE (YYYY-MM-DDTHH:MI:SSZ)'] = pd.to_datetime(df['DATE (YYYY-MM-DDTHH:MI:SSZ)'])
df['PRES (decibar)'] = pd.to_numeric(df['PRES (decibar)'], errors='coerce')
df['TEMP (degree_Celsius)'] = pd.to_numeric(df['TEMP (degree_Celsius)'], errors='coerce')
df['latitude'] = pd.to_numeric(df['LATITUDE (degree_north)'], errors='coerce')
df['longitude'] = pd.to_numeric(df['LONGITUDE (degree_east)'], errors='coerce')

# Define the rectangle box coordinates (degrees east / degrees north)
rect_min_lon, rect_max_lon = 20, 70
rect_min_lat, rect_max_lat = -70, -60

# Ensure no missing values for scatter plot
df = df.dropna(subset=['PRES (decibar)', 'TEMP (degree_Celsius)', 'PSAL_ADJUSTED (psu)', 'TEMP_ADJUSTED (degree_Celsius)'])

# Filter the data for the defined rectangle (bounds inclusive)
df = df[
    (df['longitude'] >= rect_min_lon) & 
    (df['longitude'] <= rect_max_lon) & 
    (df['latitude'] >= rect_min_lat) & 
    (df['latitude'] <= rect_max_lat)
]

# Convert dates to numeric values (POSIX seconds) and keep the
# 2016-01-01 .. 2016-03-30 window
df['timestamp'] = df['DATE (YYYY-MM-DDTHH:MI:SSZ)'].apply(lambda x: x.timestamp())
df = df[(df['timestamp'] >= pd.Timestamp('2016-01-01').timestamp()) & (df['timestamp'] <= pd.Timestamp('2016-03-30').timestamp())]

# Calculate Absolute Salinity and Conservative Temperature using gsw
# (CT is computed from the adjusted in-situ temperature)
df['SA'] = gsw.SA_from_SP(df['PSAL_ADJUSTED (psu)'], df['PRES (decibar)'], df['longitude'], df['latitude'])
df['CT'] = gsw.CT_from_t(df['SA'], df['TEMP_ADJUSTED (degree_Celsius)'], df['PRES (decibar)'])

# Filter data within depth range of interest
df = df[df['PRES (decibar)'] <= 2000]  # Limit to 2000 dbar for clarity

# Purpose: T-S diagram (Practical Salinity vs Conservative Temperature,
# coloured by pressure) with neutral-density contours, the surface freezing
# line and the centroid of all samples warmer than 0 °C.

# Create the plot
fig, ax = plt.subplots(1, 1, figsize=(12, 10))

# Scatter plot for temperature and salinity, colored by depth
scatter = ax.scatter(df['PSAL_ADJUSTED (psu)'], df['CT'], c=df['PRES (decibar)'], cmap='viridis', s=10)

# Generate meshgrid for neutral density calculation
SP = np.linspace(33, 35, 1000)
P = np.zeros(len(SP))
SA = gsw.SA_from_SP(SP, 0, df['longitude'].mean(), df['latitude'].mean())
CT = np.linspace(-2, 1.5, 1000)
# Freezing Conservative Temperature at the surface (p = 0, no dissolved air)
CT_freezing = gsw.CT_freezing(SA, P, 0)

SS, TT = np.meshgrid(SA, CT)  # SS (Absolute Salinity grid) is not used below
SPmesh, TT = np.meshgrid(SP, CT)

# Start MATLAB engine for neutral density calculations. The engine is always
# shut down afterwards so no MATLAB process is left running, and the result is
# converted to an ndarray while the engine is still alive.
# NOTE(review): the grid temperature handed to eos80_legacy_gamma_n is CT; the
# routine is documented for in-situ temperature - confirm the approximation is
# acceptable at p = 0.
eng = matlab.engine.start_matlab()
try:
    eng.addpath(r'C:\Users\Soumyadeep\Documents\MATLAB\eos80_legacy_gamma_n')
    eng.addpath(r'C:\Users\Soumyadeep\Documents\MATLAB\eos80_legacy_gamma_n\library')
    SPmesh_mat = matlab.double(SPmesh.tolist())
    TT_mat = matlab.double(TT.tolist())
    gamman_mesh = np.array(
        eng.eos80_legacy_gamma_n(SPmesh_mat, TT_mat, 0.0, df['longitude'].mean().item(), df['latitude'].mean().item())
    )
finally:
    eng.quit()

# Plot neutral density contours in black
sig_lines = [28, 28.27]  # Neutral density levels to highlight
CS = ax.contour(SPmesh, TT, gamman_mesh, sig_lines, colors='black', linewidths=2)
ax.clabel(CS, inline=True, fontsize=14, fmt='%.2f')

# Plot freezing line against Practical Salinity: the x-axis of this diagram
# is SP, so using the Absolute Salinity values as x would shift the line.
ax.plot(SP, CT_freezing, color='cyan', linestyle='--', linewidth=1.5, label='Freezing Line')

# Add colorbar to indicate depth
cbar = plt.colorbar(scatter)
cbar.set_label('Depth (dbar)')
# cbar.ax.tick_params(labelsize=14)

# Text annotations for water masses
ax.text(34, -1, 'AASW', color='red', fontsize=16, fontweight='bold')  # Antarctic Surface Water
ax.text(34.6, 0.7, 'CDW', color='red', fontsize=16, fontweight='bold')  # Circumpolar Deep Water
ax.text(34.45, -0.5, 'mCDW', color='red', fontsize=16, fontweight='bold')  # Modified Circumpolar Deep Water

# Set plot labels and limits
plt.xlabel('Practical Salinity (PSU)')
plt.ylabel('Conservative Temperature (°C)')
ax.set_xlim(33.9, 34.9)
ax.set_ylim(-2, 1.5)

# 0 °C reference line: the threshold that defines the "warm" points used for
# the centroid below (this is not the freezing line, which is drawn in cyan)
plt.axhline(y=0, color='red', linestyle='--', linewidth=1.5)

# Turn on grid
ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)
ax.tick_params(axis='both', which='major', labelsize=14)

# ---- 📍 Centroid Calculation for CT > 0°C ----
# Filter points with Conservative Temperature > 0°C
warm_points = df[df['CT'] > 0]

# Calculate the centroid (mean salinity and temperature); both are NaN, and
# nothing is drawn, when no sample is warmer than 0 °C
centroid_salinity = warm_points['PSAL_ADJUSTED (psu)'].mean()
centroid_temperature = warm_points['CT'].mean()

# Plot the centroid as a large red dot
ax.scatter(centroid_salinity, centroid_temperature, 
           color='red', s=150, marker='o', edgecolor='black', 
           label='Centroid (CT > 0°C)')

# Tight layout and save plot
plt.tight_layout()
plt.savefig('before_polynya_with_centroid.png', dpi=400)
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import cmocean
import gsw
import sys

# Purpose: load the float CSV and keep the samples of 2016-06-01 .. 2016-10-30
# inside the study box, with SA and CT derived for the T-S diagram.

# --- Set global font size ---
plt.rcParams.update({'font.size': 14})

# Add the MATLAB engine path so that `matlab.engine` can be imported
sys.path.insert(0, r'C:\Program Files\MATLAB\R2024b\extern\engines\python')
import matlab.engine

# Define the file path
file_path = r'D:\Data\Argo\Argo\PR_PF_7900334.csv'

# Extract the Argo float number from the filename
# (third '_'-separated token with the extension stripped)
file_name = os.path.basename(file_path)
argo_float_number = file_name.split('_')[2].split('.')[0]

# Load the CSV file
df = pd.read_csv(file_path)

# Ensure the correct data types; unparsable entries become NaN (errors='coerce')
df['DATE (YYYY-MM-DDTHH:MI:SSZ)'] = pd.to_datetime(df['DATE (YYYY-MM-DDTHH:MI:SSZ)'])
df['PRES (decibar)'] = pd.to_numeric(df['PRES (decibar)'], errors='coerce')
df['TEMP (degree_Celsius)'] = pd.to_numeric(df['TEMP (degree_Celsius)'], errors='coerce')
df['latitude'] = pd.to_numeric(df['LATITUDE (degree_north)'], errors='coerce')
df['longitude'] = pd.to_numeric(df['LONGITUDE (degree_east)'], errors='coerce')

# Define the rectangle box coordinates (degrees east / degrees north)
rect_min_lon, rect_max_lon = 20, 70
rect_min_lat, rect_max_lat = -70, -60

# Ensure no missing values for scatter plot
df = df.dropna(subset=['PRES (decibar)', 'TEMP (degree_Celsius)', 'PSAL_ADJUSTED (psu)', 'TEMP_ADJUSTED (degree_Celsius)'])

# Filter the data for the defined rectangle (bounds inclusive)
df = df[
    (df['longitude'] >= rect_min_lon) & 
    (df['longitude'] <= rect_max_lon) & 
    (df['latitude'] >= rect_min_lat) & 
    (df['latitude'] <= rect_max_lat)
]

# Convert dates to numeric values (POSIX seconds) and keep the
# 2016-06-01 .. 2016-10-30 window
df['timestamp'] = df['DATE (YYYY-MM-DDTHH:MI:SSZ)'].apply(lambda x: x.timestamp())
df = df[(df['timestamp'] >= pd.Timestamp('2016-06-01').timestamp()) & (df['timestamp'] <= pd.Timestamp('2016-10-30').timestamp())]

# Calculate Absolute Salinity and Conservative Temperature using gsw
# (CT is computed from the adjusted in-situ temperature)
df['SA'] = gsw.SA_from_SP(df['PSAL_ADJUSTED (psu)'], df['PRES (decibar)'], df['longitude'], df['latitude'])
df['CT'] = gsw.CT_from_t(df['SA'], df['TEMP_ADJUSTED (degree_Celsius)'], df['PRES (decibar)'])

# Filter data within depth range of interest
df = df[df['PRES (decibar)'] <= 2000]  # Limit to 2000 dbar for clarity

# Purpose: T-S diagram (Practical Salinity vs Conservative Temperature,
# coloured by pressure) with neutral-density contours, the surface freezing
# line and the centroid of all samples warmer than 0 °C.

# Create the plot
fig, ax = plt.subplots(1, 1, figsize=(12, 10))

# Scatter plot for temperature and salinity, colored by depth
scatter = ax.scatter(df['PSAL_ADJUSTED (psu)'], df['CT'], c=df['PRES (decibar)'], cmap='viridis', s=10)

# Generate meshgrid for neutral density calculation
SP = np.linspace(33, 35, 1000)
P = np.zeros(len(SP))
SA = gsw.SA_from_SP(SP, 0, df['longitude'].mean(), df['latitude'].mean())
CT = np.linspace(-2, 1.5, 1000)
# Freezing Conservative Temperature at the surface (p = 0, no dissolved air)
CT_freezing = gsw.CT_freezing(SA, P, 0)

SS, TT = np.meshgrid(SA, CT)  # SS (Absolute Salinity grid) is not used below
SPmesh, TT = np.meshgrid(SP, CT)

# Start MATLAB engine for neutral density calculations. The engine is always
# shut down afterwards so no MATLAB process is left running, and the result is
# converted to an ndarray while the engine is still alive.
# NOTE(review): the grid temperature handed to eos80_legacy_gamma_n is CT; the
# routine is documented for in-situ temperature - confirm the approximation is
# acceptable at p = 0.
eng = matlab.engine.start_matlab()
try:
    eng.addpath(r'C:\Users\Soumyadeep\Documents\MATLAB\eos80_legacy_gamma_n')
    eng.addpath(r'C:\Users\Soumyadeep\Documents\MATLAB\eos80_legacy_gamma_n\library')
    SPmesh_mat = matlab.double(SPmesh.tolist())
    TT_mat = matlab.double(TT.tolist())
    gamman_mesh = np.array(
        eng.eos80_legacy_gamma_n(SPmesh_mat, TT_mat, 0.0, df['longitude'].mean().item(), df['latitude'].mean().item())
    )
finally:
    eng.quit()

# Plot neutral density contours in black
sig_lines = [28, 28.27]  # Neutral density levels to highlight
CS = ax.contour(SPmesh, TT, gamman_mesh, sig_lines, colors='black', linewidths=2)
ax.clabel(CS, inline=True, fontsize=14, fmt='%.2f')

# Plot freezing line against Practical Salinity: the x-axis of this diagram
# is SP, so using the Absolute Salinity values as x would shift the line.
ax.plot(SP, CT_freezing, color='cyan', linestyle='--', linewidth=1.5, label='Freezing Line')

# Add colorbar to indicate depth
cbar = plt.colorbar(scatter)
cbar.set_label('Depth (dbar)', fontsize=16)
cbar.ax.tick_params(labelsize=14)

# Text annotations for water masses
ax.text(34, -1, 'AASW', color='red', fontsize=16, fontweight='bold')  # Antarctic Surface Water
ax.text(34.6, 0.7, 'CDW', color='red', fontsize=16, fontweight='bold')  # Circumpolar Deep Water
ax.text(34.45, -0.5, 'mCDW', color='red', fontsize=16, fontweight='bold')  # Modified Circumpolar Deep Water

# Set plot labels and limits
plt.xlabel('Practical Salinity (PSU)')
plt.ylabel('Conservative Temperature (°C)')
ax.set_xlim(33.9, 34.9)
ax.set_ylim(-2, 1.5)

# 0 °C reference line: the threshold that defines the "warm" points used for
# the centroid below (this is not the freezing line, which is drawn in cyan)
plt.axhline(y=0, color='red', linestyle='--', linewidth=1.5)

# Turn on grid
ax.grid(True, linestyle='--', linewidth=0.5, alpha=0.7)
ax.tick_params(axis='both', which='major', labelsize=14)

# ---- 📍 Centroid Calculation for CT > 0°C ----
# Filter points with Conservative Temperature > 0°C
warm_points = df[df['CT'] > 0]

# Calculate the centroid (mean salinity and temperature); both are NaN, and
# nothing is drawn, when no sample is warmer than 0 °C
centroid_salinity = warm_points['PSAL_ADJUSTED (psu)'].mean()
centroid_temperature = warm_points['CT'].mean()

# Plot the centroid as a large red dot
ax.scatter(centroid_salinity, centroid_temperature, 
           color='red', s=150, marker='o', edgecolor='black', 
           label='Centroid (CT > 0°C)')

# Tight layout and save plot
plt.tight_layout()
plt.savefig('during_polynya_with_centroid.png', dpi=400)
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import cmocean
import gsw
import sys
import matplotlib.colors as mcolors
from matplotlib.colors import ListedColormap, BoundaryNorm
from matplotlib.patches import Patch

# Purpose: load the float CSV, restrict it to 2015-01-01 .. 2017-02-28 inside
# the study box, and attach SA, CT and neutral density (gamma_n) to every
# sample.

# --- Set global font size ---
plt.rcParams.update({'font.size': 14})

# Make the MATLAB Engine API for Python importable
sys.path.insert(0, r'C:\Program Files\MATLAB\R2024b\extern\engines\python')
import matlab.engine

# Load and preprocess data
file_path = r'D:\Data\Argo\Argo\PR_PF_7900334.csv'
df = pd.read_csv(file_path)

# Data cleaning and type conversion; unparsable entries become NaN
df['DATE (YYYY-MM-DDTHH:MI:SSZ)'] = pd.to_datetime(df['DATE (YYYY-MM-DDTHH:MI:SSZ)'])
numeric_cols = ['PRES (decibar)', 'TEMP (degree_Celsius)', 
               'LATITUDE (degree_north)', 'LONGITUDE (degree_east)']
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')

# Filter time range first (the date column is already datetime at this point)
df = df[(df['DATE (YYYY-MM-DDTHH:MI:SSZ)'] >= '2015-01-01') & 
        (df['DATE (YYYY-MM-DDTHH:MI:SSZ)'] <= '2017-02-28')]

# Filter data to study area (bounds inclusive)
df = df[(df['LONGITUDE (degree_east)'].between(20, 70)) & 
        (df['LATITUDE (degree_north)'].between(-70, -60))]

# Calculate Absolute Salinity and Conservative Temperature
df['SA'] = gsw.SA_from_SP(df['PSAL_ADJUSTED (psu)'], df['PRES (decibar)'],
                          df['LONGITUDE (degree_east)'], df['LATITUDE (degree_north)'])
df['CT'] = gsw.CT_from_t(df['SA'], df['TEMP_ADJUSTED (degree_Celsius)'], df['PRES (decibar)'])

# Calculate Neutral Density using MATLAB.
# eos80_legacy_gamma_n(SP, t, p, long, lat) takes Practical Salinity and
# IN-SITU temperature, so the adjusted in-situ temperature is passed rather
# than Conservative Temperature. The engine is always shut down afterwards so
# no MATLAB process is left running, and the result is converted to an
# ndarray while the engine is still alive.
eng = matlab.engine.start_matlab()
try:
    eng.addpath(r'C:\Users\Soumyadeep\Documents\MATLAB\eos80_legacy_gamma_n')
    gamma_n = np.array(eng.eos80_legacy_gamma_n(
        matlab.double(df['PSAL_ADJUSTED (psu)'].tolist()),
        matlab.double(df['TEMP_ADJUSTED (degree_Celsius)'].tolist()),
        matlab.double(df['PRES (decibar)'].tolist()),
        matlab.double(df['LONGITUDE (degree_east)'].tolist()),
        matlab.double(df['LATITUDE (degree_north)'].tolist())
    ))
finally:
    eng.quit()
df['gamma_n'] = gamma_n.ravel()
# Discard samples for which no finite neutral density (or pressure) exists
df = df.replace([np.inf, -np.inf], np.nan).dropna(subset=['gamma_n', 'PRES (decibar)'])

# Classify water masses.
# np.select assigns the FIRST matching condition, so the order of this list
# matters: e.g. a sample with gamma_n > 28.27 is labelled AABW only when it did
# not already match DSW, mCDW or CDW.
conditions = [
    # Dense Shelf Water (DSW)
    (df['PSAL_ADJUSTED (psu)'] > 34.5) & 
    (df['TEMP_ADJUSTED (degree_Celsius)'].between(-1.9, -1.8)) &
    (df['gamma_n'] > 28.27),
    
    # Modified Circumpolar Deep Water (mCDW)
    (df['TEMP_ADJUSTED (degree_Celsius)'].between(-1.8, 0)) &
    (df['gamma_n'].between(28, 28.27)),
    
    # Circumpolar Deep Water (CDW)
    (df['PSAL_ADJUSTED (psu)'] > 34.5) & 
    (df['TEMP_ADJUSTED (degree_Celsius)'] >= 0),
    
    # Antarctic Bottom Water (AABW)
    (df['gamma_n'] > 28.27),

    # Antarctic Surface Water (AASW)
    (df['gamma_n'] < 28)
]

# Labels in the same order as `conditions`; unmatched samples become 'Other'
water_masses = ['DSW', 'mCDW', 'CDW', 'AABW', 'AASW']
df['water_mass'] = np.select(conditions, water_masses, default='Other')
# POSIX seconds, used as the x coordinate of the plot
df['timestamp'] = df['DATE (YYYY-MM-DDTHH:MI:SSZ)'].apply(lambda x: x.timestamp())

# Create colormap: one colour per class, in the order water_masses + ['Other'],
# with integer boundaries 0..6 so that class code k maps to colour k
color_map = {
    'DSW': '#1f77b4',   # Blue
    'mCDW': '#2ca02c',  # Green
    'CDW': '#d62728',   # Red
    'AABW': '#9467bd',  # Purple
    'AASW': '#ff7f0e',   # Orange
    'Other': '#7f7f7f'  # Gray
}
cmap = ListedColormap([color_map[wm] for wm in water_masses + ['Other']])
bounds = np.arange(len(water_masses + ['Other']) + 1)
norm = BoundaryNorm(bounds, cmap.N)

# Create plot
plt.figure(figsize=(16, 6))
ax = plt.gca()

# Plot water masses: the class names are converted to integer codes (0..5)
# and contoured against (time, pressure).
# NOTE(review): contouring treats the codes as a continuous field, so bands of
# intermediate classes may be drawn between neighbouring samples of different
# classes - confirm this is acceptable for the figure.
contour = ax.tricontourf(
    df['timestamp'], 
    df['PRES (decibar)'], 
    pd.Categorical(df['water_mass'], categories=water_masses + ['Other']).codes,
    levels=bounds,
    cmap=cmap,
    norm=norm
)

# Calculate Mixed Layer Depth (MLD) with improved grouping
def calculate_mld(profile):
    """Return the mixed layer depth (as pressure) of a single profile.

    The MLD is the pressure of the first row whose density is more than
    0.03 above the density of the profile's first row. NaN is returned for
    an empty profile or when no row exceeds that threshold.
    """
    if profile.empty:
        return np.nan

    sigma = profile['density']
    threshold = sigma.iloc[0] + 0.03

    # Positions (not labels) of every row denser than the threshold
    crossings = np.flatnonzero((sigma > threshold).values)
    if crossings.size == 0:
        return np.nan

    return profile['PRES (decibar)'].iloc[crossings[0]]

# Purpose: overlay the per-profile MLD on the water-mass panel, configure the
# axes and ticks, add the class legend and save the figure.

# Potential density anomaly, needed by calculate_mld
df['density'] = gsw.sigma0(df['SA'], df['CT'])
# A profile is every set of rows sharing position AND time
df['profile'] = df.groupby([
    'LATITUDE (degree_north)', 
    'LONGITUDE (degree_east)', 
    'DATE (YYYY-MM-DDTHH:MI:SSZ)'
]).ngroup()

# One MLD per profile, joined back and reduced to one row per profile
mld = df.groupby('profile').apply(calculate_mld).reset_index(name='mld')
mld_df = df.merge(mld, on='profile').drop_duplicates('profile')
plt.plot(mld_df['timestamp'], mld_df['mld'], 'w-', 
         markersize=4, linewidth=1, label='MLD')

# Axis configuration (reversed limits: pressure increases downwards)
# NOTE(review): the y values are pressures in dbar although the label says
# meters - confirm the intended label.
ax.set_ylim(2000, 0)
ax.set_ylabel('Depth (meter)')

# Date formatting: major ticks at year starts. 'YS' is the year-start alias
# already used for the other figures; 'AS' is its deprecated spelling.
years = pd.date_range(df['DATE (YYYY-MM-DDTHH:MI:SSZ)'].min(),
                      df['DATE (YYYY-MM-DDTHH:MI:SSZ)'].max(), freq='YS')
ax.set_xticks([y.timestamp() for y in years])
ax.set_xticklabels([y.strftime('%Y') for y in years], fontsize=12)

# Monthly minor ticks
months = pd.date_range(df['DATE (YYYY-MM-DDTHH:MI:SSZ)'].min(),
                       df['DATE (YYYY-MM-DDTHH:MI:SSZ)'].max(), freq='MS')
ax.set_xticks([m.timestamp() for m in months], minor=True)
ax.tick_params(axis='x', which='minor', length=3)

# Top profile axis: same x-range, one unlabeled tick per sampling time
ax_top = ax.twiny()
ax_top.set_xlim(ax.get_xlim())
ax_top.set_xticks(df['timestamp'].unique())
ax_top.tick_params(axis='x', length=6, direction='out')
ax_top.set_xticklabels([])

# Add legend: one colour patch per water-mass class. The handles were built
# but never attached to the axes, so the figure had no key for its colours.
legend_elements = [Patch(facecolor=color_map[wm], label=wm) for wm in water_masses + ['Other']]
ax.legend(handles=legend_elements, loc='lower right', ncol=len(legend_elements))

plt.tight_layout()
plt.savefig('water mass.png', dpi=400)