In [1]:
import pandas as pd
import numpy as np
import xarray as xr
import scipy as sp

In [2]:
# Load the raw observations for line 208: one row per (station, date, depth)
# temperature sample. The path is relative to the notebook's working directory.
data = pd.read_csv('data/line_208.csv')

In [3]:
# Preview the first rows to check which columns were parsed.
# NOTE(review): the preview shows an 'Unnamed: 0' column - presumably a row index
# that was written into the CSV; confirm whether it should be dropped on load.
data.head()

Unnamed: 0,cruiseline,station,date,depth,temperature
0,208,1,1969-02-10 04:12:00,0,13.3
1,208,1,1969-02-10 04:12:00,10,13.3
2,208,1,1969-02-10 04:12:00,20,13.2
3,208,1,1969-02-10 04:12:00,30,13.2
4,208,1,1969-02-10 04:12:00,60,13.0


In [4]:
# Convert the 'date' column to pandas datetimes so that casts can later be
# selected by timestamp and used as the time coordinate.
data['date'] = pd.to_datetime(data['date'])

In [5]:
# Interpolate every cast (one station at one timestamp) onto a common set of
# standard depths so that all temperature profiles share the same vertical grid.
# Produces `df_interp`, a long table with columns
# 'temperature', 'depth', 'station' and 'date'.
stations = [1, 2, 3, 4]
standard_depths = np.array([0, 10, 20, 30, 50, 75, 100, 125, 150, 200, 250, 300, 400, 500])

# One small frame per cast; they are concatenated once after the loop instead of
# re-copying an ever-growing DataFrame on every iteration.
interp_frames = []
for station in stations:
    station_rows = data[data['station'] == station]
    # sort=False keeps this station's casts in their order of first appearance.
    for time, df_station in station_rows.groupby('date', sort=False):
        # Linear interpolation needs at least two samples in the cast.
        if df_station.shape[0] < 2:
            continue
        di = sp.interpolate.interp1d(
            df_station['depth'],
            df_station['temperature'],
            kind='linear',
            # Standard depths outside the sampled range are marked as missing
            # (NaN). Filling them with 0 would be indistinguishable from a
            # genuine 0-degree measurement in the output.
            fill_value=np.nan,
            bounds_error=False,
        )
        d_interp = pd.DataFrame({
            'temperature': di(standard_depths),
            'depth': standard_depths,
            'station': station,
            'date': time
        })
        interp_frames.append(d_interp)

if interp_frames:
    df_interp = pd.concat(interp_frames, ignore_index=True)
else:
    # No station had a cast with two or more samples: keep the expected columns
    # so later cells fail (or succeed) on content rather than on a missing column.
    df_interp = pd.DataFrame(columns=['temperature', 'depth', 'station', 'date'])


In [6]:
# Build a dense (time, station, depth) temperature cube from the long table.

# First pivot the data into the correct shape: one row per timestamp and one
# column per (station, depth) pair.
pivot_data = df_interp.pivot(index='date', columns=['station', 'depth'], values='temperature')

# Force the columns into the full station x depth product, in exactly the
# station-major order the reshape below assumes. Pairs that have no data (for
# example a station without any usable cast) become all-NaN columns instead of
# making the reshape fail on a size mismatch.
expected_columns = pd.MultiIndex.from_product(
    [stations, standard_depths], names=['station', 'depth']
)
pivot_data = pivot_data.reindex(columns=expected_columns)

# Create DataArray with proper dimensions.
# The time coordinate is taken from the pivot's own index: that is the ordering
# of the rows in `pivot_data`, whereas df_interp['date'].unique() is in order of
# first appearance and need not line up with those rows.
da = xr.DataArray(
    pivot_data.values.reshape(len(pivot_data.index), len(stations), len(standard_depths)),
    coords={
        'time': pivot_data.index.values,
        'station': stations,
        'depth': standard_depths
    },
    dims=['time', 'station', 'depth']
)

In [7]:
# Attach descriptive metadata to the array: the variable name and the units
# of the stored values.
da.name = 'temperature'
da.attrs.update({'units': 'degrees Celsius'})

In [8]:
# Write the interpolated temperature array to a netCDF file next to the input data.
da.to_netcdf('data/line_208_interp.nc')