In [1]:
import xarray as xr
import numpy as np
import dask.array as da
import pandas as pd
import datetime

In [19]:
# Load the raw discharge text file into a pandas DataFrame.
# header=16 tells pandas which row of the file holds the column names.
raw_discharge_file = 'raw/discharge/DFL_445.txt'
df = pd.read_csv(raw_discharge_file, header=16)

In [20]:
# Copy the current index into a regular column named 'Date' so it can be
# parsed and re-used as the index in the next step.
df = df.assign(Date=df.index)

In [4]:
# Parse the 'Date' column into datetimes and promote it to the index,
# replacing the index the frame had before.
df = df.assign(Date=lambda frame: pd.to_datetime(frame['Date'])).set_index('Date')

In [5]:
# Give the value column a meaningful name: the column labelled ' 2015 '
# (including the surrounding spaces) becomes 'Discharge'.
df = df.rename(columns={' 2015 ': 'Discharge'})

In [6]:
# Display the frame to confirm the 'Date' index and the 'Discharge' column.
df

Unnamed: 0_level_0,Discharge
Date,Unnamed: 1_level_1
2000-01-01,39.1
2000-01-02,39.1
2000-01-03,38.5
2000-01-04,38.5
2000-01-05,38.3
...,...
2015-12-27,25.4
2015-12-28,26.2
2015-12-29,27.3
2015-12-30,27.2


Now this data has to be converted to a NetCDF file.

In [7]:
# Convert the DataFrame to an xarray Dataset: the 'Date' index becomes the
# dimension/coordinate and each column becomes a data variable.
ds = xr.Dataset.from_dataframe(df)

In [8]:
# Inspect the xarray dataset produced from the DataFrame.
ds

In [9]:
# Station metadata, keyed by station name.
# 'x'/'y' are used later as the dataset's x/y coordinates and 'elevation' as
# its z coordinate; 'latitude'/'longitude' and the other entries are kept for
# reference.
discharge_stations = {
    'Arughat': dict(
        name='Arughat',
        river='Budhi Gandaki',
        elevation=485,
        start_date='1963-11-28',
        latitude=28.04361,
        longitude=84.81639,
        x=285359.2540269664,
        y=3103956.8282662127,
    ),
}

In [10]:
# Display ds again; in notebook order, no cell since the previous display modifies it.
ds

In [13]:
# Build the per-station coordinate arrays (x, y, z) for the output dataset.
#
# `discharge_series` must be defined before the reshapes below, because each of
# them reads its length; defining it afterwards raises NameError on a fresh
# kernel. Only its length (one series) is used in this cell.
discharge_series = np.array([1]).reshape(1,)

# Scalar metadata values reshaped to 1-D float64 arrays, one entry per series.
x_discharge = np.array(discharge_stations['Arughat']['x'], dtype='float64').reshape((len(discharge_series),))
y_discharge = np.array(discharge_stations['Arughat']['y'], dtype='float64').reshape((len(discharge_series),))
z_discharge = np.array(discharge_stations['Arughat']['elevation'], dtype='float64').reshape((len(discharge_series),))

# Wrap each array as a dask array whose single chunk spans all series.
x_discharge_da = da.from_array(x_discharge, chunks=len(discharge_series))
y_discharge_da = da.from_array(y_discharge, chunks=len(discharge_series))
z_discharge_da = da.from_array(z_discharge, chunks=len(discharge_series))


In [14]:

# Discharge values arranged as a (time, series) dask array held in one chunk.
n_series = len(discharge_series)
flow_values = ds.Discharge.values
flow_2d = flow_values.reshape((len(flow_values), n_series))
daily_flow = da.from_array(flow_2d, chunks=(len(ds.Date), n_series))

# Station name per series, stored with object dtype.
station_names = np.array([discharge_stations['Arughat']['name']]).astype('object').reshape(n_series)
series_name = da.from_array(station_names, chunks=(len(station_names),))

# 0-d int32 value used as the dataset's `crs` variable.
discharge_crs = np.array([-2147483647]).astype('int32').reshape(())

# Catchment identifier per series (int32), sized to match series_name.
catchment_values = np.array([10]).astype('int32').reshape(len(series_name))
catchment_id = da.from_array(catchment_values)


In [15]:
# Assemble the output dataset: per-series variables and coordinates live on the
# 'series' dimension, discharge is (time, series), and `crs` is a scalar variable.
disc_ds = xr.Dataset(
    data_vars={
        'series_name': (('series',), series_name),
        'catchment_id': (('series',), catchment_id),
        'crs': discharge_crs,
        'discharge': (('time', 'series'), daily_flow),
    },
    coords={
        'time': ds.Date.values,
        'x': (('series',), x_discharge_da),
        'y': (('series',), y_discharge_da),
        'z': (('series',), z_discharge_da),
    },
)

In [16]:
# Inspect the assembled dataset before attaching attributes.
disc_ds

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,"(1,)","(1,)"
Count,1 Graph Layer,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 8 B 8 B Shape (1,) (1,) Count 1 Graph Layer 1 Chunks Type float64 numpy.ndarray",1  1,

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,"(1,)","(1,)"
Count,1 Graph Layer,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,"(1,)","(1,)"
Count,1 Graph Layer,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 8 B 8 B Shape (1,) (1,) Count 1 Graph Layer 1 Chunks Type float64 numpy.ndarray",1  1,

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,"(1,)","(1,)"
Count,1 Graph Layer,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,"(1,)","(1,)"
Count,1 Graph Layer,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 8 B 8 B Shape (1,) (1,) Count 1 Graph Layer 1 Chunks Type float64 numpy.ndarray",1  1,

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,"(1,)","(1,)"
Count,1 Graph Layer,1 Chunks
Type,float64,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,"(1,)","(1,)"
Count,1 Graph Layer,1 Chunks
Type,object,numpy.ndarray
"Array Chunk Bytes 8 B 8 B Shape (1,) (1,) Count 1 Graph Layer 1 Chunks Type object numpy.ndarray",1  1,

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,"(1,)","(1,)"
Count,1 Graph Layer,1 Chunks
Type,object,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,4 B,4 B
Shape,"(1,)","(1,)"
Count,1 Graph Layer,1 Chunks
Type,int32,numpy.ndarray
"Array Chunk Bytes 4 B 4 B Shape (1,) (1,) Count 1 Graph Layer 1 Chunks Type int32 numpy.ndarray",1  1,

Unnamed: 0,Array,Chunk
Bytes,4 B,4 B
Shape,"(1,)","(1,)"
Count,1 Graph Layer,1 Chunks
Type,int32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,44.17 kiB,44.17 kiB
Shape,"(5654, 1)","(5654, 1)"
Count,1 Graph Layer,1 Chunks
Type,float64,numpy.ndarray
"Array Chunk Bytes 44.17 kiB 44.17 kiB Shape (5654, 1) (5654, 1) Count 1 Graph Layer 1 Chunks Type float64 numpy.ndarray",1  5654,

Unnamed: 0,Array,Chunk
Bytes,44.17 kiB,44.17 kiB
Shape,"(5654, 1)","(5654, 1)"
Count,1 Graph Layer,1 Chunks
Type,float64,numpy.ndarray


In [17]:
# Attach descriptive attributes to the coordinates and variables.

# Projected horizontal coordinates, in metres.
disc_ds.x.attrs.update({
    'axis': 'X',
    'standard_name': 'projection_x_coordinate',
    'units': 'm',
})
disc_ds.y.attrs.update({
    'axis': 'Y',
    'standard_name': 'projection_y_coordinate',
    'units': 'm',
})

# Vertical coordinate: station height in metres.
disc_ds.z.attrs.update({
    'units': 'm',
    'standard_name': 'height',
    'axis': 'Z',
    'long_name': 'height above mean sea level',
})

# series_name identifies each time series.
disc_ds.series_name.attrs.update({'cf_role': 'timeseries_id'})

# Discharge units, plus a pointer to the variable holding the CRS description.
disc_ds.discharge.attrs.update({
    'units': 'm3 s-1',
    'grid_mapping': 'crs',
})

# Coordinate reference system description stored on the scalar `crs` variable.
disc_ds.crs.attrs.update({
    'proj': '+proj=utm +zone=45 +ellps=WGS84 +datum=WGS84 +units=m +no_defs',
    'grid_mapping_name': 'transverse_mercator',
    'epsg_code': 'EPSG:32645',
})

In [18]:
# Write the assembled dataset to a NetCDF file.
# NOTE(review): 'obeserved' in the file name looks like a typo for 'observed';
# left unchanged in case other code reads this exact path — confirm before renaming.
disc_ds.to_netcdf('prepared/obeserved_discharge.nc')

