# Data exploration

In [2]:
import os
import glob
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
import xarray as xr
import netCDF4 as nc
import numpy as np
from pyproj import Proj, Transformer, CRS
from pyproj.aoi import AreaOfInterest
from pyproj.database import query_utm_crs_info
import plotly as py
from plotly.graph_objs import *
from scipy.io import netcdf
from mpl_toolkits.basemap import Basemap
import plotly.express as px
import plotly.graph_objects as go

ModuleNotFoundError: No module named 'plotly'

In [2]:
# Specifying paths to data

# Resolve the home directory portably: os.getenv("HOME") returns None when the
# variable is unset (e.g. on Windows), which would make the string
# concatenations below fail with a TypeError.
home_dir = os.path.expanduser("~")
# The trailing empty component keeps the trailing path separator, so
# `wfde5_path + 'subdir/...'` style concatenation keeps working.
wfde5_path = os.path.join(home_dir, 'Documents', 'MST Himalaya', 'WFDE5', '')


def _sorted_glob(pattern):
    """Return the paths matching ``pattern`` as a sorted list.

    ``glob.glob`` returns matches in arbitrary, filesystem-dependent order;
    sorting makes the file lists reproducible between runs and machines.
    An empty list is returned when nothing matches.
    """
    return sorted(glob.glob(pattern))


# Paths to WFDE5 data (one list of netCDF files per forcing variable)
psurf_path = _sorted_glob(wfde5_path + 'psurf/*.nc')
qair_path = _sorted_glob(wfde5_path + 'qair/*.nc')
rainf_path = _sorted_glob(wfde5_path + 'rainf/*.nc')
snowf_path = _sorted_glob(wfde5_path + 'snowf/*.nc')
swdown_path = _sorted_glob(wfde5_path + 'swdown/*.nc')
tair_path = _sorted_glob(wfde5_path + 'tair/*.nc')
wind_path = _sorted_glob(wfde5_path + 'wind/*.nc')
asurf_path = _sorted_glob(wfde5_path + 'asurf/*.nc')

# Paths to cell data
cell_data_path = _sorted_glob(wfde5_path + 'cell_data/*.nc')

# Discharge data (plain-text files stored next to the WFDE5 directory)
discharge_path = _sorted_glob(
    os.path.join(home_dir, 'Documents', 'MST Himalaya', 'discharge', '*.txt')
)


In [3]:
# Dictionary keys for the forcing datasets and, position by position, the file
# lists they are loaded from. read_netcf pairs the two lists by index, so they
# must have the same length and order. A stray duplicate `tair_path` used to
# sit before `wind_path`, which made 'wind_speed' load the temperature files.
forcing_variables = ['pressure', 'specific_humidity', 'rainfall_flux', 'snowfall_flux', 'temperature', 'shortwave_down', 'wind_speed']
forcing_paths = [psurf_path, qair_path, rainf_path, snowf_path, tair_path, swdown_path, wind_path]

In [4]:

# Open the netCDF files of every forcing variable
def read_netcf(forcing_paths, forcing_variables):
    """Open each forcing variable's files with ``xr.open_mfdataset``.

    Parameters
    ----------
    forcing_paths : sequence
        One entry per variable; entry ``k`` is passed unchanged to
        ``xr.open_mfdataset`` and belongs to ``forcing_variables[k]``.
    forcing_variables : sequence of str
        Names used as keys of the returned dictionary.

    Returns
    -------
    dict
        Maps each name in ``forcing_variables`` to the object returned by
        ``xr.open_mfdataset`` for the entry at the same position in
        ``forcing_paths``.
    """
    return {
        variable: xr.open_mfdataset(forcing_paths[position])
        for position, variable in enumerate(forcing_variables)
    }


In [5]:
# Load every forcing variable into a dict keyed by variable name
dataset = read_netcf(
    forcing_paths=forcing_paths,
    forcing_variables=forcing_variables,
)

In [117]:
# Total precipitation (rain + snow) for one time step.
snapshot_time = '1990-08-07T16:00:00.000000000'
# Both flux fields are scaled by 3600 and the sum is labelled 'mm h-1'
# (presumably converting a per-second flux to an hourly amount — confirm units).
seconds_per_hour = 3600

rainfall = dataset['rainfall_flux'].Rainf.sel(time=snapshot_time) * seconds_per_hour
snowfall = dataset['snowfall_flux'].Snowf.sel(time=snapshot_time) * seconds_per_hour
precipitation = rainfall + snowfall
# Attach the metadata used later for plot labels
precipitation.attrs.update({'units': 'mm h-1', 'long_name': 'Precipitation'})
lat, lon = precipitation.lat, precipitation.lon
# Keep only the first 16 characters ("YYYY-MM-DDTHH:MM") of the timestamp
date = str(precipitation.coords['time'].values)[:16]

'1990-08-07T16:00'

In [118]:
# Colour-bar label built from the metadata stored on `precipitation`
# ('<br>' is a line break in plotly text).
colorbar_title = precipitation.attrs['long_name'] + '<br>(%s)'%precipitation.attrs['units']
fig = px.imshow(precipitation, color_continuous_scale='RdBu_r', aspect='equal', origin='lower',
    title= f'Precipitation {date}')
# `colorbar_title` used to be computed but never applied to the figure;
# attach it to the shared colour axis that px.imshow creates.
fig.update_layout(coloraxis_colorbar=dict(title=dict(text=colorbar_title)))
fig.show()

In [21]:
# Plot rainfall as a function of latitude and time at a fixed longitude

# rainfall = dataset['rainfall_flux'].sel(lon = 85.25)
# fig = px.imshow(rainfall.Rainf, color_continuous_scale='RdBu_r', origin='lower')
# fig.show()

In [51]:
# Rainfall at the first time step, scaled by 3600 (presumably per-second flux
# to hourly amount, as in the precipitation cell — confirm units).
# The `Rainf` variable is selected so that `rainfall` is a DataArray: later
# cells call `.values` on its stacked form and put it in a DataFrame column,
# which does not work for a whole Dataset.
rainfall = dataset['rainfall_flux'].Rainf.isel(time = 0) * 3600
# Kept under its original (misspelled) name in case other cells refer to it;
# it is no longer used to label the rows below.
rainfaill_stacked = dataset['rainfall_flux'].stack(station = ('lon', 'lat'))
df_rain = rainfall.to_dataframe()
# Read each row's coordinates from the frame's own index. Previously they came
# from a separately stacked object whose (lon, lat) ordering is not guaranteed
# to match the row order produced by to_dataframe(), so rows could be tagged
# with the coordinates of a different grid cell.
row_lon = df_rain.index.get_level_values('lon').to_numpy()
row_lat = df_rain.index.get_level_values('lat').to_numpy()
df_rain['station'] = list(zip(row_lon, row_lat))
# Running integer identifier for each grid cell / row
df_rain['station_coord'] = np.arange(len(df_rain))
df_rain['lon'] = row_lon
df_rain['lat'] = row_lat


In [65]:
# Collapse the 'lon' and 'lat' dimensions into a single 'station' dimension
rainfall_stacked = rainfall.stack({'station': ('lon', 'lat')})

In [66]:
# Inspect the raw values of the stacked rainfall field
rainfall_stacked.values

array([nan, nan, nan, ...,  0.,  0.,  0.], dtype=float32)

In [74]:
# Smallest longitude present in the stacked field
rainfall_stacked['lon'].min()

In [82]:
# Custom colour scale: [position in 0..1, colour] pairs
scl = [[0,"rgb(150,0,90)"],[0.125,"rgb(0, 0, 200)"],[0.25,"rgb(0, 25, 255)"],[0.375,"rgb(0, 152, 255)"],
[0.5,"rgb(44, 255, 150)"],[0.625,"rgb(151, 255, 0)"],
[0.75,"rgb(255, 234, 0)"],[0.875,"rgb(255, 111, 0)"],[1,"rgb(255, 0, 0)"]]

# One row per stacked grid cell. Plain NumPy arrays are pulled out of the
# xarray object so that pandas receives ordinary columns.
df = pd.DataFrame({
    'rainfall': rainfall_stacked.values,
    'lon': rainfall_stacked.lon.values,
    'lat': rainfall_stacked.lat.values,
})
fig = go.Figure(data=go.Scattergeo(
    lon = df['lon'],
    lat = df["lat"],
    # Hover text accepts a small HTML subset but not inline LaTeX, so the unit
    # exponent is written with <sup>; '$h^{-1}$' would be displayed literally.
    text= df['rainfall'].astype(str) + ' mm h<sup>-1</sup>',
    marker = dict(
        color = df['rainfall'],
        #color_continuous_scale=px.colors.sequential.Viridis,
        colorscale  = scl,
        reversescale = True,
        opacity = 0.7,
        size = 2,
        colorbar = dict(
            # `title.side` replaces the legacy `titleside` attribute, which
            # current plotly releases no longer accept.
            title = dict(side = "right"),
            outlinecolor = "rgba(68, 68, 68, 0)",
            ticks = "outside",
            showticksuffix = "last",
            dtick = 0.1
        )
    )
))

fig.update_layout(
    geo = dict(
        scope = 'asia',
        showland = True,
        landcolor = "rgb(212, 212, 212)",
        subunitcolor = "rgb(255, 255, 255)",
        countrycolor = "rgb(255, 255, 255)",
        showlakes = True,
        lakecolor = "rgb(255, 255, 255)",
        showsubunits = True,
        showcountries = True,
        resolution = 50,
        projection = dict(
            type = 'mercator',
            rotation_lon = 0
        )
    ),
    # NOTE(review): the date in the title is hard-coded and the plotted field is
    # `rainfall_stacked`; confirm both still match the data being shown.
    title='HMA Precipitation 01-01-1990',
)
# Hide the default base map and draw only country / subunit borders
fig.update_geos(visible = False, showcountries = True, countrycolor = 'black', showsubunits = True, subunitcolor = 'Blue')
fig.show()

In [8]:
# Import the level-5 catchment polygons from the HydroSHEDS shapefile.
# The path is resolved relative to the current user's home directory instead of
# a hard-coded '/Users/<name>/...' prefix, so the cell runs on other machines.
gdf = gpd.read_file(
    os.path.join(
        os.path.expanduser('~'),
        'Documents', 'MST Himalaya', 'QGIS',
        'hybas_as_lev01-12_v1c', 'hybas_as_lev05_v1c.shp',
    )
)

In [9]:
# Flatten the temperature forcing into a tabular pandas DataFrame and display it
temperature_forcing = dataset['temperature']
temp_ds = temperature_forcing.to_dataframe()
temp_ds

In [None]:
# Join the catchment polygons with the temperature table on a shared 'ID' column.
# NOTE(review): no 'ID' column is visibly created on either `gdf` or `temp_ds`
# in this notebook — confirm the join key exists before running this cell.
merged = gdf.merge(right=temp_ds, on='ID', how='inner')

In [None]:
# Single-axes figure, 10 x 6 inches
fig, ax = plt.subplots(nrows=1, figsize=(10, 6))

# Colour each geometry in `merged` by its 'temperature' column
merged.plot(ax=ax, column='temperature', cmap='RdYlBu')

# Title text with its font settings passed as a fontdict
title_font = {'fontsize': '25', 'fontweight' : '3'}
ax.set_title('Temperature Map', fontdict=title_font)

# Render the figure
plt.show()