## **GEOG/ESO 230: Reading and writing different types of binary files**

In [None]:
!git clone https://github.com/atkinsonde/230_2025.git
# remember to click the "refresh" button to see the folder appear on the left

In [2]:
dir = r"230_2025/binary_class_notebook/"

## Part 1: open a binary file for writing, reading

In [5]:
#######  PART 1
fn = "test.b"

with open(dir+fn, "wb") as file:
    # byte format - recall the ASCII table
    byte_data = b"\x55\x56\x69\x63\x0D\x0A" # let's make something up ** NOTE THE b to indicate this string is binary encoded
    file.write(byte_data)

### You must explicitly close the file!! This is true anytime you use open()
### the with statement automatically closes the file

# Have a look at what we wrote:
with open(dir+fn,"rb") as file:
    zz=file.read()
    print(zz)

b'UVic\r\n'


## Part 2: write integers in binary form

In [6]:
#######  PART 2 write integers in binary form
## why do this? less storage space, faster access

import array

fn = "test_num.bin"
vals = [10,20,30,40,50]
data = array.array("B", vals)

with open(dir+fn, "wb") as file:
    file.write(data.tobytes())

with open(dir+fn,"rb") as file:
    zz  = file.read()
    zzn = [x for x in zz]
    print(zzn)


[10, 20, 30, 40, 50]


##  Part 3: write integers as strings in binary form

In [7]:
#######  PART 3 write integers as strings in binary form
fn = "test_num_B.bin"
vals = ["10", "20", "30", "40", "50"]
#vals_b = [x.encode('utf-8') for x in vals ]

with open(dir+fn, "wb") as file:
    [file.write(x.encode('utf-8')) for x in vals]

with open(dir+fn,"rb") as file:
    zz=file.read()
    print(zzn)


[10, 20, 30, 40, 50]


## Part 4: Read an "HDF" file (hierarchical data format)

In [None]:
#######  PART 4  HDF read
import h5py
import pandas as pd
import numpy as np

dir_n = r"230_2025/binary_class_notebook/nimbus_hdf/"
fn    = "NmHRIR1H.19660823_01-50-12_1332_002.hdf.xml"
nimbus_meta = pd.read_xml(dir_n+fn)
nimbus_meta.iloc[2,:]

fn = "NmHRIR1H.19660823_01-50-12_1332_002.hdf"

f = h5py.File(dir_n+fn, 'r')

f.keys()
f['Sat Longitude']

ds = np.array(f['HRIR-Temp']) # creates a numpy array from an HDF dataset
ds.shape
ds.dtype
ds[100,100]

#let's make a quick "heatmap" plot using some other python libraries:
from matplotlib import pyplot as plt
import seaborn as sns
sns.heatmap(ds)
plt.show()
# it seems there are some huge values in the set which are wrecking the plot

# we need to remove them using a conditional:
dsf = np.where(ds<350,ds,200)
sns.heatmap(dsf)
plt.show()


## Part 5: read a "netCDF" file (NETwork Common Data Format)

In [None]:
!pip install netCDF4

In [7]:
import numpy as np
import seaborn as sns

In [None]:
import netCDF4 as nc

dir_nc = r"230_2025/binary_class_notebook/netCDF/"
fn = "vwnd.10m.1979.nc"

ds = nc.Dataset(dir_nc+fn)
print(ds)

print(ds.variables.keys()) # get all variable names
ds['vwnd']
t1 = ds['vwnd'][0,:,:]

t1 = np.where(t1>-100,t1,-50)
sns.heatmap(t1)
plt.show()

t2 = ds['vwnd'][:,90,100]
pd.DataFrame(t2).plot()


##  PART 6  Grib read

In [None]:
fn = "CMC_hrdps_west_TMP_TGL_2_ps2.5km_2020031312_P000-00.grib2"

import xarray as xr
ds = xr.load_dataset(dir_nc+fn, engine='cfgrib')
import cfgrib