## Uso simple

In [1]:
# Eight sample bytes: four ASCII letters followed by four raw values
some_bytes = b"abcd\x01\x02\x03\x04"

# Dump them to disk, opening the file in binary write mode...
with open("/tmp/archivoprueba", "wb") as writer:
    writer.write(some_bytes)

# ...then load the whole file back in binary read mode and show what was stored
with open("/tmp/archivoprueba", "rb") as reader:
    stored = reader.read()
print(stored)

b'abcd\x01\x02\x03\x04'


In [2]:
# Open the file in binary read mode without a `with` block: the handle stays
# open so the following cells can keep reading from where this one stops
fh = open("/tmp/archivoprueba", "rb")
# read two bytes
fh.read(2)

b'ab'

In [3]:
# check where the file pointer is now
fh.tell()

2

In [4]:
# read three more bytes
fh.read(3)

b'cd\x01'

In [5]:
# Read everything up to the end of the file, then close the handle opened in
# In [2]: later cells rebind `fh`, so without this it would never be closed.
remaining_bytes = fh.read()
fh.close()
# last expression of the cell, so the bytes just read are still displayed
remaining_bytes

b'\x02\x03\x04'

In [6]:
with open("/tmp/archivoprueba", "rb") as fh:
    # jump to offset 4, counted from the start of the file (zero-based)...
    fh.seek(4)
    # ...and take the two bytes found there
    pair = fh.read(2)
print(pair)

b'\x01\x02'


In [7]:
import os

with open("/tmp/archivoprueba", "rb") as fh:
    # a negative offset relative to os.SEEK_END places the pointer two bytes
    # before the end of the file, so reading to the end yields the last two bytes
    fh.seek(-2, os.SEEK_END)
    tail = fh.read()
print(tail)

b'\x03\x04'


In [8]:
# Byte sequence to parse: one byte with the text length, one byte with the
# amount of numbers, the text itself, and then the numbers (4 bytes each)
sequence = (
    b"\x05\x07A\xc3\xb1os"
    b"\x00\x00\x07\xe3\x00\x00\x07\xe7"
    b"\x00\x00\x07\xe2\x00\x00\x07\xd5"
    b"\x00\x00\x07\xc9\x00\x00\x07\xcb"
    b"\x00\x00\x07\xd6"
)
with open("/tmp/archivoprueba", "wb") as fh:
    fh.write(sequence)

# Read the file back, interpreting the bytes field by field
with open("/tmp/archivoprueba", "rb") as fh:
    # the two single-byte counters come first
    len_text = int.from_bytes(fh.read(1), "big")
    quant_numbers = int.from_bytes(fh.read(1), "big")
    # then exactly `len_text` bytes of still-encoded text
    text_bytes = fh.read(len_text)
    print(f"(debug: {text_bytes=})")
    # and finally `quant_numbers` big-endian integers of four bytes each
    numbers = [int.from_bytes(fh.read(4), "big") for _ in range(quant_numbers)]
    print(f"(debug: {numbers=})")

text = text_bytes.decode("utf8")
num_sequence = ', '.join(str(value) for value in numbers)
print(f"{text}: {num_sequence}")

(debug: text_bytes=b'A\xc3\xb1os')
(debug: numbers=[2019, 2023, 2018, 2005, 1993, 1995, 2006])
Años: 2019, 2023, 2018, 2005, 1993, 1995, 2006


In [9]:
import struct

with open("/tmp/archivoprueba", "rb") as fh:
    # fixed two-byte header: text length and amount of numbers, one unsigned byte each
    len_text, quant_numbers = struct.unpack("BB", fh.read(2))
    # Format for the rest of the file, built from the header values:
    #   ">"   big-endian, standard sizes, no padding
    #   "Ns"  N raw bytes returned as a single bytes object
    #   "MI"  M unsigned 32-bit integers
    # "I" (unsigned) is used instead of "i" (signed) so this decodes exactly like
    # the int.from_bytes() calls in In [8], which are unsigned by default.
    complex_format = f">{len_text}s{quant_numbers}I"
    # first unpacked item is the text, all the remaining ones are the numbers
    text_bytes, *numbers = struct.unpack(complex_format, fh.read())
    print(f"(debug: {text_bytes=})")
    print(f"(debug: {numbers=})")

text = text_bytes.decode("utf8")
num_sequence = ', '.join(map(str, numbers))
print(f"{text}: {num_sequence}")

(debug: text_bytes=b'A\xc3\xb1os')
(debug: numbers=[2019, 2023, 2018, 2005, 1993, 1995, 2006])
Años: 2019, 2023, 2018, 2005, 1993, 1995, 2006


In [10]:
from collections import namedtuple

# IHDR payload layout: two unsigned 32-bit big-endian integers (width, height)
# followed by five one-byte fields. Those fields are unsigned in the PNG format,
# hence "B" rather than the signed "b".
IHDR_struct = '>IIBBBBB'
IHDR = namedtuple('IHDR', 'width height bit_depth color_type compression filter interlace')

with open("logo.png", "rb") as fh:
    # the file must start with the fixed 8-byte PNG signature
    header = fh.read(8)
    assert header == b"\x89PNG\r\n\x1A\n"

    while True:
        # every chunk starts with 8 bytes: 4-byte data length + 4-byte chunk type
        chunk_header = fh.read(8)
        if not chunk_header:
            # nothing left to read
            break
        length, chunk_type = struct.unpack(">I4s", chunk_header)
        print(f"Chunk {chunk_type!r} len={length}")
        chunk_data = fh.read(length)
        if chunk_type == b"IHDR":
            ihdr = IHDR._make(struct.unpack(IHDR_struct, chunk_data))
            print(f"    width={ihdr.width} height={ihdr.height}")

        fh.read(4)  # CRC, not for this example

        if chunk_type == b"IEND":
            # IEND marks the end of the PNG datastream; any bytes after it are
            # not chunks and must not be parsed as such
            break


Chunk b'IHDR' len=13
    width=1280 height=640
Chunk b'pHYs' len=9
Chunk b'tEXt' len=25
Chunk b'IDAT' len=8192
Chunk b'IDAT' len=8192
Chunk b'IDAT' len=8192
Chunk b'IDAT' len=8192
Chunk b'IDAT' len=1393
Chunk b'IEND' len=0


## Trabajando con HDF5

Primero hay que descargar el archivo necesario desde [esta página](https://data.nrel.gov/submissions/70) y guardarlo en el directorio de trabajo (el código de abajo lo abre como `all.h5`).

In [11]:
import h5py

# Open the HDF5 file with mode "r" and list the names stored at its top level.
# NOTE: this rebinds `fh`, which the earlier cells used for plain binary file handles.
fh = h5py.File("all.h5", "r")
fh.keys()

<KeysViewHDF5 ['datetime', 'latitude', 'longitude', 'mean', 'n', 'std', 'wk']>

In [12]:
# Print each top-level entry of the file
for dataset in fh.values():
    print(dataset)

<HDF5 dataset "datetime": shape (1, 61368), type "|S14">
<HDF5 dataset "latitude": shape (581777,), type "<f4">
<HDF5 dataset "longitude": shape (581777,), type "<f4">
<HDF5 dataset "mean": shape (581777, 1), type "<f4">
<HDF5 dataset "n": shape (581777, 1), type "<i4">
<HDF5 dataset "std": shape (581777, 1), type "<f4">
<HDF5 dataset "wk": shape (581777, 1), type "<f4">


In [13]:
print("datetime:", fh["datetime"][0])  # the first element holds the whole array
print("latitude:", fh["latitude"][42])  # here we get the value directly
print("mean:", fh["mean"][123])  # and here each value comes wrapped in a one-element array

# last expression of the cell: the first 20 latitudes
fh["latitude"][:20]

datetime: [b'20110801230000' b'20110801220000' b'20110801210000' ...
 b'20110402020000' b'20110402010000' b'20110402000000']
latitude: 24.026184
mean: [7.5135026]


array([24.01368 , 24.014069, 24.01445 , 24.014832, 24.01519 , 24.015564,
       24.015923, 24.016289, 24.016632, 24.01699 , 24.017326, 24.017662,
       24.018005, 24.018349, 24.01867 , 24.018997, 24.019302, 24.019623,
       24.019936, 24.020256], dtype=float32)

In [14]:
# Show every dimension object of the "mean" dataset
print("Dimensiones sin nombre:")
for dim in fh["mean"].dims:
    print("    ", dim)
# and how many attributes are attached to the "latitude" dataset
print("Tampoco hay atributos:", len(fh["latitude"].attrs))

Dimensiones sin nombre:
     <"" dimension 0 of HDF5 dataset at 129675587088320>
     <"" dimension 1 of HDF5 dataset at 129675587088320>
Tampoco hay atributos: 0


In [15]:
import numpy as np

# Build a NumPy array from the "latitude" dataset
latitudes = np.array(fh["latitude"])
latitudes

array([24.01368 , 24.014069, 24.01445 , ..., 49.291092, 49.283897,
       49.27669 ], dtype=float32)

In [16]:
# Boolean mask marking the points whose latitude falls in the half-open range [27, 28)
lat_27 = np.logical_and(latitudes >= 27, latitudes < 28)
# how many points the mask selects
np.count_nonzero(lat_27)

52402

In [17]:
# "mean" is stored with shape (581777, 1); np.squeeze removes the length-1 axis
means = np.squeeze(fh["mean"])
means

array([7.4238734, 7.4596395, 7.5191693, ..., 9.926428 , 9.930207 ,
       9.933213 ], dtype=float32)

In [18]:
# average of the "mean" values over the points selected by the lat_27 mask
means[lat_27].mean()

6.9832783


### Copyright 2020-2025 Facundo Batista y Manuel Carlevaro

Licencia CC BY-NC-SA 4.0

Para más info visitar: https://github.com/facundobatista/libro-pyciencia/

