In [None]:
import hoss
import os
import time
import hashlib
import tempfile

from hoss import utilities

## Connect to local server
This notebook demonstrates basic operations using a single Hoss server.
For these demo notebooks, it's assumed you have the `admin` role and are running the server locally
on localhost. If using a different server, be sure to change the endpoint in the `.connect()` call.

We start by connecting to the "local" server.

In [None]:
# Connect to the Hoss server at http://localhost.
# Change this endpoint if your server is running somewhere else.
server_local = hoss.connect('http://localhost')

In [None]:
# Print a header line, then whatever the server reports as its namespaces.
header = "Existing Namespaces:"
print(header)
namespaces = server_local.list_namespaces()
print(namespaces)

## Create a dataset
First, load the `default` namespace, and then create a dataset inside it.

In [None]:
# Look up the namespace named 'default' on the connected server.
ns = server_local.get_namespace('default')

In [None]:
# Name and description of the dataset used throughout this example.
dataset_name = "hash-test"
dataset_description = "A dataset for demoing how to check hashes"

# Create the dataset in the namespace and show it in the notebook.
ds = ns.create_dataset(dataset_name, dataset_description)
ds.display()

## Write a file from disk and check its hash manually

We can access the hash value that is computed by the object store via the `etag` attribute of a dataset object reference.

If a file is written with a single part (by default <= 8MB), then computing the etag is straightforward. Simply compute
the hexdigest of the md5 hash of the file.

Note, the ETag will always be wrapped in `"`.

In [None]:
# The same base name is used for the local file and for the object key.
example_name = "example-file.txt"

# Local path: the example file is expected in the current working directory.
filename = os.path.join(os.getcwd(), example_name)

# Build a reference inside the dataset and write the local file to it.
f1 = ds / example_name
f1.write_from(filename)

In [None]:
# Check the etag value of the ref.
# Ending the cell with a bare expression lets the notebook display its value.
f1.etag

In [None]:
# Compute the MD5 hex digest of the local file's bytes so it can be compared
# with the etag of the object reference.
# The `with` block closes the file handle as soon as the bytes are read,
# instead of leaving an open handle for the garbage collector to reclaim.
with open(filename, 'rb') as local_file:
    computed_hash = hashlib.md5(local_file.read()).hexdigest()
print(computed_hash)

In [None]:
# The etag is wrapped in double quotes, so wrap the local digest the same way
# before comparing.
expected_etag = f'"{computed_hash}"'
print("hashes match!" if f1.etag == expected_etag else "hashes do NOT match!")

## Verify hashes using hoss-client utility function

If a file is larger than the multipart threshold, computing the etag is more complex. The etag becomes
the md5 hexdigest of all part md5 digests concatenated, with `-<num parts>` appended.

To simplify checking if a local file matches that in the remote object store, utility functions are provided to
compute etag values and also to check if a DatasetRef instance matches the contents of a local file.

In [None]:
# Write a small file.
# delete=False keeps the file on disk after the handle is closed, so the
# following cells can still refer to it through tf.name.
small_text = "this is a small file"
with tempfile.NamedTemporaryFile(mode='wt', delete=False) as tf:
    tf.write(small_text)
    tf.flush()

# Reference inside the dataset that the small file will be written to.
obj = ds / "small-file.dat"
    

In [None]:
# Check file hash function works as expected on a small file.
# tf.name is the on-disk path of the temporary file created in the previous cell.
local_hash = utilities.hash_file(tf.name)
print(local_hash)

In [None]:
# Upload the temporary file to the object store through the dataset reference.
obj.write_from(tf.name)

# Ask the utility whether the reference matches the local file and fail
# loudly if it does not.
etag_matches = utilities.etag_does_match(obj, tf.name)
assert etag_matches

In [None]:
# Clean up the temp file.
# It was created with delete=False, so it must be removed explicitly.
os.remove(tf.name)

In [None]:
# Write a file that is about 20MB, which will trigger multipart uploads.
# 10 characters repeated 1024 * 2000 times gives 20,480,000 characters.
chunk = "1234567890"
repeats = 1024 * 2000
with tempfile.NamedTemporaryFile(mode='wt', delete=False) as tf:
    tf.write(chunk * repeats)
    tf.flush()

# Reference inside the dataset that the large file will be written to.
obj = ds / "large-file.dat"
    

In [None]:
# Check file hash function works as expected on a large (multipart-sized) file.
# tf.name is the on-disk path of the ~20MB temporary file created in the previous cell.
local_hash = utilities.hash_file(tf.name)
print(local_hash)

In [None]:
# Upload the large temporary file to the object store through the dataset reference.
obj.write_from(tf.name)

# Ask the utility whether the reference matches the local file and fail
# loudly if it does not.
etag_matches = utilities.etag_does_match(obj, tf.name)
assert etag_matches

In [None]:
# Clean up the temp file.
# It was created with delete=False, so it must be removed explicitly.
os.remove(tf.name)

## Clean up this example

Run this cell to remove the resources created during this example.

In [None]:
# Delete the "hash-test" dataset that was created earlier in this notebook.
ns.delete_dataset("hash-test")