In [None]:
import azure.storage.blob

#URL = r'https://datavillagesa.blob.core.windows.net/volve?sv=2018-03-28&sr=c&sig=2CCvEBPWiojChDE1N%2BJw%2ByZ%2Fn5oMf3iXSEUZBm5XMq8%3D&se=2022-03-13T23%3A04%3A36Z&sp=rl'
# Container URL with its SAS query string appended (note the `se=` expiry parameter).
URL = r'https://datavillagesa.blob.core.windows.net/northernlights?sv=2018-03-28&sr=c&sig=VTWTxWY%2BT7KQ8Y3m93%2B298%2FUjVMi6ebEyEee%2Ffu16SY%3D&se=2022-03-03T22%3A16%3A48Z&sp=rl'

container = azure.storage.blob.ContainerClient.from_container_url(URL)

# One (name, size) pair per blob in the container; later cells iterate over this list.
files = [(blob.name, blob.size) for blob in container.list_blobs()]

# zip(*files) yields nothing for an empty listing, so the two-name unpacking
# would raise ValueError; fall back to empty tuples in that case.
(names, sizes) = zip(*files) if files else ((), ())
total_size = sum(sizes)

# Binary units used for the summary line.
MB = 2 ** 20
TB = 2 ** 40

print(f'{len(files)} files, {total_size} bytes, {total_size/MB:.2f} MB, {total_size/TB:.2f} TB')

In [None]:
import collections
import os

# Per-extension totals: bytes stored and number of files.
tally_bytes = collections.defaultdict(int)
tally_count = collections.defaultdict(int)
for (blob_name, blob_size) in files:
    # Extension without its leading dot, upper-cased so 'las' and 'LAS' share a bucket.
    ext = os.path.splitext(blob_name)[1][1:].upper()
    tally_count[ext] += 1
    tally_bytes[ext] += blob_size

# One row per extension, most files first: extension, file count, total bytes.
for ext in sorted(tally_count, key=tally_count.get, reverse=True):
    print(f'{ext:<8} {tally_count[ext]:>8} {tally_bytes[ext]:>20,}')


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# (extension, file count) pairs, most frequent extension first.
ext_nfiles_sorted = sorted(tally_count.items(), key=lambda item: item[1], reverse=True)

# Show the `topcount` most common extensions individually and lump the remainder together.
topcount = 20
toplist = ext_nfiles_sorted[:topcount]
the_rest = ext_nfiles_sorted[topcount:]

# zip(*[]) yields nothing to unpack, so fall back to empty tuples when nothing was tallied.
(extensions, nfiles) = zip(*toplist) if toplist else ((), ())
# The 'other' bar is the file count of every extension outside the top list.
extensions += ('other',)
nfiles += (sum(count for (_ext, count) in the_rest),)

fig, ax = plt.subplots()
bars = ax.barh(extensions, nfiles, align='center')
ax.invert_yaxis()  # most frequent extension at the top
ax.bar_label(bars)
ax.set_xlabel('Number of files')
ax.set_ylabel('File extension')
ax.set_title(f'File count by extension (top {topcount})')

plt.show()

In [None]:
# Print size and path of every blob whose name ends in .LAS (case-insensitive).
for (name, size) in files:
    if not name.upper().endswith('.LAS'):
        continue
    print(f'{size:<20}{name}')

In [None]:
# Fetch one LAS file from the container and show its raw contents.
blob_client = container.get_blob_client("31_5-7 Eos/07.Borehole_Seismic/TZV_TIME_SYNSEIS_2020-01-17_2.LAS")
# content_as_text() is deprecated in azure-storage-blob v12; passing an encoding to
# download_blob() makes readall() return decoded text instead of raw bytes.
# NOTE: despite its name, `bytes` holds a str and shadows the builtin. The name is kept
# because the LAS-parsing cell that follows reads it.
bytes = blob_client.download_blob(encoding='UTF-8').readall()
print(bytes)

In [None]:
# Split the downloaded LAS text into header lines and data lines.
# Everything up to and including the line that starts with "~A" is treated as
# header; every line after it is treated as data.
headerlines = []
datalines = []
lines = bytes.splitlines()
inside_header = True
for line in lines:
    if inside_header:
        headerlines.append(line)
    else:
        datalines.append(line)
    # Flip the flag only after storing the line, so the "~A" marker itself stays in the header.
    if line.startswith("~A"):
        inside_header = False

# Extract the second whitespace-separated column of each data row as floats.
curve = []
for row in datalines:
    fields = row.split()
    if len(fields) < 2:
        # Blank or single-column rows have no second field; indexing them would raise IndexError.
        continue
    value = fields[1]
    # Values starting with -999.25 are skipped — presumably the file's NULL marker; confirm against the header.
    if not value.startswith('-999.25'):
        curve.append(float(value))
plt.plot(curve)



In [None]:
import segyio
import numpy as np
import matplotlib.pyplot as plt

# Read every trace of a local SEG-Y file and display the samples as an image,
# clipping the colour scale to +/-0.01.
filename = 'VSPZO_RAW_2020-01-17_4.SEGY'
with segyio.open(filename, strict=False) as f:
    # list(trace) copies each trace's samples into a plain Python list —
    # presumably needed because segyio may reuse its read buffer between iterations; confirm.
    traces = [list(trace) for trace in f.trace]
plt.imshow(traces, vmin=-0.01, vmax=0.01)
