97 changes: 56 additions & 41 deletions 03_array.ipynb
@@ -98,10 +98,12 @@
"source": [
"# Load data with h5py\n",
"# this creates a pointer to the data, but does not actually load\n",
"import h5py\n",
"import os\n",
"f = h5py.File(os.path.join('data', 'random.hdf5'), mode='r')\n",
"dset = f['/x']"
"\n",
"import h5py\n",
"\n",
"f = h5py.File(os.path.join(\"data\", \"random.hdf5\"), mode=\"r\")\n",
"dset = f[\"/x\"]"
]
},
{
@@ -134,7 +136,7 @@
"# Compute sum of large array, one million numbers at a time\n",
"sums = []\n",
"for i in range(0, 1_000_000_000, 1_000_000):\n",
" chunk = dset[i: i + 1_000_000] # pull out numpy array\n",
" chunk = dset[i : i + 1_000_000] # pull out numpy array\n",
" sums.append(chunk.sum())\n",
"\n",
"total = sum(sums)\n",
@@ -174,16 +176,14 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": true
}
"tags": []
},
"outputs": [],
"source": [
"sums = []\n",
"lengths = []\n",
"for i in range(0, 1_000_000_000, 1_000_000):\n",
" chunk = dset[i: i + 1_000_000] # pull out numpy array\n",
" chunk = dset[i : i + 1_000_000] # pull out numpy array\n",
" sums.append(chunk.sum())\n",
" lengths.append(len(chunk))\n",
"\n",
@@ -226,6 +226,7 @@
"outputs": [],
"source": [
"import dask.array as da\n",
"\n",
"x = da.from_array(dset, chunks=(1_000_000,))\n",
"x"
]
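Aside: the cell above only builds a lazy handle over the HDF5 dataset. A minimal sketch of how it is typically driven (assuming the `dset` handle opened in the first hunk; not part of this diff):

```python
import dask.array as da

# Wrap the on-disk HDF5 dataset in a dask array, one task per million elements
x = da.from_array(dset, chunks=(1_000_000,))

total = x.sum()          # builds a task graph; no data is read yet
print(total.compute())   # streams chunks through memory and reduces them
```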
@@ -379,12 +380,13 @@
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import dask.array as da\n",
"import numpy as np\n",
"\n",
"x = da.random.normal(10, 0.1, size=(20000, 20000), # 400 million element array \n",
" chunks=(1000, 1000)) # Cut into 1000x1000 sized chunks\n",
"y = x.mean(axis=0)[::100] # Perform NumPy-style operations"
"x = da.random.normal(\n",
" 10, 0.1, size=(20000, 20000), chunks=(1000, 1000) # 400 million element array\n",
") # Cut into 1000x1000 sized chunks\n",
"y = x.mean(axis=0)[::100] # Perform NumPy-style operations"
]
},
{
@@ -403,7 +405,7 @@
"outputs": [],
"source": [
"%%time\n",
"y.compute() # Time to compute the result"
"y.compute() # Time to compute the result"
]
},
{
@@ -535,12 +537,13 @@
"metadata": {},
"outputs": [],
"source": [
"import h5py\n",
"from glob import glob\n",
"import os\n",
"from glob import glob\n",
"\n",
"filenames = sorted(glob(os.path.join('data', 'weather-big', '*.hdf5')))\n",
"dsets = [h5py.File(filename, mode='r')['/t2m'] for filename in filenames]\n",
"import h5py\n",
"\n",
"filenames = sorted(glob(os.path.join(\"data\", \"weather-big\", \"*.hdf5\")))\n",
"dsets = [h5py.File(filename, mode=\"r\")[\"/t2m\"] for filename in filenames]\n",
"dsets[0]"
]
},
@@ -563,7 +566,7 @@
"import matplotlib.pyplot as plt\n",
"\n",
"fig = plt.figure(figsize=(16, 8))\n",
"plt.imshow(dsets[0][::4, ::4], cmap='RdBu_r');"
"plt.imshow(dsets[0][::4, ::4], cmap=\"RdBu_r\");"
]
},
{
@@ -628,7 +631,8 @@
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"tags": []
},
"outputs": [],
"source": [
@@ -640,7 +644,15 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"**Plot the mean of this array along the time (`0th`) axis**"
"**Plot the mean of this array along the time (`0th`) axis**\n",
"\n",
"Complete the following:\n",
"\n",
"```python\n",
"result = ...\n",
"fig = plt.figure(figsize=(16, 8))\n",
"plt.imshow(result, cmap='RdBu_r')\n",
"```"
]
},
{
@@ -652,25 +664,22 @@
]
},
"outputs": [],
"source": [
"# complete the following:\n",
"fig = plt.figure(figsize=(16, 8))\n",
"plt.imshow(..., cmap='RdBu_r')"
]
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"jupyter": {
"source_hidden": true
}
},
"tags": []
},
"outputs": [],
"source": [
"result = x.mean(axis=0)\n",
"fig = plt.figure(figsize=(16, 8))\n",
"plt.imshow(result, cmap='RdBu_r');"
"plt.imshow(result, cmap=\"RdBu_r\");"
]
},
{
@@ -699,7 +708,7 @@
"source": [
"result = x[0] - x.mean(axis=0)\n",
"fig = plt.figure(figsize=(16, 8))\n",
"plt.imshow(result, cmap='RdBu_r');"
"plt.imshow(result, cmap=\"RdBu_r\");"
]
},
{
@@ -756,21 +765,22 @@
},
"outputs": [],
"source": [
"import h5py\n",
"from glob import glob\n",
"import os\n",
"from glob import glob\n",
"\n",
"import dask.array as da\n",
"import h5py\n",
"\n",
"filenames = sorted(glob(os.path.join('data', 'weather-big', '*.hdf5')))\n",
"dsets = [h5py.File(filename, mode='r')['/t2m'] for filename in filenames]\n",
"filenames = sorted(glob(os.path.join(\"data\", \"weather-big\", \"*.hdf5\")))\n",
"dsets = [h5py.File(filename, mode=\"r\")[\"/t2m\"] for filename in filenames]\n",
"\n",
"arrays = [da.from_array(dset, chunks=(500, 500)) for dset in dsets]\n",
"\n",
"x = da.stack(arrays, axis=0)\n",
"\n",
"result = x[:, ::2, ::2]\n",
"\n",
"da.to_zarr(result, os.path.join('data', 'myfile.zarr'), overwrite=True)"
"da.to_zarr(result, os.path.join(\"data\", \"myfile.zarr\"), overwrite=True)"
]
},
{
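The cell above writes the downsampled stack to a zarr store. As a hedged sketch (assuming the `data/myfile.zarr` path written above; `da.from_zarr` is the matching reader), the store can be reopened lazily:

```python
import os

import dask.array as da

# Reopen the zarr store written above; chunking is recorded in its metadata
y = da.from_zarr(os.path.join("data", "myfile.zarr"))
print(y.shape, y.chunks)  # lazy handle; nothing is loaded until computed
```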
@@ -797,23 +807,27 @@
"source": [
"import numpy as np\n",
"\n",
"\n",
"# make a random collection of particles\n",
"def make_cluster(natoms, radius=40, seed=1981):\n",
" np.random.seed(seed)\n",
" cluster = np.random.normal(0, radius, (natoms,3))-0.5\n",
" cluster = np.random.normal(0, radius, (natoms, 3)) - 0.5\n",
" return cluster\n",
"\n",
"\n",
"def lj(r2):\n",
" sr6 = (1./r2)**3\n",
" pot = 4.*(sr6*sr6 - sr6)\n",
" sr6 = (1.0 / r2) ** 3\n",
" pot = 4.0 * (sr6 * sr6 - sr6)\n",
" return pot\n",
"\n",
"\n",
"# build the matrix of distances\n",
"def distances(cluster):\n",
" diff = cluster[:, np.newaxis, :] - cluster[np.newaxis, :, :]\n",
" mat = (diff*diff).sum(-1)\n",
" mat = (diff * diff).sum(-1)\n",
" return mat\n",
"\n",
"\n",
"# the lj function is evaluated over the upper triangle\n",
"# after removing distances near zero\n",
"def potential(cluster):\n",
@@ -886,11 +900,12 @@
"source": [
"import dask.array as da\n",
"\n",
"\n",
"# compute the potential on the entire\n",
"# matrix of distances and ignore division by zero\n",
"def potential_dask(cluster):\n",
" d2 = distances(cluster)\n",
" energy = da.nansum(lj(d2))/2.\n",
" energy = da.nansum(lj(d2)) / 2.0\n",
" return energy"
]
},
@@ -909,7 +924,7 @@
"source": [
"from os import cpu_count\n",
"\n",
"dcluster = da.from_array(cluster, chunks=cluster.shape[0]//cpu_count())"
"dcluster = da.from_array(cluster, chunks=cluster.shape[0] // cpu_count())"
]
},
{
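With the cluster split into roughly one chunk per core, the dask version of the potential can be evaluated. A minimal sketch (assuming `potential_dask` and `dcluster` from the cells above; not part of this diff):

```python
# Build the lazy expression over the chunked distance matrix,
# then execute it; dask evaluates the chunks in parallel.
energy = potential_dask(dcluster)
print(energy.compute())
```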
@@ -974,7 +989,7 @@
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -988,7 +1003,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
"version": "3.10.4"
}
},
"nbformat": 4,