Copy Wayne's
```
/allen/aibs/technology/waynew/behavior/behavior_only_nwb/20210227_visual_behavior_big_table.csv
```
into `data/`

Define a class (really just a thin wrapper around pandas) to manipulate the metadata

In [3]:
import pandas as pd
import requests
import requests_toolbelt
import json
import os
from contextlib import closing

In [6]:
class MetadataCache(object):
    """
    Thin wrapper around a pandas DataFrame of metadata, plus a local
    on-disk cache of downloaded files.

    The cache is tracked by a JSON manifest stored at
    ``<cache_dir>/manifest.json``.  The manifest maps
    ``file_version -> {file_url -> local file path}``.
    """

    # number of bytes requested per chunk when streaming a download to disk
    _download_chunk_size = 1024 * 1024

    def __init__(self,
                 metadata_file = 'data/20210227_visual_behavior_big_table.csv',
                 cache_dir = None):
        """
        metadata_file -- path to the metadata csv file
        cache_dir -- directory where downloaded files and the manifest of
                     downloaded files will be kept. If None, defaults to
                     data/cache next to this source file (or under the
                     current working directory when __file__ is not
                     defined, e.g. in a notebook).

        Raises
        ------
        RuntimeError if cache_dir exists but is not a directory, or if
        the manifest path exists but is not a file
        """

        # NOTE(review): not used anywhere in this class as shown here;
        # presumably the host downloads will eventually come from -- confirm
        self._warehouse_host = 'localhost:8080'

        self.dataframe = pd.read_csv(metadata_file)

        if cache_dir is None:
            try:
                this_dir = os.path.dirname(os.path.abspath(__file__))
            except NameError:
                # __file__ does not exist in interactive sessions/notebooks
                this_dir = os.getcwd()
            cache_dir = os.path.join(this_dir, 'data/cache')
        self.cache_dir = cache_dir

        if not os.path.exists(self.cache_dir):
            os.makedirs(self.cache_dir, exist_ok=True)

        if not os.path.isdir(self.cache_dir):
            raise RuntimeError(f"\ncache_dir\n{self.cache_dir}\nis not a dir")

        self.manifest_path = os.path.join(self.cache_dir, 'manifest.json')
        if os.path.exists(self.manifest_path):
            if not os.path.isfile(self.manifest_path):
                raise RuntimeError(f"\nmanifest_path\n{self.manifest_path}\nis not a file")
            with open(self.manifest_path, 'rb') as in_file:
                self.manifest = json.load(in_file)
        else:
            # first use of this cache_dir: start with an empty manifest
            # and persist it immediately
            self.manifest = {}
            self.update_manifest()

    def update_manifest(self):
        """
        Write contents of self.manifest to self.manifest_path
        """
        with open(self.manifest_path, 'w') as out_file:
            out_file.write(json.dumps(self.manifest, indent=2, sort_keys=True))

    def download_file(self, file_url, file_version):
        """
        Download a file into the cache, unless the manifest already lists
        it, in which case the recorded path is returned without any
        download (and without checking that the file still exists).

        file_url -- url to file
        file_version -- string denoting version of the file to download
                        (not actually sure how we are going to do versioning)

        Returns
        -------
        Path to the downloaded file

        Raises
        ------
        RuntimeError if the version sub directory is not a directory, or
        if the destination file already exists without being listed in
        the manifest
        """

        if file_version not in self.manifest:
            self.manifest[file_version] = {}
        if file_url in self.manifest[file_version]:
            return self.manifest[file_version][file_url]

        print(f"Actually downloading {file_url} version {file_version}")

        # create a sub directory for files with this version tag
        version_dir = os.path.join(self.cache_dir, file_version)
        if not os.path.exists(version_dir):
            os.makedirs(version_dir, exist_ok=True)
        if not os.path.isdir(version_dir):
            raise RuntimeError(f"\n{version_dir}\nis not a directory")

        # the path where we will actually store the downloaded file
        file_path = os.path.join(version_dir,
                                 os.path.basename(file_url)+'.nwb')

        # make sure it does not exist yet
        if os.path.exists(file_path):
            raise RuntimeError(f"\ntrying to create {file_path}\nbut it already exists")

        # download into a temporary sibling file so that a failed or
        # interrupted download never leaves a truncated file at file_path
        # (which would make every retry fail the existence check above)
        partial_path = file_path + '.part'
        try:
            with closing(requests.get(file_url, stream=True)) as response:
                response.raise_for_status()
                with open(partial_path, 'wb') as out_file:
                    for chunk in response.iter_content(
                            chunk_size=self._download_chunk_size):
                        out_file.write(chunk)
            os.replace(partial_path, file_path)
        except Exception:
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise

        # update the manifest
        self.manifest[file_version][file_url] = file_path
        self.update_manifest()

        return file_path
