In [None]:
#| default_exp tabulator 

# Exploring your remote data with tabulator

> Another try using panel 

Ok, let's collect the contents of our falnama-project. 

In [None]:
from fairdatanow import RemoteData2
import os 
import panel as pn 

In [None]:
# Connection settings for the remote project folder. The credentials are read
# from the NC_AUTH_USER / NC_AUTH_PASS environment variables, so nothing secret
# is stored in the notebook (os.getenv yields None when a variable is unset).
configuration = {
    'url': "https://laboppad.nl/falnama-project", 
    'user':    os.getenv('NC_AUTH_USER'),
    'password': os.getenv('NC_AUTH_PASS')
}

In [None]:
# Create the remote-data client and list the contents of the
# 'falnama-project' folder as a dataframe (this queries the server).
remote_data = RemoteData2(configuration)
df = remote_data.listdir('falnama-project')

# Number of rows in the listing.
len(df)

Please wait while scanning all file paths in remote folder...


6342

Ok, now we need to turn this dataframe into a Tabulator interactive table. A first step is to add a text search filter. For now, we will implement this following the `pn.bind()` approach as detailed in the tabulator documentation... 

In [None]:
# Wrap the listing in a Tabulator widget: 350 px high, no pagination, index column hidden.
file_table = pn.widgets.Tabulator(df, height=350, pagination=None, show_index=False)

In [None]:
# Text box that holds the search pattern; pre-filled with 'xray'.
search_filter = pn.widgets.TextInput(name='Search filter', value='xray')

In [None]:
def contains_filter(df, pattern, column):
    '''Return the rows of `df` whose `column` value contains `pattern`.

    `pattern` is passed to pandas `Series.str.contains`, so it is interpreted
    as a regular expression. An empty (falsy) `pattern` disables the filter
    and returns `df` itself, unchanged.

    Rows with a missing value in `column` never match.
    '''
    if not pattern:
        return df

    # na=False: without it a missing value yields NaN in the mask, and boolean
    # indexing with such a mask raises instead of simply dropping the row.
    matches = df[column].str.contains(pattern, na=False)
    return df[matches]

In [None]:
# Register the filter on the table: `contains_filter` is bound to the search box value
# and applied to the 'path' column (pn.bind approach from the Tabulator documentation).
file_table.add_filter(pn.bind(contains_filter, pattern=search_filter, column='path'))    

In [None]:
# Lay out the search box and the table in one column; as the last expression
# of the cell this layout is what gets displayed.
pn.Column(search_filter, file_table)

This works fine in my Jupyter notebook. Let's see whether the result also renders in the Quarto documentation. 

The next thing on my list is to add a counter, perhaps along the lines of: https://panel.holoviz.org/how_to/interactivity/bind_component.html

To be continued...

## FUNCTIONS 

In [None]:
#| export 

import nc_py_api 
from nc_py_api import Nextcloud 
import panel as pn
import param 
import humanize
import pandas as pd
import os 
import re

In [None]:
#| export 

# Load the Panel 'tabulator' extension at import time of the exported module
# (presumably required for pn.widgets.Tabulator to render -- TODO confirm).
pn.extension('tabulator')

def _node_to_dataframe2(fsnode): 
    '''Convert `fsnode` object to polars a single row polars dataframe.'''

    df = pd.DataFrame({'path': [fsnode.user_path], 'size': [fsnode.info.size], 'mimetype': [fsnode.info.mimetype], 'modified': [fsnode.info.last_modified], 
                   'isdir': [fsnode.is_dir], 'ext': [os.path.splitext(fsnode.user_path)[1]]})

    return df 

class RemoteData2(object): 
    '''Client for listing the files of a remote Nextcloud folder as a pandas dataframe.'''

    # SECURITY NOTE(review): setting this option to False switches off certificate
    # verification in nc_py_api. It is a module-level option, so it affects every
    # nc_py_api client in the process as soon as this class is defined.
    # See: https://help.nextcloud.com/t/using-nc-py-api-i-cant-download-any-file-due-to-ssl-certificte-verify-failed/194019 
    nc_py_api.options.NPA_NC_CERT = False 

    def __init__(self, configuration): 
        '''Create a Nextcloud client for the server described by `configuration`. 

        `configuration` is a dict with the keys `url`, `user` and `password`.
        The `url` must look like `https://<host>/<project path>`; the part up to
        and including the first slash after the host is used as the server url,
        the remainder is stored as `self.cache_dir` (the default folder for
        `listdir`).

        Raises `ValueError` if `url` does not have that form.
        '''

        # parse configuration 
        m = re.match('(^https://[^/]+/)(.*)', configuration['url'])
        if m is None: 
            # fail with a clear message instead of an AttributeError on `None.groups()` 
            raise ValueError(
                f"Cannot parse configuration url {configuration['url']!r}: "
                "expected 'https://<host>/<project path>'")
        nextcloud_url, self.cache_dir = m.groups()
        nc_auth_user = configuration['user']
        nc_auth_pass = configuration['password'] 

        # Instantiate Nextcloud client 
        self.nc = Nextcloud(nextcloud_url=nextcloud_url, nc_auth_user=nc_auth_user, nc_auth_pass=nc_auth_pass) 


    def listdir(self, subdir=None, search_regex='', searchBuilder={}): 
        '''List the contents of remote subdirectory `subdir` as a pandas dataframe. 

        If subdir is not specified the complete project directory is scanned. 
        The listing is requested with `depth=-1` and includes the folder itself. 

        `search_regex` and `searchBuilder` are accepted but currently not used. 

        Returns the dataframe (one row per remote path, see `_node_to_dataframe2`), 
        which is also stored as `self.df`. 
        '''

        if subdir is None: 
            subdir = self.cache_dir 

        print('Please wait while scanning all file paths in remote folder...') 

        # query webdav server to obtain file listing 
        fs_nodes_list = self.nc.files.listdir(subdir, depth=-1, exclude_self=False) 

        if not fs_nodes_list: 
            # nothing returned: hand back an empty table with the usual columns 
            self.df = pd.DataFrame(columns=['path', 'size', 'mimetype', 'modified', 'isdir', 'ext']) 
            return self.df 

        # build all single-row frames first and concatenate once; concatenating 
        # inside the loop copies the growing frame on every iteration 
        rows = [_node_to_dataframe2(fsnode) for fsnode in fs_nodes_list] 
        self.df = pd.concat(rows, ignore_index=True) 

        return self.df
