## Trying to work up a notebook to help KT Ramesh find data in the portal

In [1]:
# Step 1: Install dependencies if not already installed
# %pip install girder-client pandas

import os
from girder_client import GirderClient
import pandas as pd

In [2]:
# Connect to the Girder instance using the API key
client = GirderClient(apiUrl=os.environ["GIRDER_API_URL"])
client.authenticate(apiKey=os.environ["GIRDER_API_KEY"])

{'_id': '6424ac394236ff9b0883f243'}

In [3]:
# Step 3: Query Girder for files containing 'SiC' in filenames
def query_files(search_term="SiC", limit=1000):
    query_results = client.get("resource/search", parameters={
        "q": search_term,
        "types": "[\"item\"]",
        "limit": limit
    })

    files = []
    for result in query_results["item"]:
        item_id = result["_id"]
        item_name = result["name"]

        # Fetch file details within the item
        item_files = client.get(f"item/{item_id}/files")
        for file in item_files:
            if search_term.lower() in file['name'].lower():
                files.append({
                    'Item Name': item_name,
                    'File Name': file['name'],
                    'File ID': file['_id'],
                    'Size (KB)': round(file['size']/1024, 2)
                })
    
    return pd.DataFrame(files)


In [4]:
# Execute query
df_sic_files = query_files("SiC")

In [5]:
# Display results
df_sic_files

Unnamed: 0,Item Name,File Name,File ID,Size (KB)
0,SiC Polytypes.zip,SiC Polytypes.zip,66aa787fbcc722774ce5e69f,62634.2


### Expand the query to get the path and uploading user:

In [13]:
# Improved helper function to clearly separate collection and folder paths
def get_collection_and_path(item_id):
    path_data = client.get(f"resource/{item_id}/path", parameters={"type": "item"})
    if isinstance(path_data, dict) and 'path' in path_data:
        path_elements = path_data['path']
        if len(path_elements) >= 2:
            collection = path_elements[0]['object']['name']
            folder_path = '/'.join([entry['object']['name'] for entry in path_elements[1:-1]])
        elif len(path_elements) == 1:
            collection = path_elements[0]['object']['name']
            folder_path = ""
        else:
            collection = "Unknown"
            folder_path = ""
        return collection, folder_path
    return "Unknown", "Unknown"

# Helper function to get uploader name
def get_uploader_name(file_id):
    file_data = client.get(f"file/{file_id}")
    creator_id = file_data.get('creatorId')
    if creator_id:
        user_data = client.get(f"user/{creator_id}")
        return f"{user_data.get('firstName', '')} {user_data.get('lastName', '')}".strip()
    return "Unknown"

In [9]:
# Updated helper function to build collection path safely
def get_item_path(item_id):
    path_data = client.get(f"resource/{item_id}/path", parameters={"type": "item"})
    # Ensure path_data is a dictionary with a 'path' key
    if isinstance(path_data, dict) and 'path' in path_data:
        path = '/'.join([entry['object']['name'] for entry in path_data['path']])
        return path
    return "Unknown Path"


# Helper function to get uploader name
def get_uploader_name(file_id):
    file_data = client.get(f"file/{file_id}")
    creator_id = file_data.get('creatorId')
    if creator_id:
        user_data = client.get(f"user/{creator_id}")
        return f"{user_data.get('firstName', '')} {user_data.get('lastName', '')}".strip()
    return "Unknown"

### Redo the Query with the path and user info:

In [14]:
# Step 3: Query Girder for files containing 'SiC' in filenames
def query_files(search_term="SiC", limit=1000):
    query_results = client.get("resource/search", parameters={
        "q": search_term,
        "types": "[\"item\"]",
        "limit": limit
    })

    files = []
    for result in query_results["item"]:
        item_id = result["_id"]
        item_name = result["name"]
        collection, folder_path = get_collection_and_path(item_id)

        # Fetch file details within the item
        item_files = client.get(f"item/{item_id}/files")
        for file in item_files:
            if search_term.lower() in file['name'].lower():
                uploader_name = get_uploader_name(file['_id'])
                files.append({
                    'Item Name': item_name,
                    'File Name': file['name'],
                    'File ID': file['_id'],
                    'Size (KB)': round(file['size']/1024, 2),
                    'Collection': collection,
                    'Folder Path': folder_path,
                    'Uploader': uploader_name
                })
    
    return pd.DataFrame(files)

In [19]:
# Execute query
df_sic_files = query_files("SiC")

In [20]:
# Display results
df_sic_files

Unnamed: 0,Item Name,File Name,File ID,Size (KB),Collection,Folder Path,Uploader
0,SiC Polytypes.zip,SiC Polytypes.zip,66aa787fbcc722774ce5e69f,62634.2,Unknown,Unknown,Dan Gianola


In [21]:
# Execute query
df_nb_files = query_files("Nb")

In [22]:
# Display results
df_nb_files

Unnamed: 0,Item Name,File Name,File ID,Size (KB),Collection,Folder Path,Uploader
0,Nb foil.osc,Nb foil.osc,6481e47223e04ab3d99445b9,90558.85,Unknown,Unknown,HTMDEC Admin
1,Nb foil.png,Nb foil.png,6481fd4423e04ab3d9944a74,670.7,Unknown,Unknown,HTMDEC Admin
2,Nb foil.png,Nb foil.png,654a5f8915a62d3ccbcd8d0a,670.7,Unknown,Unknown,HTMDEC Admin
3,Nb foil.osc,Nb foil.osc,654a5f8915a62d3ccbcd8d08,90558.85,Unknown,Unknown,HTMDEC Admin
4,Nb foil - lateral.png,Nb foil - lateral.png,654a5f8915a62d3ccbcd8d10,135.04,Unknown,Unknown,HTMDEC Admin
5,Nb foil - lateral.png,Nb foil - lateral.png,6481fd4623e04ab3d9944a7d,135.04,Unknown,Unknown,HTMDEC Admin
6,Nb foil - lateral.osc,Nb foil - lateral.osc,654a5f8915a62d3ccbcd8d06,16880.94,Unknown,Unknown,HTMDEC Admin
7,Nb foil - lateral.osc,Nb foil - lateral.osc,6481e47223e04ab3d99445b7,16880.94,Unknown,Unknown,HTMDEC Admin
8,Nb foil - region 2.png,Nb foil - region 2.png,654a5f8915a62d3ccbcd8d0c,402.88,Unknown,Unknown,HTMDEC Admin
9,Nb foil - region 2.osc,Nb foil - region 2.osc,654a5f8915a62d3ccbcd8d04,51272.81,Unknown,Unknown,HTMDEC Admin
