## Count power values by mouse and fiber number
This notebook queries the session metadata to count the number of sesions with each unique power value by fiber

First connect to the database

In [2]:
from aind_data_access_api.document_db import MetadataDbClient
import pandas as pd

# Configure pandas to display all columns
pd.set_option('display.max_columns', None)

# Initialize the client
client = MetadataDbClient(
    host="api.allenneuraldynamics.org",
    database="metadata_index",
    collection="data_assets",
)

Generate a query and pack the results into a dataframe

In [6]:
pipeline = [
    # Match documents for the specific subjects and with fiber photometry sessions
    {"$match": {
        "subject.subject_id": {"$in": ["749472", "749624", "754895", "754896", "754898"]},
        "session.data_streams.fiber_connections": {"$exists": True}
    }},
    # Unwind the data_streams array
    {"$unwind": "$session.data_streams"},
    # Unwind the fiber_connections array
    {"$unwind": "$session.data_streams.fiber_connections"},
    # Group by subject_id, fiber_number, and power value
    {"$group": {
        "_id": {
            "subject_id": "$subject.subject_id",
            "fiber_number": "$session.data_streams.fiber_connections.fiber_name",
            "power": "$session.data_streams.fiber_connections.patch_cord_output_power"
        },
        "count": {"$sum": 1}
    }},
    # Sort by subject_id, fiber_number, and count
    {"$sort": {
        "_id.subject_id": 1,
        "_id.fiber_number": 1,
        "count": -1
    }}
]

# Run the aggregation pipeline
results = client.aggregate_docdb_records(pipeline=pipeline)

# Convert to DataFrame and reshape for better visualization
df = pd.DataFrame(list(results))

# Extract the nested _id fields and create a MultiIndex
df_reshaped = pd.DataFrame({
    'count': df['count'].values,
    'subject_id': df['_id'].apply(lambda x: x['subject_id']),
    'fiber_number': df['_id'].apply(lambda x: x['fiber_number']),
    'power': df['_id'].apply(lambda x: x['power'])
})

# Create pivot table with MultiIndex
pivot_df = df_reshaped.pivot_table(
    values='count',
    index=['subject_id', 'fiber_number'],
    columns='power',
    fill_value=0
)

print("Power value counts by subject and fiber number:")
display(pivot_df)

Power value counts by subject and fiber number:


Unnamed: 0_level_0,power,20
subject_id,fiber_number,Unnamed: 2_level_1
749472,Fiber 0,11.0
749472,Fiber 1,11.0
749472,Fiber 2,11.0
749472,Fiber 3,11.0
749624,Fiber 0,19.0
749624,Fiber 1,19.0
749624,Fiber 2,19.0
749624,Fiber 3,19.0
754895,Fiber 0,3.0
754895,Fiber 1,3.0
