In [1]:
#!/usr/bin/env python3
# Fiddler Environment Statistics
# This notebook extracts hierarchical information about projects, models, and features from a Fiddler environment

import pandas as pd
import fiddler as fdl
from typing import Dict, List, Any
URL = 'https://customer.fiddler.ai' # Example: 'https://your_company_name.fiddler.ai'
TOKEN = ''  # Get this from the Settings > Credentials tab in Fiddler UI
EXPORT_CSV = True

fdl.init(url=URL, token=TOKEN, auto_attach_log_handler=False)

print(f"Successfully connected to Fiddler at {URL}")
print(f"Client version:    {fdl.__version__}")
print(f"Server version:    {fdl.conn.server_version}")
print(f"Organization ID:   {fdl.conn.organization_id}")
print(f"Organization name: {fdl.conn.organization_name}")



Successfully connected to Fiddler at https://thumbtack.fiddler.ai
Client version:    3.7.0
Server version:    25.6.1
Organization ID:   5307f043-ad0c-42e1-b5b2-9113fb6eb760
Organization name: thumbtack


In [None]:
def get_fiddler_hierarchy() -> Dict:
    """
    Extract hierarchical information about projects, models, and features
    
    Returns:
        Dictionary containing the hierarchical structure of projects, models, and features
    """
    # Initialize hierarchy structure
    hierarchy = {
        'projects': {},
        'total_projects': 0,
        'total_models': 0,
        'total_features': 0
    }
    
    # Get all projects
    projects = list( fdl.Project.list())
    hierarchy['total_projects'] = len(projects)
    
    # Iterate through each project
    for project in projects:
        project_name = project.name
        project_id = project.id
        
        # Initialize project entry in hierarchy
        hierarchy['projects'][project_name] = {
            'id': project_id,
            'models': {},
            'model_count': 0,
            'feature_count': 0
        }
        
        try:
            # Get all models for this project
            models = list( fdl.Model.list(project_id=project_id) )
            hierarchy['projects'][project_name]['model_count'] = len(models)
            hierarchy['total_models'] += len(models)
            
            # Iterate through each model
            for model in models:
                model_name = model.name
                model_id = model.id
                
                try:
                    
                    if not hasattr(model, 'spec'):
                        model = model.fetch()  # For ModelCompact objects
            
                    # Extract input features from model specification
                    features = model.spec.inputs if hasattr(model, 'spec') else []
                    
                    # Add model to project hierarchy
                    hierarchy['projects'][project_name]['models'][model_name] = {
                        # 'id': model_id,
                        'features': features,
                        'feature_count': len(features)
                    }
                    
                    # Update counts
                    hierarchy['projects'][project_name]['feature_count'] += len(features)
                    hierarchy['total_features'] += len(features)
                except Exception as e:
                    print(f"Error getting info for model {model_name} in project {project_name}: {str(e)}")
                    hierarchy['projects'][project_name]['models'][model_name] = {
                        # 'id': model_id,
                        'features': [],
                        'feature_count': 0,
                        'error': str(e)
                    }
        except Exception as e:
            print(f"Error listing models for project {project_name}: {str(e)}")
    
    return hierarchy

# Get hierarchy information
hierarchy = get_fiddler_hierarchy()

hierarchy

In [8]:
def print_hierarchy_summary(hierarchy: Dict) -> None:
    """
    Print a summary of the hierarchy structure
    
    Args:
        hierarchy: Hierarchical structure of projects, models, and features
    """
    print(f"Total Projects: {hierarchy['total_projects']}")
    print(f"Total Models: {hierarchy['total_models']}")
    print(f"Total Features: {hierarchy['total_features']}\n")
    
    for project_name, project_info in hierarchy['projects'].items():
        continue
        print(f"Project: {project_name}")
        print(f"  Models: {project_info['model_count']}")
        print(f"  Features: {project_info['feature_count']}")
        
        for model_name, model_info in project_info['models'].items():
            print(f"    Model: {model_name}")
            print(f"      Features: {model_info['feature_count']}")
            if model_info['feature_count'] > 0:
                for feature in model_info['features']:
                    print(f"        - {feature}")
        print()


def create_feature_dataframe(hierarchy: Dict) -> pd.DataFrame:
    """
    Create a dataframe with project, model, and feature information
    
    Args:
        hierarchy: Hierarchical structure of projects, models, and features
        
    Returns:
        Pandas DataFrame with project, model, and feature columns
    """
    data = []
    
    for project_name, project_info in hierarchy['projects'].items():
        for model_name, model_info in project_info['models'].items():
            for feature in model_info['features']:
                data.append({
                    'project': project_name,
                    'model': model_name,
                    'feature': feature
                })
    
    return pd.DataFrame(data)

def get_statistics(feature_df: pd.DataFrame) -> Dict:
    """
    Get comprehensive statistics for projects and models using the feature dataframe
    
    Args:
        feature_df: Dataframe with project, model, and feature information
        
    Returns:
        Dictionary containing dataframes with project and model statistics
    """
    # Get model-level statistics by grouping features by project and model
    model_stats = feature_df.groupby(['project', 'model']).size().reset_index(name='feature_count')
    
    # Get project-level statistics by grouping models by project
    project_stats = model_stats.groupby('project').agg(
        model_count=('model', 'nunique'),
        feature_count=('feature_count', 'sum')
    ).reset_index()
    
    # Calculate high-level analytics
    summary_stats = {
        # Project-level statistics
        'mean_avg_models_per_project': project_stats['model_count'].mean(),
        'median_models_per_project': project_stats['model_count'].median(),
        'min_models_per_project': project_stats['model_count'].min(),
        'max_models_per_project': project_stats['model_count'].max(),
        
        # Model-level statistics
        'mean_avg_features_per_model': model_stats['feature_count'].mean(),
        'median_features_per_model': model_stats['feature_count'].median(),
        'min_features_per_model': model_stats['feature_count'].min(),
        'max_features_per_model': model_stats['feature_count'].max()
    }
    
    return {
        'project_stats': project_stats,
        'model_stats': model_stats,
        'summary_stats': summary_stats
    }


In [9]:
# Create feature dataframe
feature_df = create_feature_dataframe(hierarchy)
print(f"Feature DataFrame Shape: {feature_df.shape}")

# Save to CSV if output file specified
if EXPORT_CSV:
    feature_df.to_csv('feature_df.csv', index=False)
    print(f"Feature dataframe saved to : feature_df.csv")

# Print summary
print_hierarchy_summary(hierarchy)

stats = get_statistics(feature_df=feature_df)

for k,v in stats['summary_stats'].items():
    print(f"{k}: {v}")


Feature DataFrame Shape: (8762, 3)
Feature dataframe saved to : feature_df.csv
Total Projects: 21
Total Models: 182
Total Features: 9098

mean_avg_models_per_project: 9.529411764705882
median_models_per_project: 3.0
min_models_per_project: 1
max_models_per_project: 62
mean_avg_features_per_model: 54.08641975308642
median_features_per_model: 62.0
min_features_per_model: 1
max_features_per_model: 109


In [12]:
print("\nModels per Project:")
print(stats['project_stats'].sort_values('model_count', ascending=False))


Models per Project:
                 project  model_count  feature_count
12               sandbox           62           3296
3     matching_p_contact           45           3792
15             thumbtack            7              8
8                pricing            6            126
16            tsp_openai            6              9
4                 mm_tte            6            117
5          p_contact_dcn            3            186
6           p_contact_mc            3            222
1            feat_sel_v5            3            216
9   pro_optimization_ppr            3            105
11        ratio_features            3            216
13                 sasha            3              3
14        testtensorflow            3            186
2      hire_pii_analyzer            3              3
0            feat_sel_v4            3            222
7                ppr_mvt            2             52
10   quickstart_examples            1              3


In [11]:
LIMIT = 10


print(f"\nTop {LIMIT} Models by Feature Count:")
print(stats['model_stats'].sort_values('feature_count', ascending=False).head(LIMIT))



Top 10 Models by Feature Count:
               project                        model  feature_count
36  matching_p_contact      production_rev_v3_20225            109
51  matching_p_contact         staging_rev_v3_20225            109
22  matching_p_contact  development_rev_v3_20225_pb            109
21  matching_p_contact     development_rev_v3_20225            109
47  matching_p_contact        staging_pbrev_v3_2025            109
37  matching_p_contact   production_rev_v3_20225_pb            109
17  matching_p_contact    development_pbrev_v3_2025            109
52  matching_p_contact      staging_rev_v3_20225_pb            109
32  matching_p_contact     production_pbrev_v3_2025            109
43  matching_p_contact       staging_hprof_jdv3_rev            105
