In [None]:
#!/usr/bin/env python3
# Fiddler Environment Statistics
# This notebook extracts hierarchical information about projects, models, and features from a Fiddler environment

import os
from typing import Dict, List, Any

import pandas as pd
import fiddler as fdl
# Connection settings.
# The FIDDLER_URL / FIDDLER_TOKEN environment variables take precedence so that credentials
# do not have to be saved inside the notebook; the literals below are only fallbacks.
URL = os.environ.get('FIDDLER_URL', 'https://customer.fiddler.ai') # Example: 'https://your_company_name.fiddler.ai'
TOKEN = os.environ.get('FIDDLER_TOKEN', '')  # Get this from the Settings > Credentials tab in Fiddler UI
EXPORT_CSV = True  # When True, later cells write the env_stats__*.csv files to the working directory

fdl.init(url=URL, token=TOKEN, auto_attach_log_handler=False)

# Echo the connection details so the reader can confirm which environment is being inspected
print(f"Successfully connected to Fiddler at {URL}")
print(f"Client version:    {fdl.__version__}")
print(f"Server version:    {fdl.conn.server_version}")
print(f"Organization ID:   {fdl.conn.organization_id}")
print(f"Organization name: {fdl.conn.organization_name}")

In [None]:
from sys import version


def get_fiddler_hierarchy() -> Dict:
    """
    Extract hierarchical information about projects, models, and features including timestamps.

    Lists every project via ``fdl.Project.list()`` and every model of each project via
    ``fdl.Model.list(project_id=...)``. Model objects that do not expose a ``spec``
    attribute are expanded with ``model.fetch()`` before their input features are read.

    Failures are reported with ``print`` and do not abort the walk:
      * a failure while listing a project's models leaves that project with no models;
      * a failure while inspecting one model records the model with empty features and an
        ``'error'`` entry holding the exception text.

    Returns: Dictionary with the keys
        'projects'       -> {project_name: {'id', 'models', 'model_count', 'feature_count'}}
        'total_projects' -> number of projects listed
        'total_models'   -> number of models listed across all projects
        'total_features' -> number of input features across successfully inspected models
      where each entry of 'models' is
        {model_name: {'id', 'features', 'feature_count', 'created_at', 'updated_at',
                      'version_name'[, 'error']}}
    """
    # Initialize hierarchy structure
    hierarchy = {
        'projects': {},
        'total_projects': 0,
        'total_models': 0,
        'total_features': 0
    }

    # Get all projects
    projects = list(fdl.Project.list())
    hierarchy['total_projects'] = len(projects)

    for project in projects:
        project_name = project.name
        project_id = project.id

        # Initialize project entry in hierarchy
        project_entry = {
            'id': project_id,
            'models': {},
            'model_count': 0,
            'feature_count': 0
        }
        hierarchy['projects'][project_name] = project_entry

        try:
            # Get all models for this project
            models = list(fdl.Model.list(project_id=project_id))
            project_entry['model_count'] = len(models)
            hierarchy['total_models'] += len(models)

            for model in models:
                model_name = model.name
                model_id = model.id
                # Use the model's own version label (not sys.version, the interpreter
                # version); fall back to the model name when no version is set.
                version_name = getattr(model, 'version', None) or model_name

                # NOTE(review): entries are keyed by model name only. If the environment
                # holds several versions of the same model name, later ones overwrite
                # earlier ones here while the counters still include them - confirm.
                try:
                    if not hasattr(model, 'spec'):
                        model = model.fetch()  # For ModelCompact objects

                    # Extract input features; tolerate a missing spec or `inputs` of None
                    spec = getattr(model, 'spec', None)
                    features = list(getattr(spec, 'inputs', None) or [])

                    # Timestamps are read defensively; None when the attribute is absent
                    created_at = getattr(model, 'created_at', None)
                    updated_at = getattr(model, 'updated_at', None)

                    # Add model to project hierarchy
                    project_entry['models'][model_name] = {
                        'id': model_id,
                        'features': features,
                        'feature_count': len(features),
                        'created_at': created_at,
                        'updated_at': updated_at,
                        'version_name': version_name
                    }

                    # Update counts
                    project_entry['feature_count'] += len(features)
                    hierarchy['total_features'] += len(features)
                except Exception as e:
                    # Best effort: keep the model in the hierarchy, flagged with the error
                    print(f"Error getting info for model {model_name} in project {project_name}: {str(e)}")
                    project_entry['models'][model_name] = {
                        'id': model_id,
                        'features': [],
                        'feature_count': 0,
                        'created_at': None,
                        'updated_at': None,
                        'version_name': version_name,
                        'error': str(e)
                    }
        except Exception as e:
            print(f"Error listing models for project {project_name}: {str(e)}")

    return hierarchy

# Get hierarchy information.
# The resulting dict is reused by the dataframe-building and summary cells below.
hierarchy = get_fiddler_hierarchy()

# Bare expression: when run as the last statement of a notebook cell this shows the whole nested dict
hierarchy

In [None]:
def print_hierarchy_summary(hierarchy: Dict, show_details: bool = True) -> None:
    """
    Print a summary of the hierarchy structure

    Always prints the environment-wide totals. When ``show_details`` is true it then
    prints, for every project, its model and feature counts followed by each model
    and the names of its features.

    Args:
        hierarchy: Hierarchical structure of projects, models, and features
        show_details: Set to False to print only the three total lines
    """
    print(f"Total Projects: {hierarchy['total_projects']}")
    print(f"Total Models:   {hierarchy['total_models']}")
    print(f"Total Features: {hierarchy['total_features']}\n")

    if not show_details:
        return

    for project_name, project_info in hierarchy['projects'].items():
        print(f"Project: {project_name}")
        print(f"  Models: {project_info['model_count']}")
        print(f"  Features: {project_info['feature_count']}")

        for model_name, model_info in project_info['models'].items():
            print(f"    Model: {model_name}")
            print(f"      Features: {model_info['feature_count']}")
            # An empty feature list simply prints nothing here
            for feature in model_info['features']:
                print(f"        - {feature}")
        # Blank line between projects
        print()


def create_feature_dataframe(hierarchy: Dict) -> pd.DataFrame:
    """
    Create a dataframe with project, model, feature information, and timestamps

    Produces one row per (project, model, feature). Models without any features
    contribute no rows. The column set is fixed, so an environment without any
    features still yields a (zero-row) frame with the expected columns.

    Args: hierarchy: Hierarchical structure of projects, models, and features
    Returns: Pandas DataFrame with the columns
        project, model, version_name, feature, created_at, updated_at
    """
    columns = ['project', 'model', 'version_name', 'feature', 'created_at', 'updated_at']
    rows = []

    for project_name, project_info in hierarchy['projects'].items():
        for model_name, model_info in project_info['models'].items():
            # Per-model values are repeated on every feature row of that model
            created_at = model_info.get('created_at')
            updated_at = model_info.get('updated_at')
            version_name = model_info.get('version_name', model_name)

            for feature in model_info['features']:
                rows.append({
                    'project': project_name,
                    'model': model_name,
                    'version_name': version_name,
                    'feature': feature,
                    'created_at': created_at,
                    'updated_at': updated_at
                })

    # Passing `columns` keeps the schema stable even when `rows` is empty
    return pd.DataFrame(rows, columns=columns)

def create_model_version_dataframe(hierarchy: Dict) -> pd.DataFrame:
    """
    Create a dataframe with project, model, version name, and timestamps - the final export table

    Produces one row per model, including models that have no features. The column
    set is fixed, so an environment without any models still yields a (zero-row)
    frame with the expected columns.

    Args: hierarchy: Hierarchical structure of projects, models, and features
    Returns: Pandas DataFrame with the columns
        project, model, version_name, created_at, updated_at, feature_count
    """
    columns = ['project', 'model', 'version_name', 'created_at', 'updated_at', 'feature_count']

    rows = [
        {
            'project': project_name,
            'model': model_name,
            # Missing keys fall back to the model name / None / 0 respectively
            'version_name': model_info.get('version_name', model_name),
            'created_at': model_info.get('created_at'),
            'updated_at': model_info.get('updated_at'),
            'feature_count': model_info.get('feature_count', 0)
        }
        for project_name, project_info in hierarchy['projects'].items()
        for model_name, model_info in project_info['models'].items()
    ]

    # Passing `columns` keeps the schema stable even when `rows` is empty
    return pd.DataFrame(rows, columns=columns)

def get_statistics(feature_df: pd.DataFrame) -> Dict:
    """
    Get comprehensive statistics for projects and models using the feature dataframe

    Because the input has one row per feature, models without any features do not
    appear in it and are therefore not reflected in these statistics.

    Args: feature_df: Dataframe with project, model, and feature information with timestamps
          (columns project, model, version_name, created_at, updated_at are required
          unless the frame is empty)
    Returns: Dictionary with
        'project_stats': DataFrame of project, model_count, feature_count
        'model_stats':   DataFrame of project, model, version_name, created_at,
                         updated_at, feature_count
        'summary_stats': dict of mean/median/min/max models per project and
                         features per model
    """
    group_keys = ['project', 'model', 'version_name', 'created_at', 'updated_at']

    # A frame built from zero records may have no columns at all; give it the
    # expected schema so the groupby below yields empty results instead of KeyError.
    if feature_df.empty:
        feature_df = pd.DataFrame(columns=group_keys)

    # Count features per model. Timestamps are part of the key only to carry them
    # through; dropna=False keeps models whose timestamps are missing, which the
    # default groupby behaviour would silently drop.
    model_stats = (
        feature_df
        .groupby(group_keys, dropna=False)
        .size()
        .reset_index(name='feature_count')
    )

    # Get project-level statistics by grouping models by project
    project_stats = model_stats.groupby('project').agg(
        model_count=('model', 'nunique'),
        feature_count=('feature_count', 'sum')
    ).reset_index()

    models_per_project = project_stats['model_count']
    features_per_model = model_stats['feature_count']

    # Calculate high-level analytics
    summary_stats = {
        # Project-level statistics
        'mean_avg_models_per_project': models_per_project.mean(),
        'median_models_per_project': models_per_project.median(),
        'min_models_per_project': models_per_project.min(),
        'max_models_per_project': models_per_project.max(),

        # Model-level statistics
        'mean_avg_features_per_model': features_per_model.mean(),
        'median_features_per_model': features_per_model.median(),
        'min_features_per_model': features_per_model.min(),
        'max_features_per_model': features_per_model.max()
    }

    return {
        'project_stats': project_stats,
        'model_stats': model_stats,
        'summary_stats': summary_stats
    }

def get_enhanced_statistics(model_version_df: pd.DataFrame) -> Dict:
    """
    Get enhanced statistics including timestamp analysis from the model version dataframe

    Args: model_version_df: Dataframe with model version information and timestamps
          (columns project, model, created_at, updated_at, feature_count)
    Returns: Dictionary with
        'basic_stats':       totals and per-project / per-model averages;
                             'avg_models_per_project' is NaN when there are no projects
        'timestamp_stats':   timestamp insights over the rows that have both
                             created_at and updated_at; an empty dict when none do
        'project_breakdown': DataFrame of project, model_count, feature_count
    """
    total_models = len(model_version_df)
    total_projects = model_version_df['project'].nunique()

    # Basic statistics
    basic_stats = {
        'total_models': total_models,
        'total_projects': total_projects,
        # Guard the division: an environment without projects has no meaningful average
        'avg_models_per_project': total_models / total_projects if total_projects else float('nan'),
        'avg_features_per_model': model_version_df['feature_count'].mean(),
        'median_features_per_model': model_version_df['feature_count'].median()
    }

    # Timestamp-based statistics (only for models with valid timestamps)
    valid_timestamps = model_version_df.dropna(subset=['created_at', 'updated_at'])

    timestamp_stats = {}
    if len(valid_timestamps) > 0:
        # Work on a copy so the caller's frame is never modified
        valid_timestamps = valid_timestamps.copy()
        # Check and convert each column on its own: one may already be datetime
        # while the other still holds strings or objects.
        for column in ('created_at', 'updated_at'):
            if not pd.api.types.is_datetime64_any_dtype(valid_timestamps[column]):
                valid_timestamps[column] = pd.to_datetime(valid_timestamps[column])

        # .dt.days keeps whole days only, so the average is over truncated day counts
        lifetime_days = (valid_timestamps['updated_at'] - valid_timestamps['created_at']).dt.days

        timestamp_stats = {
            'models_with_timestamps': len(valid_timestamps),
            'earliest_model_created': valid_timestamps['created_at'].min(),
            'latest_model_created': valid_timestamps['created_at'].max(),
            'most_recent_update': valid_timestamps['updated_at'].max(),
            'avg_days_between_create_update': lifetime_days.mean()
        }

    project_breakdown = (
        model_version_df
        .groupby('project')
        .agg({'model': 'count', 'feature_count': 'sum'})
        .reset_index()
        .rename(columns={'model': 'model_count'})
    )

    return {
        'basic_stats': basic_stats,
        'timestamp_stats': timestamp_stats,
        'project_breakdown': project_breakdown
    }


In [None]:
# Flatten the hierarchy into one row per (project, model, feature)
feature_df = create_feature_dataframe(hierarchy)
print(f"Feature DataFrame Shape: {feature_df.shape}")
print(f"Feature DataFrame Columns: {list(feature_df.columns)}")

# One row per model: this is the table that gets exported as the overview
model_version_df = create_model_version_dataframe(hierarchy)
print(f"\nModel Version DataFrame Shape: {model_version_df.shape}")
print(f"Model Version DataFrame Columns: {list(model_version_df.columns)}")

# Write both tables to the working directory when CSV export is enabled
if EXPORT_CSV:
    feature_df.to_csv('env_stats__flattened_hierarchy.csv', index=False)
    model_version_df.to_csv('env_stats__overview.csv', index=False)
    print("\nEnriched feature dataframe saved to: env_stats__flattened_hierarchy.csv")
    print("Model version export table saved to: env_stats__overview.csv")

# Totals and per-project listing
print_hierarchy_summary(hierarchy)

# Both statistics bundles are reused by the cells that follow
stats = get_statistics(feature_df=feature_df)
enhanced_stats = get_enhanced_statistics(model_version_df)

# Each section is a heading followed by "key: value" lines
print("\n=== BASIC STATISTICS ===")
for k, v in stats['summary_stats'].items():
    print(f"{k}: {v}")

print("\n=== MORE STATISTICS ===")
for k, v in enhanced_stats['basic_stats'].items():
    print(f"{k}: {v}")

print("\n=== TIMESTAMP STATISTICS ===")
for k, v in enhanced_stats['timestamp_stats'].items():
    print(f"{k}: {v}")


In [None]:
# Two views of models per project, largest projects first:
#  - "Enhanced" comes from the per-model table
#  - "Traditional" comes from the per-feature table
print("\n=== PROJECT BREAKDOWN ===")

print("Models per Project (Enhanced):")
enhanced_breakdown = enhanced_stats['project_breakdown']
print(enhanced_breakdown.sort_values(by='model_count', ascending=False))

print("\nModels per Project (Traditional):")
traditional_breakdown = stats['project_stats']
print(traditional_breakdown.sort_values(by='model_count', ascending=False))

In [None]:
# Maximum number of rows shown in the two tables below
LIMIT = 15

print("\n=== MODEL ANALYSIS ===")
print(f"Top {LIMIT} Models by Feature Count (with timestamps):")
models_by_feature_count = stats['model_stats'].sort_values(by='feature_count', ascending=False)
print(models_by_feature_count.head(LIMIT))

print(f"\n=== MODEL VERSION EXPORT TABLE (First {LIMIT}) ===")
print("This is the final export table: project, model, version_name, created_at, updated_at")
print(model_version_df.head(LIMIT))

# Rank only the models that carry both timestamps
report_columns = ['project', 'model', 'version_name', 'created_at', 'updated_at']
models_with_timestamps = model_version_df.dropna(subset=['created_at', 'updated_at'])

if len(models_with_timestamps) == 0:
    print("\nNo models found with timestamp information.")
else:
    print("\n=== NEWEST AND OLDEST MODELS ===")

    print("Newest 5 models by creation date:")
    newest_models = models_with_timestamps.nlargest(5, 'created_at')[report_columns]
    print(newest_models)

    print("\nOldest 5 models by creation date:")
    oldest_models = models_with_timestamps.nsmallest(5, 'created_at')[report_columns]
    print(oldest_models)

    print("\nMost recently updated 5 models:")
    recent_updates = models_with_timestamps.nlargest(5, 'updated_at')[report_columns]
    print(recent_updates)


In [None]:
# Final Summary and Export Information
print("="*60)
print("FINAL SUMMARY")
print("="*60)

total_models = len(model_version_df)

print(f"\n📊 DATA COLLECTION COMPLETE")
print(f"   • Total projects analyzed: {len(hierarchy['projects'])}")
print(f"   • Total models found: {total_models}")
print(f"   • Total features catalogued: {len(feature_df)}")

if EXPORT_CSV:
    print(f"\n📁 EXPORT FILES CREATED:")
    print(f"   • env_stats__overview.csv - Main export table with timestamps")
    print(f"     Columns: {list(model_version_df.columns)}")
    print(f"   • env_stats__flattened_hierarchy.csv - Detailed feature-level data with timestamps")
    print(f"     Columns: {list(feature_df.columns)}")

# Models that carry both timestamps. A frame built from zero models may lack the
# timestamp columns entirely, in which case dropna(subset=...) would raise KeyError.
timestamp_columns = ['created_at', 'updated_at']
if all(column in model_version_df.columns for column in timestamp_columns):
    models_with_timestamps = model_version_df.dropna(subset=timestamp_columns)
else:
    models_with_timestamps = model_version_df.iloc[0:0]

# Avoid dividing by zero when the environment contains no models
coverage_pct = len(models_with_timestamps) / total_models * 100 if total_models else 0.0

print(f"\n⏰ TIMESTAMP COVERAGE:")
print(f"   • Models with timestamps: {len(models_with_timestamps)} out of {total_models}")
print(f"   • Coverage: {coverage_pct:.1f}%")

if len(models_with_timestamps) > 0:
    print(f"\n🕐 TIMESTAMP INSIGHTS:")
    print(f"   • Date range: {models_with_timestamps['created_at'].min()} to {models_with_timestamps['created_at'].max()}")
    print(f"   • Most recent update: {models_with_timestamps['updated_at'].max()}")

print(f"\n✅ NOTEBOOK EXECUTION COMPLETE")
