# Collecting Model Tree for DeepSeek-V3

This notebook uses the `HuggingFaceModelCollector` class to recursively collect information about the DeepSeek-V3 model and all its descendants.

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from collect_huggingface_models import HuggingFaceModelCollector

: 

## Initialize the Collector and Collect Model Tree

In [None]:
# Initialize the collector
collector = HuggingFaceModelCollector()

# Define the base model and derive the output file from it, so the CSV name
# always matches the model being collected.
base_model_id = "deepseek-ai/DeepSeek-V3"
output_file = f"tree_info_{base_model_id}.csv"

# The model ID contains a "/", so the CSV path has a parent directory
# ("tree_info_deepseek-ai"). Create it up front so the output path is
# writable on a fresh checkout; exist_ok makes re-running this cell harmless.
Path(output_file).parent.mkdir(parents=True, exist_ok=True)

# Collect the model tree.
# NOTE(review): presumably this writes its results to output_file (the next
# section reads that path back) -- confirm against HuggingFaceModelCollector.
collector.collect_model_tree(base_model_id, output_file)

## Load and Analyze the Collected Data

In [None]:
# Read the CSV at output_file back into a DataFrame for analysis.
df = pd.read_csv(filepath_or_buffer=output_file)

# Headline row count, followed by a preview of the first five rows.
print(f"Total number of models in the tree: {len(df)}")
print("\nSample of collected data:")
sample_rows = df.head(n=5)
display(sample_rows)

# Summary statistics from DataFrame.describe() with its default settings.
print("\nSummary statistics:")
summary_stats = df.describe()
display(summary_stats)

## Visualize Model Popularity (Downloads vs. Likes)

In [None]:
# Scatter plot of downloads (x) against likes (y), one point per row of df.
fig, ax = plt.subplots(figsize=(15, 8))

ax.scatter(df['downloads'], df['likes'], alpha=0.6)
ax.set_xlabel('Downloads')
ax.set_ylabel('Likes')
ax.set_title('Downloads vs Likes for Models in DeepSeek-V3 Tree')

# A pure log axis cannot place 0, so any model with zero downloads or zero
# likes would silently vanish from the plot. 'symlog' is linear up to
# linthresh and logarithmic beyond it, which keeps those points visible
# while still spreading out the large values.
ax.set_xscale('symlog', linthresh=1)
ax.set_yscale('symlog', linthresh=1)

ax.grid(True, alpha=0.3)
plt.show()

## Analyze Model Types and Tags

In [None]:
# Analyze library distribution.
# dropna=False keeps rows with no library_name as their own NaN bucket, so
# the counts add up to the total number of rows instead of silently
# excluding rows with a missing value.
library_counts = df['library_name'].value_counts(dropna=False)
print("Library distribution:")
display(library_counts)

# Analyze pipeline tags, again counting rows with a missing tag explicitly.
pipeline_counts = df['pipeline_tag'].value_counts(dropna=False)
print("\nPipeline tag distribution:")
display(pipeline_counts)