# Get model cards graph

Here we build the AI ecosystem graph used for the model card similarity analysis. This is a NetworkX graph in which every node is a model in the AI ecosystem and every edge is a relation between two models: finetunes, quantizations, and adapters, *but not merges*. Thus, the graph is a tree. The sections below attach the same node attributes to the full graph and to the finetune-only graph as well.

We define the following attributes on the nodes: the model card text, the length of the model card in characters and in words, and whether the card was automatically generated. We then pickle each graph so that it can be reused in later analyses.

In [1]:
import numpy as np
import pandas as pd
import networkx as nx
import pickle

In [2]:
# Read the model card dataset from its CSV file into a DataFrame
df = pd.read_csv("data/ai_ecosystem_withmodelcards_withcardinfo.csv")

df.head()

Unnamed: 0.1,Unnamed: 0,model_id,likes,trendingScore,private,downloads,tags,pipeline_tag,library_name,createdAt,...,quantized_parent,adapter_parent,merge_parent,languages,modelCard,ratelimit_retries,exception_raised,model_card_length_characters,model_card_length_words,model_card_automatically_generated
0,0,moonshotai/Kimi-K2-Instruct,479,479.0,False,13356,"['transformers', 'safetensors', 'kimi_k2', 'te...",text-generation,transformers,2025-07-11T00:55:12.000Z,...,[],[],[],[],---\nlicense: other\nlicense_name: modified-mi...,0.0,,25090,2120,False
1,1,THUDM/GLM-4.1V-9B-Thinking,569,367.0,False,33839,"['transformers', 'safetensors', 'glm4v', 'imag...",image-text-to-text,transformers,2025-06-28T14:24:10.000Z,...,[],[],[],"['English', 'Chinese']",---\r\nlicense: mit\r\nlanguage:\r\n- en\r\n- ...,0.0,,4721,422,False
2,2,HuggingFaceTB/SmolLM3-3B,351,351.0,False,21863,"['transformers', 'safetensors', 'smollm3', 'te...",text-generation,transformers,2025-07-08T10:11:45.000Z,...,[],[],[],"['English', 'French', 'Spanish', 'Italian', 'P...",---\nlibrary_name: transformers\nlicense: apac...,0.0,,15929,2282,False
3,3,black-forest-labs/FLUX.1-Kontext-dev,1568,247.0,False,230863,"['diffusers', 'safetensors', 'image-generation...",image-to-image,diffusers,2025-05-28T22:23:43.000Z,...,[],[],[],['English'],---\nlanguage:\n- en\nlicense: other\nlicense_...,0.0,,9621,1158,False
4,4,mistralai/Devstral-Small-2507,155,155.0,False,5090,"['vllm', 'safetensors', 'mistral', 'text2text-...",text-generation,vllm,2025-07-04T14:23:44.000Z,...,[],[],[],"['English', 'French', 'German', 'Spanish', 'Po...",---\nlanguage:\n- en\n- fr\n- de\n- es\n- pt\n...,0.0,,18761,2110,False


## The full graph

In [3]:
# Load the previously built full ecosystem graph
with open('data/ai_ecosystem_graph.pkl', 'rb') as f:
    G = pickle.load(f)

# Columns copied from df onto the matching graph node, stored under the same names
card_columns = [
    'modelCard',
    'model_card_length_characters',
    'model_card_length_words',
    'model_card_automatically_generated',
]

# Iterate plain tuples over only the needed columns: unlike iterrows, this
# does not build a full Series (spanning every column of df) for each row.
# G.nodes[model_id] raises KeyError if a model in df is not a node of G,
# so a mismatch between the dataset and the graph stops the cell before saving.
for model_id, *card_values in df[['model_id', *card_columns]].itertuples(index=False, name=None):
    G.nodes[model_id].update(zip(card_columns, card_values))

# Save the annotated graph
with open('data/ai_ecosystem_graph_modelcards.pkl', 'wb') as f:
    pickle.dump(G, f)

## The nomerges graph

In [None]:
# Load the previously built graph that excludes merge relations
with open('data/ai_ecosystem_graph_nomerges.pkl', 'rb') as f:
    G = pickle.load(f)

# Columns copied from df onto the matching graph node, stored under the same names
card_columns = [
    'modelCard',
    'model_card_length_characters',
    'model_card_length_words',
    'model_card_automatically_generated',
]

# Iterate plain tuples over only the needed columns: unlike iterrows, this
# does not build a full Series (spanning every column of df) for each row.
# G.nodes[model_id] raises KeyError if a model in df is not a node of G,
# so a mismatch between the dataset and the graph stops the cell before saving.
for model_id, *card_values in df[['model_id', *card_columns]].itertuples(index=False, name=None):
    G.nodes[model_id].update(zip(card_columns, card_values))

# Save the annotated graph
with open('data/ai_ecosystem_graph_nomerges_modelcards.pkl', 'wb') as f:
    pickle.dump(G, f)

## The finetune graph

In [None]:
# Load the previously built finetune graph
with open('data/ai_ecosystem_graph_finetune.pkl', 'rb') as f:
    G_finetuneonly = pickle.load(f)

# Columns copied from df onto the matching graph node, stored under the same names
card_columns = [
    'modelCard',
    'model_card_length_characters',
    'model_card_length_words',
    'model_card_automatically_generated',
]

# Iterate plain tuples over only the needed columns: unlike iterrows, this
# does not build a full Series (spanning every column of df) for each row.
# G_finetuneonly.nodes[model_id] raises KeyError if a model in df is not a
# node of this graph, so a mismatch stops the cell before saving.
for model_id, *card_values in df[['model_id', *card_columns]].itertuples(index=False, name=None):
    G_finetuneonly.nodes[model_id].update(zip(card_columns, card_values))

# Save the annotated graph
with open('data/ai_ecosystem_graph_finetune_modelcards.pkl', 'wb') as f:
    pickle.dump(G_finetuneonly, f)