In [1]:
import pandas as pd
from sqlalchemy import create_engine, text

from rs_graph.db import models as db_models
from rs_graph.db.constants import PROD_DATABASE_FILEPATH

# Get db engine for production database
db_conn = create_engine(f"sqlite:///{PROD_DATABASE_FILEPATH}")

In [2]:
def read_table(table: str) -> pd.DataFrame:
    return pd.read_sql(text(f"SELECT * FROM {table}"), db_conn)


# Read all data from database
doc_repo_links = read_table(db_models.DocumentRepositoryLink.__tablename__)
researchers = read_table(db_models.Researcher.__tablename__)
devs = read_table(db_models.DeveloperAccount.__tablename__)
documents = read_table(db_models.Document.__tablename__)
document_contributors = read_table(db_models.DocumentContributor.__tablename__)
repositories = read_table(db_models.Repository.__tablename__)
repository_contributors = read_table(db_models.RepositoryContributor.__tablename__)
topics = read_table(db_models.Topic.__tablename__)
document_topics = read_table(db_models.DocumentTopic.__tablename__)
dataset_sources = read_table(db_models.DatasetSource.__tablename__)
researcher_dev_links = read_table(
    db_models.ResearcherDeveloperAccountLink.__tablename__
)
document_alternate_dois = read_table(db_models.DocumentAlternateDOI.__tablename__)

# Drop all "updated_datetime" and "created_datetime" columns
for df in [
    doc_repo_links,
    researchers,
    devs,
    documents,
    document_contributors,
    repositories,
    repository_contributors,
    topics,
    document_topics,
    dataset_sources,
    researcher_dev_links,
]:
    df.drop(columns=["updated_datetime", "created_datetime"], inplace=True)

# Specifically drop doc_repo_links "id" column
# It isn't used and will get in the way later when we do a lot of joins
doc_repo_links.drop(columns=["id"], inplace=True)

# Construct reduced doc_repo_links
original_doc_repo_links_len = len(doc_repo_links)
doc_repo_links = doc_repo_links.drop_duplicates(subset=["document_id"], keep=False)
doc_repo_links = doc_repo_links.drop_duplicates(subset=["repository_id"], keep=False)

# Reduce other tables to only documents / repositories in the updated doc_repo_links
documents = documents[documents["id"].isin(doc_repo_links["document_id"])]
repositories = repositories[repositories["id"].isin(doc_repo_links["repository_id"])]
document_contributors = document_contributors[
    document_contributors["document_id"].isin(documents["id"])
]
repository_contributors = repository_contributors[
    repository_contributors["repository_id"].isin(repositories["id"])
]
document_topics = document_topics[document_topics["document_id"].isin(documents["id"])]

# Reduce researchers and devs to only those in the
# updated document_contributors and repository_contributors
researchers = researchers[
    researchers["id"].isin(document_contributors["researcher_id"])
]
devs = devs[devs["id"].isin(repository_contributors["developer_account_id"])]
researcher_dev_links = researcher_dev_links[
    (
        researcher_dev_links["researcher_id"].isin(researchers["id"])
        & researcher_dev_links["developer_account_id"].isin(devs["id"])
    )
]

# Sort document topics and keep first
document_topics = document_topics.sort_values("score", ascending=False)
document_topics = document_topics.drop_duplicates(subset=["document_id"], keep="first")

# Create document, document topic merged table
merged_document_topics = pd.merge(
    document_topics, topics, left_on="topic_id", right_on="id"
)

# Create basic merged tables
merged_document_contributor_doc_repo_links = pd.merge(
    document_contributors, doc_repo_links, left_on="document_id", right_on="document_id"
)
merged_repository_contributor_doc_repo_links = pd.merge(
    repository_contributors,
    doc_repo_links,
    left_on="repository_id",
    right_on="repository_id",
)

# Compute stats for data sources
data_source_stats = []
for _, data_source in dataset_sources.iterrows():
    # Get total article-repo pairs
    data_source_stats.append(
        {
            "data_source": data_source["name"],
            "n_article_repo_pairs": len(
                doc_repo_links[doc_repo_links["dataset_source_id"] == data_source["id"]]
            ),
            "n_authors": merged_document_contributor_doc_repo_links.loc[
                merged_document_contributor_doc_repo_links["dataset_source_id"]
                == data_source["id"]
            ]["researcher_id"].nunique(),
            "n_devs": merged_repository_contributor_doc_repo_links.loc[
                merged_repository_contributor_doc_repo_links["dataset_source_id"]
                == data_source["id"]
            ]["developer_account_id"].nunique(),
        }
    )

# Create topic merged tables
merged_doc_repo_links_topics = pd.merge(
    doc_repo_links, document_topics, left_on="document_id", right_on="document_id"
).merge(topics, left_on="topic_id", right_on="id")
merged_doc_repo_links_topics_document_contributors = pd.merge(
    merged_doc_repo_links_topics,
    document_contributors,
    left_on="document_id",
    right_on="document_id",
)
merged_doc_repo_links_topics_repository_contributors = pd.merge(
    merged_doc_repo_links_topics,
    repository_contributors,
    left_on="repository_id",
    right_on="repository_id",
)

# Compute stats for domains
domain_stats = []
for domain in merged_doc_repo_links_topics.domain_name.unique():
    # Get total article-repo pairs
    domain_stats.append(
        {
            "domain": domain,
            "n_article_repo_pairs": len(
                merged_doc_repo_links_topics[
                    merged_doc_repo_links_topics["domain_name"] == domain
                ]
            ),
            "n_authors": merged_doc_repo_links_topics_document_contributors.loc[
                merged_doc_repo_links_topics_document_contributors["domain_name"]
                == domain
            ]["researcher_id"].nunique(),
            "n_devs": merged_doc_repo_links_topics_repository_contributors.loc[
                merged_doc_repo_links_topics_repository_contributors["domain_name"]
                == domain
            ]["developer_account_id"].nunique(),
        }
    )

# Create document merged tables
merged_doc_repo_links_documents = pd.merge(
    doc_repo_links, documents, left_on="document_id", right_on="id"
)
merged_doc_repo_links_documents_document_contributors = pd.merge(
    merged_doc_repo_links_documents,
    document_contributors,
    left_on="document_id",
    right_on="document_id",
)
merged_doc_repo_links_documents_repository_contributors = pd.merge(
    merged_doc_repo_links_documents,
    repository_contributors,
    left_on="repository_id",
    right_on="repository_id",
)

# Compute stats for document types
# This isn't a standard data pull
# In short:
# - pairs from PLOS are "research articles"
# - pairs from JOSS are "software articles"
# - pairs from SoftwareX are "software articles"
# - pairs from Papers with Code / ArXiv are "pre-prints"
#   UNLESS they have been published in a journal
# All of those should be easy to assert / apply a label to with the exception
# of Papers with Code / ArXiv pre-prints that have been published in a journal
# In that case, we need to look at the existing document type in the database
# If the document type is "preprint" use preprint, otherwise, if it's anything else,
# use "research article"

# Create a "reduced_doc_types" dataframe with document_id and "reduced_doc_type"
# columns
reduced_doc_types_rows = []
# We can use the "reduced_doc_types" dataframe to calculate the stats

# Iter over data sources even though we are looking for doc types
for _, data_source in dataset_sources.iterrows():
    # Get total article-repo pairs
    doc_type = None
    if data_source["name"] in ["plos", "joss", "softwarex"]:
        if data_source["name"] == "plos":
            doc_type = "research article"
        else:
            doc_type = "software article"

        # Add all document_ids to reduced_doc_types_rows
        reduced_doc_types_rows.extend(
            [
                {"document_id": doc_id, "reduced_doc_type": doc_type}
                for doc_id in doc_repo_links[
                    (doc_repo_links["dataset_source_id"] == data_source["id"])
                ]["document_id"]
            ]
        )

    # Handle PwC
    else:
        # Get preprint pairs
        preprint_pairs = merged_doc_repo_links_documents[
            (merged_doc_repo_links_documents["dataset_source_id"] == data_source["id"])
            & (merged_doc_repo_links_documents["document_type"] == "preprint")
        ]

        # Add all document_ids to reduced_doc_types_rows
        reduced_doc_types_rows.extend(
            [
                {"document_id": doc_id, "reduced_doc_type": "preprint"}
                for doc_id in preprint_pairs["document_id"]
            ]
        )

        # Get research article pairs
        # This is the same just inverted to != "preprint"
        research_article_pairs = merged_doc_repo_links_documents[
            (merged_doc_repo_links_documents["dataset_source_id"] == data_source["id"])
            & (merged_doc_repo_links_documents["document_type"] != "preprint")
        ]

        # Add all document_ids to reduced_doc_types_rows
        reduced_doc_types_rows.extend(
            [
                {"document_id": doc_id, "reduced_doc_type": "research article"}
                for doc_id in research_article_pairs["document_id"]
            ]
        )

# Create reduced_doc_types dataframe
reduced_doc_types = pd.DataFrame(reduced_doc_types_rows)

In [3]:
from tqdm import tqdm

# Create subset documents
docs_w_1_citation = documents.loc[documents["cited_by_count"] >= 1].copy()

# Take sample?
# docs_w_1_citation = docs_w_1_citation.sample(frac=0.1, random_state=12)

# Subset to only certain columns
docs_w_1_citation = docs_w_1_citation[
    [
        "id",
        "publication_date",
        "cited_by_count",
        "fwci",
        "is_open_access",
    ]
]

# Rename id to document_id
docs_w_1_citation = docs_w_1_citation.rename(columns={"id": "document_id"})

# Merge repository id in
docs_w_1_citation = docs_w_1_citation.merge(
    doc_repo_links[
        [
            "document_id",
            "repository_id",
        ]
    ],
    left_on="document_id",
    right_on="document_id",
)

# Merge in document details (domain, document type)
docs_w_1_citation = (
    docs_w_1_citation.merge(
        document_topics[["document_id", "topic_id"]],
        left_on="document_id",
        right_on="document_id",
    )
    .merge(
        repositories[["id", "creation_datetime", "last_pushed_datetime"]],
        left_on="repository_id",
        right_on="id",
    )
    .drop(
        columns=["id"],
    )
    .merge(
        topics[["id", "domain_name"]],
        left_on="topic_id",
        right_on="id",
    )
    .drop(
        columns=["id", "topic_id"],
    )
    .merge(
        reduced_doc_types,
        left_on="document_id",
        right_on="document_id",
    )
    .rename(
        columns={
            "domain_name": "domain",
            "reduced_doc_type": "article_type",
        }
    )
)

# Drop any documents that have more than one repository (and vice versa)
docs_w_1_citation = docs_w_1_citation.drop_duplicates(
    subset=["document_id"], keep=False
)
docs_w_1_citation = docs_w_1_citation.drop_duplicates(
    subset=["repository_id"], keep=False
)

# Iter over articles and get the team composition info
team_composition_rows = []
cc_na_individuals = []
for _, row in tqdm(
    docs_w_1_citation.iterrows(),
    total=len(docs_w_1_citation),
):
    # Get the number of authors
    author_ids = document_contributors.loc[
        document_contributors["document_id"] == row["document_id"]
    ]["researcher_id"].unique()
    n_authors = len(author_ids)

    # Get the number of devs
    dev_ids = repository_contributors.loc[
        repository_contributors["repository_id"] == row["repository_id"]
    ]["developer_account_id"].unique()
    n_devs = len(dev_ids)

    # Get the set of dev_researcher_links for the devs
    dev_researcher_links = researcher_dev_links.loc[
        researcher_dev_links["developer_account_id"].isin(dev_ids)
    ].sort_values("predictive_model_confidence", ascending=False)

    # Drop duplicates by dev_id (keeping first)
    # as we may have accidently matched the same author to the multiple devs
    dev_researcher_links = dev_researcher_links.drop_duplicates(
        subset=["researcher_id"],
        keep="first",
    )

    # Drop any dev researcher links that have less than 97% confidence
    dev_researcher_links = dev_researcher_links.loc[
        dev_researcher_links["predictive_model_confidence"] >= 0.97
    ]

    # Get the number of devs who aren't authors
    n_non_author_devs = 0
    for dev_id in dev_ids:
        dev_researcher_ids = dev_researcher_links.loc[
            dev_researcher_links["developer_account_id"] == dev_id
        ]["researcher_id"].unique()

        if len(dev_researcher_ids) > 3:
            continue

        if not any(
            dev_researcher_id in author_ids for dev_researcher_id in dev_researcher_ids
        ):
            cc_na_individuals.append(
                {
                    "document_id": row["document_id"],
                    "repository_id": row["repository_id"],
                    "n_authors": n_authors,
                    "n_devs": n_devs,
                    "developer_account_id": dev_id,
                    "researcher_ids_count": len(dev_researcher_ids),
                    "researcher_ids": dev_researcher_ids.tolist(),
                }
            )

# Create cc_na_individuals dataframe
cc_na_individuals = pd.DataFrame(cc_na_individuals)

# Merge with cc_na_individuals
cc_na_individuals = cc_na_individuals.merge(
    docs_w_1_citation,
    left_on=["document_id", "repository_id"],
    right_on=["document_id", "repository_id"],
)

# Filter out papers with less than 3 authors or 1 dev
cc_na_individuals = cc_na_individuals.loc[cc_na_individuals["n_authors"] >= 3]
cc_na_individuals = cc_na_individuals.loc[cc_na_individuals["n_devs"] >= 1]

# Convert datetimes to datetime
cc_na_individuals["publication_date"] = pd.to_datetime(
    cc_na_individuals["publication_date"],
    utc=True,
)
cc_na_individuals["last_pushed_datetime"] = pd.to_datetime(
    cc_na_individuals["last_pushed_datetime"],
    utc=True,
)
cc_na_individuals["creation_datetime"] = pd.to_datetime(
    cc_na_individuals["creation_datetime"],
    utc=True,
)

# Create a "days_since_last_push" column
cc_na_individuals["days_from_publication_to_last_push"] = (
    cc_na_individuals["last_pushed_datetime"] - cc_na_individuals["publication_date"]
).dt.days

# Must have a push within 90 days of publication
cc_na_individuals_no_push_after_pub = cc_na_individuals.loc[
    cc_na_individuals["days_from_publication_to_last_push"] <= 90
].copy()

# Get the number of authors which would be the 97th percentile
n_authors_97th_percentile = cc_na_individuals_no_push_after_pub["n_authors"].quantile(
    0.97
)

# Remove rows that are greater than 97th percentile for total authors
cc_na_individuals_no_push_after_pub = cc_na_individuals_no_push_after_pub.loc[
    cc_na_individuals_no_push_after_pub["n_authors"]
    <= cc_na_individuals_no_push_after_pub["n_authors"].quantile(0.97)
]

cc_na_individuals_no_push_after_pub[
    [
        "document_id",
        "repository_id",
        "developer_account_id",
        "researcher_ids_count",
        "researcher_ids",
    ]
]

100%|██████████| 88551/88551 [02:00<00:00, 735.26it/s]


Unnamed: 0,document_id,repository_id,developer_account_id,researcher_ids_count,researcher_ids
8,21,21,39,0,[]
17,55,55,89,0,[]
47,82,81,164,0,[]
74,126,125,261,0,[]
81,138,137,283,0,[]
...,...,...,...,...,...
79430,157265,153544,130890,0,[]
79456,157277,153555,130890,0,[]
79457,157279,153557,130890,0,[]
79458,157280,153558,130890,0,[]


In [4]:
cc_na_individuals_no_push_after_pub.loc[
    cc_na_individuals_no_push_after_pub["researcher_ids_count"] > 1
][
    [
        "document_id",
        "repository_id",
        "developer_account_id",
        "researcher_ids_count",
        "researcher_ids",
    ]
].sort_values(
    by="researcher_ids_count", ascending=False
)

Unnamed: 0,document_id,repository_id,developer_account_id,researcher_ids_count,researcher_ids
26443,43020,41089,40097,3,"[72424, 44008, 281382]"
18545,25305,24280,1576,3,"[209976, 304662, 91404]"
39664,71206,68300,28978,3,"[83498, 174985, 252680]"
23085,35663,34110,10027,3,"[97043, 104851, 293211]"
57340,110223,106641,4140,3,"[81282, 1093, 87934]"
...,...,...,...,...,...
36462,64096,61366,34238,2,"[60280, 12572]"
36614,64322,61591,7040,2,"[185747, 183477]"
38099,68257,65398,80595,2,"[6365, 16309]"
38514,68939,66064,30927,2,"[53189, 195198]"


In [5]:
len(
    cc_na_individuals_no_push_after_pub.loc[
        cc_na_individuals_no_push_after_pub["researcher_ids_count"] > 1
    ]
) / len(cc_na_individuals_no_push_after_pub)

0.030234166584329612

In [6]:
len(
    cc_na_individuals_no_push_after_pub.loc[
        cc_na_individuals_no_push_after_pub["researcher_ids_count"] > 1
    ]
)

306

In [7]:
cc_na_individuals_no_push_after_pub.loc[
    cc_na_individuals_no_push_after_pub["researcher_ids_count"] == 0
][
    [
        "document_id",
        "repository_id",
        "developer_account_id",
        "researcher_ids_count",
        "researcher_ids",
    ]
].sample(
    10
)

Unnamed: 0,document_id,repository_id,developer_account_id,researcher_ids_count,researcher_ids
41438,75381,72373,72535,0,[]
54988,105139,101615,129245,0,[]
28915,47837,45803,75708,0,[]
31627,53557,51256,81968,0,[]
12154,10266,9957,25628,0,[]
35957,63047,60353,91978,0,[]
18353,24868,23848,46682,0,[]
54631,104535,101030,128711,0,[]
21587,32648,31200,56851,0,[]
26907,44112,42153,71137,0,[]


In [None]:
import time
from pathlib import Path

from gh_tokens_loader import GitHubTokensCycler
from ghapi.all import GhApi

tokens_file = Path.cwd().parent / ".github-tokens.yml"
gh_tokens_cycler = GitHubTokensCycler(tokens_file)

# Take a random sample of 20 rows
# Then, get the:
# - document id
# - document DOI
# - repository id
# - repository URL
# - developer account id
# - developer account URL


def prep_for_annotation(row: pd.Series) -> dict:
    # Get document details
    doc_details = documents.loc[documents["id"] == row["document_id"]].iloc[0]
    doc_doi = doc_details["doi"]
    doc_doi_url = f"https://doi.org/{doc_doi}"

    # Get repository details
    repo_details = repositories.loc[repositories["id"] == row["repository_id"]].iloc[0]
    repo_owner = repo_details["owner"]
    repo_name = repo_details["name"]
    repo_url = f"https://github.com/{repo_owner}/{repo_name}"

    # Get developer account details
    dev_details = devs.loc[devs["id"] == row["developer_account_id"]].iloc[0]
    dev_username = dev_details["username"]
    dev_url = f"https://github.com/{dev_username}"

    # Dev commit URL
    dev_commit_url = (
        f"https://github.com/{repo_owner}/{repo_name}/commits?author={dev_username}"
    )

    # Get devs contributions to repository
    results = None
    try:
        call_iter = 0
        while call_iter < 9:
            api = GhApi(
                token=next(gh_tokens_cycler),
            )
            results = api.repos.get_contributors_stats(
                owner=repo_owner,
                repo=repo_name,
            )

            # If the results are empty, wait and try again
            if len(results) == 0:
                time.sleep(20)
                call_iter += 1
                results = None
                continue

            # If we have results, break the loop
            break

        # If we have results, filter for the developer account
        if results is None:
            print(f"No results found for {repo_url} after {call_iter + 1} iterations.")
            raise ValueError(
                f"No results found for {repo_url} after {call_iter + 1} iterations."
            )

        # Print number of call iterations
        print(f"Number of API call iterations required: {call_iter + 1}")

        # Get the total number of commits, additions, and deletions for the repo
        # Iter over each contributor, and iter over each week in "weeks", sum all values
        total_repo_commits = 0
        total_repo_additions = 0
        total_repo_deletions = 0
        for contributor in results:
            for week in contributor["weeks"]:
                total_repo_commits += week["c"]
                total_repo_additions += week["a"]
                total_repo_deletions += week["d"]

        # If we have results, filter for the developer account
        filtered_to_contributor = [
            contributor
            for contributor in results
            if contributor["author"]["login"] == dev_username
        ]
        if len(filtered_to_contributor) > 0:
            contributor = filtered_to_contributor[0]
            total_commits = sum(week["c"] for week in contributor["weeks"])
            total_additions = sum(week["a"] for week in contributor["weeks"])
            total_deletions = sum(week["d"] for week in contributor["weeks"])
        else:
            total_commits = 0
            total_additions = 0
            total_deletions = 0

        # Add all of these details to the results
        return {
            "document_id": row["document_id"],
            "document_doi": doc_doi_url,
            "repository_id": row["repository_id"],
            "repository_url": repo_url,
            "developer_account_id": row["developer_account_id"],
            "developer_account_url": dev_url,
            "developer_contribution_url": dev_commit_url,
            "n_commits": total_commits,
            "n_additions": total_additions,
            "n_deletions": total_deletions,
            "total_repo_commits": total_repo_commits,
            "total_repo_additions": total_repo_additions,
            "total_repo_deletions": total_repo_deletions,
        }

    except Exception as e:
        print(f"Error processing {repo_url} for {dev_username}: {e}")
        return {
            "document_id": row["document_id"],
            "document_doi": doc_doi_url,
            "repository_id": row["repository_id"],
            "repository_url": repo_url,
            "developer_account_id": row["developer_account_id"],
            "developer_account_url": dev_url,
            "developer_contribution_url": dev_commit_url,
        }


# Take a random sample of 20 rows
sampled_rows = cc_na_individuals_no_push_after_pub.drop_duplicates(
    "developer_account_id", keep="first"
).sample(n=20, random_state=12)

annotation_ready = []
for _, row in tqdm(
    sampled_rows.iterrows(),
    total=len(sampled_rows),
):
    # Prepare the row for annotation
    annotation_ready.append(prep_for_annotation(row))

    # Convert to DataFrame
    annotated_df = pd.DataFrame(annotation_ready)
    annotated_df.to_csv("cc-na-individuals-sample.csv", index=False)

# Convert to DataFrame
annotated_df = pd.DataFrame(annotation_ready)
annotated_df.to_csv("cc-na-individuals-sample.csv", index=False)

Expired GitHub Tokens: 1


  0%|          | 0/20 [00:00<?, ?it/s]

Expired GitHub Tokens: 1


  5%|▌         | 1/20 [00:00<00:09,  2.07it/s]

Number of API call iterations required: 1


 10%|█         | 2/20 [00:01<00:11,  1.50it/s]

Number of API call iterations required: 1


 15%|█▌        | 3/20 [00:01<00:11,  1.47it/s]

Number of API call iterations required: 1


 20%|██        | 4/20 [00:43<04:28, 16.78s/it]

Number of API call iterations required: 3


 25%|██▌       | 5/20 [00:44<02:46, 11.07s/it]

Number of API call iterations required: 1


 30%|███       | 6/20 [03:50<16:28, 70.62s/it]

No results found for https://github.com/stevenireeves/amrex after 10 iterations.
Error processing https://github.com/stevenireeves/amrex for memmett: No results found for https://github.com/stevenireeves/amrex after 10 iterations.


 35%|███▌      | 7/20 [03:50<10:18, 47.61s/it]

Number of API call iterations required: 1


 40%|████      | 8/20 [04:31<09:05, 45.45s/it]

Number of API call iterations required: 3


 45%|████▌     | 9/20 [07:36<16:19, 89.01s/it]

No results found for https://github.com/stevenireeves/amrex after 10 iterations.
Error processing https://github.com/stevenireeves/amrex for asalmgren: No results found for https://github.com/stevenireeves/amrex after 10 iterations.


 50%|█████     | 10/20 [09:18<15:31, 93.13s/it]

Number of API call iterations required: 6


 55%|█████▌    | 11/20 [10:20<12:31, 83.49s/it]

Number of API call iterations required: 4


 60%|██████    | 12/20 [11:22<10:15, 76.94s/it]

Number of API call iterations required: 4


 65%|██████▌   | 13/20 [11:42<06:58, 59.85s/it]

Number of API call iterations required: 2


 70%|███████   | 14/20 [12:44<06:01, 60.27s/it]

Number of API call iterations required: 4


 75%|███████▌  | 15/20 [13:45<05:02, 60.59s/it]

Number of API call iterations required: 4


 80%|████████  | 16/20 [14:06<03:14, 48.66s/it]

Number of API call iterations required: 2


 85%|████████▌ | 17/20 [15:07<02:37, 52.51s/it]

Number of API call iterations required: 4


 90%|█████████ | 18/20 [15:28<01:25, 42.97s/it]

Number of API call iterations required: 2


 95%|█████████▌| 19/20 [16:50<00:54, 54.66s/it]

Number of API call iterations required: 5


100%|██████████| 20/20 [17:52<00:00, 53.63s/it]

Number of API call iterations required: 4





In [41]:
# Take a random sample of 200 rows
sampled_rows = cc_na_individuals_no_push_after_pub.drop_duplicates(
    "developer_account_id", keep="first"
).sample(n=200, random_state=12)

annotation_ready = []
for _, row in tqdm(
    sampled_rows.iterrows(),
    total=len(sampled_rows),
):
    # Prepare the row for annotation
    annotation_ready.append(prep_for_annotation(row))

    # Convert to DataFrame
    annotated_df = pd.DataFrame(annotation_ready)
    annotated_df.to_csv("cc-na-individuals-full.csv", index=False)

# Convert to DataFrame
annotated_df = pd.DataFrame(annotation_ready)
annotated_df.to_csv("cc-na-individuals-full.csv", index=False)

  0%|          | 1/200 [00:00<00:47,  4.16it/s]

Number of API call iterations required: 1


  1%|          | 2/200 [00:00<01:03,  3.13it/s]

Number of API call iterations required: 1


  2%|▏         | 3/200 [00:00<01:07,  2.90it/s]

Number of API call iterations required: 1


  2%|▏         | 4/200 [00:01<00:59,  3.32it/s]

Number of API call iterations required: 1


  2%|▎         | 5/200 [00:22<25:01,  7.70s/it]

Number of API call iterations required: 2


  3%|▎         | 6/200 [03:26<3:39:17, 67.82s/it]

No results found for https://github.com/stevenireeves/amrex after 10 iterations.
Error processing https://github.com/stevenireeves/amrex for memmett: No results found for https://github.com/stevenireeves/amrex after 10 iterations.


  4%|▎         | 7/200 [03:27<2:27:18, 45.79s/it]

Number of API call iterations required: 1


  4%|▍         | 8/200 [03:27<1:40:27, 31.39s/it]

Number of API call iterations required: 1


  4%|▍         | 9/200 [06:33<4:13:30, 79.64s/it]

No results found for https://github.com/stevenireeves/amrex after 10 iterations.
Error processing https://github.com/stevenireeves/amrex for asalmgren: No results found for https://github.com/stevenireeves/amrex after 10 iterations.


  5%|▌         | 10/200 [06:33<2:54:35, 55.13s/it]

Number of API call iterations required: 1


  6%|▌         | 11/200 [06:33<2:00:45, 38.33s/it]

Number of API call iterations required: 1


  6%|▌         | 12/200 [06:34<1:24:06, 26.84s/it]

Number of API call iterations required: 1


  6%|▋         | 13/200 [06:54<1:17:42, 24.93s/it]

Number of API call iterations required: 2


  7%|▋         | 14/200 [07:15<1:13:11, 23.61s/it]

Number of API call iterations required: 2


  8%|▊         | 15/200 [07:56<1:29:09, 28.92s/it]

Number of API call iterations required: 3


  8%|▊         | 16/200 [07:57<1:02:29, 20.38s/it]

Number of API call iterations required: 1


  8%|▊         | 17/200 [07:57<43:51, 14.38s/it]  

Number of API call iterations required: 1


  9%|▉         | 18/200 [10:41<2:59:29, 59.17s/it]

Number of API call iterations required: 9


 10%|▉         | 19/200 [10:41<2:05:08, 41.48s/it]

Number of API call iterations required: 1


 10%|█         | 20/200 [10:41<1:27:21, 29.12s/it]

Number of API call iterations required: 1


 10%|█         | 21/200 [11:02<1:19:16, 26.57s/it]

Number of API call iterations required: 2


 11%|█         | 22/200 [12:03<1:49:49, 37.02s/it]

Number of API call iterations required: 4


 12%|█▏        | 23/200 [13:05<2:10:57, 44.39s/it]

Number of API call iterations required: 4


 12%|█▏        | 24/200 [13:46<2:07:15, 43.38s/it]

Number of API call iterations required: 3


 12%|█▎        | 25/200 [14:07<1:46:46, 36.61s/it]

Number of API call iterations required: 2


 13%|█▎        | 26/200 [14:27<1:32:24, 31.87s/it]

Number of API call iterations required: 2


 14%|█▎        | 27/200 [14:48<1:22:06, 28.47s/it]

Number of API call iterations required: 2


 14%|█▍        | 28/200 [16:09<2:07:15, 44.39s/it]

Number of API call iterations required: 5


 14%|█▍        | 29/200 [16:50<2:03:26, 43.31s/it]

Number of API call iterations required: 3


 15%|█▌        | 30/200 [17:32<2:01:02, 42.72s/it]

Number of API call iterations required: 3


 16%|█▌        | 31/200 [18:33<2:15:57, 48.27s/it]

Number of API call iterations required: 4


 16%|█▌        | 32/200 [18:54<1:52:10, 40.06s/it]

Number of API call iterations required: 2


 16%|█▋        | 33/200 [19:14<1:35:17, 34.24s/it]

Number of API call iterations required: 2


 17%|█▋        | 34/200 [20:36<2:13:59, 48.43s/it]

Number of API call iterations required: 5


 18%|█▊        | 35/200 [20:57<1:50:31, 40.19s/it]

Number of API call iterations required: 2


 18%|█▊        | 36/200 [21:39<1:51:00, 40.61s/it]

Number of API call iterations required: 3


 18%|█▊        | 37/200 [22:20<1:50:54, 40.82s/it]

Number of API call iterations required: 3


 19%|█▉        | 38/200 [23:22<2:07:50, 47.35s/it]

Number of API call iterations required: 4


 20%|█▉        | 39/200 [23:43<1:45:34, 39.35s/it]

Number of API call iterations required: 2


 20%|██        | 40/200 [24:04<1:30:15, 33.84s/it]

Number of API call iterations required: 2


 20%|██        | 41/200 [25:06<1:52:21, 42.40s/it]

Number of API call iterations required: 4


 21%|██        | 42/200 [26:08<2:06:38, 48.09s/it]

Number of API call iterations required: 4


 22%|██▏       | 43/200 [26:29<1:44:52, 40.08s/it]

Number of API call iterations required: 2


 22%|██▏       | 44/200 [26:50<1:29:28, 34.42s/it]

Number of API call iterations required: 2


 22%|██▎       | 45/200 [27:32<1:34:14, 36.48s/it]

Number of API call iterations required: 3


 23%|██▎       | 46/200 [28:13<1:37:27, 37.97s/it]

Number of API call iterations required: 3


 24%|██▎       | 47/200 [28:34<1:23:45, 32.84s/it]

Number of API call iterations required: 2


 24%|██▍       | 48/200 [28:55<1:14:05, 29.25s/it]

Number of API call iterations required: 2


 24%|██▍       | 49/200 [29:16<1:07:10, 26.69s/it]

Number of API call iterations required: 2


 25%|██▌       | 50/200 [29:56<1:17:19, 30.93s/it]

Number of API call iterations required: 3


 26%|██▌       | 51/200 [30:58<1:39:17, 39.99s/it]

Number of API call iterations required: 4


 26%|██▌       | 52/200 [31:39<1:39:23, 40.30s/it]

Number of API call iterations required: 3


 26%|██▋       | 53/200 [32:40<1:54:22, 46.68s/it]

Number of API call iterations required: 4


 27%|██▋       | 54/200 [33:22<1:49:42, 45.09s/it]

Number of API call iterations required: 3


 28%|██▊       | 55/200 [34:23<2:00:51, 50.01s/it]

Number of API call iterations required: 4


 28%|██▊       | 56/200 [34:44<1:38:57, 41.23s/it]

Number of API call iterations required: 2


 28%|██▊       | 57/200 [35:05<1:23:49, 35.17s/it]

Number of API call iterations required: 2


 29%|██▉       | 58/200 [35:26<1:13:08, 30.91s/it]

Number of API call iterations required: 2


 30%|██▉       | 59/200 [36:07<1:19:51, 33.99s/it]

Number of API call iterations required: 3


 30%|███       | 60/200 [36:27<1:09:51, 29.94s/it]

Number of API call iterations required: 2


 30%|███       | 61/200 [36:48<1:02:57, 27.18s/it]

Number of API call iterations required: 2


 31%|███       | 62/200 [37:30<1:12:52, 31.69s/it]

Number of API call iterations required: 3


 32%|███▏      | 63/200 [37:51<1:04:53, 28.42s/it]

Number of API call iterations required: 2


 32%|███▏      | 64/200 [38:32<1:12:58, 32.20s/it]

Number of API call iterations required: 3


 32%|███▎      | 65/200 [39:34<1:32:11, 40.98s/it]

Number of API call iterations required: 4


 33%|███▎      | 66/200 [40:57<1:59:36, 53.55s/it]

Number of API call iterations required: 5


 34%|███▎      | 67/200 [41:17<1:36:45, 43.65s/it]

Number of API call iterations required: 2


 34%|███▍      | 68/200 [41:58<1:34:07, 42.78s/it]

Number of API call iterations required: 3


 34%|███▍      | 69/200 [43:00<1:45:50, 48.47s/it]

Number of API call iterations required: 4


 35%|███▌      | 70/200 [44:01<1:53:39, 52.46s/it]

Number of API call iterations required: 4


 36%|███▌      | 71/200 [45:03<1:58:48, 55.26s/it]

Number of API call iterations required: 4


 36%|███▌      | 72/200 [46:26<2:15:41, 63.61s/it]

Number of API call iterations required: 5


 36%|███▋      | 73/200 [47:07<2:00:26, 56.90s/it]

Number of API call iterations required: 3


 37%|███▋      | 74/200 [47:28<1:36:39, 46.03s/it]

Number of API call iterations required: 2


 38%|███▊      | 75/200 [50:35<3:04:05, 88.36s/it]

No results found for https://github.com/stevenireeves/amrex after 10 iterations.
Error processing https://github.com/stevenireeves/amrex for ax3l: No results found for https://github.com/stevenireeves/amrex after 10 iterations.


 38%|███▊      | 76/200 [50:56<2:20:34, 68.02s/it]

Number of API call iterations required: 2


 38%|███▊      | 77/200 [51:17<1:50:19, 53.82s/it]

Number of API call iterations required: 2


 39%|███▉      | 78/200 [51:37<1:29:08, 43.84s/it]

Number of API call iterations required: 2


 40%|███▉      | 79/200 [52:59<1:51:21, 55.22s/it]

Number of API call iterations required: 5


 40%|████      | 80/200 [54:00<1:54:01, 57.01s/it]

Number of API call iterations required: 4


 40%|████      | 81/200 [54:21<1:31:33, 46.16s/it]

Number of API call iterations required: 2


 41%|████      | 82/200 [55:02<1:28:02, 44.77s/it]

Number of API call iterations required: 3


 42%|████▏     | 83/200 [55:03<1:01:21, 31.46s/it]

Number of API call iterations required: 1


 42%|████▏     | 84/200 [58:06<2:28:47, 76.96s/it]

No results found for https://github.com/mengqidyangge/hierkd after 10 iterations.
Error processing https://github.com/mengqidyangge/hierkd for mengqiDyangge: No results found for https://github.com/mengqidyangge/hierkd after 10 iterations.


 42%|████▎     | 85/200 [58:27<1:55:05, 60.05s/it]

Number of API call iterations required: 2


 43%|████▎     | 86/200 [58:47<1:31:35, 48.20s/it]

Number of API call iterations required: 2


 44%|████▎     | 87/200 [1:00:09<1:49:59, 58.40s/it]

Number of API call iterations required: 5


 44%|████▍     | 88/200 [1:01:11<1:50:40, 59.29s/it]

Number of API call iterations required: 4


 44%|████▍     | 89/200 [1:01:52<1:39:34, 53.82s/it]

Number of API call iterations required: 3


 45%|████▌     | 90/200 [1:03:35<2:05:50, 68.64s/it]

Number of API call iterations required: 6


 46%|████▌     | 91/200 [1:04:16<1:49:32, 60.30s/it]

Number of API call iterations required: 3


 46%|████▌     | 92/200 [1:04:37<1:27:35, 48.66s/it]

Number of API call iterations required: 2


 46%|████▋     | 93/200 [1:04:58<1:12:04, 40.42s/it]

Number of API call iterations required: 2


 47%|████▋     | 94/200 [1:05:19<1:00:51, 34.44s/it]

Number of API call iterations required: 2


 48%|████▊     | 95/200 [1:06:41<1:25:08, 48.66s/it]

Number of API call iterations required: 5


 48%|████▊     | 96/200 [1:07:22<1:20:26, 46.41s/it]

Number of API call iterations required: 3


 48%|████▊     | 97/200 [1:10:26<2:30:20, 87.58s/it]

No results found for https://github.com/uhh-iss/zeek after 10 iterations.
Error processing https://github.com/uhh-iss/zeek for dnthayer: No results found for https://github.com/uhh-iss/zeek after 10 iterations.


 49%|████▉     | 98/200 [1:11:08<2:05:36, 73.89s/it]

Number of API call iterations required: 3


 50%|████▉     | 99/200 [1:12:09<1:58:16, 70.26s/it]

Number of API call iterations required: 4


 50%|█████     | 100/200 [1:13:12<1:53:09, 67.90s/it]

Number of API call iterations required: 4


 50%|█████     | 101/200 [1:13:32<1:28:42, 53.76s/it]

Number of API call iterations required: 2


 51%|█████     | 102/200 [1:14:14<1:21:41, 50.01s/it]

Number of API call iterations required: 3


 52%|█████▏    | 103/200 [1:14:55<1:16:36, 47.39s/it]

Number of API call iterations required: 3


 52%|█████▏    | 104/200 [1:16:37<1:42:09, 63.85s/it]

Number of API call iterations required: 6


 52%|█████▎    | 105/200 [1:16:58<1:20:33, 50.87s/it]

Number of API call iterations required: 2


 53%|█████▎    | 106/200 [1:20:01<2:22:01, 90.66s/it]

No results found for https://github.com/disi-unibo-nlp/medgenie after 10 iterations.
Error processing https://github.com/disi-unibo-nlp/medgenie for disi-unibo-nlp: No results found for https://github.com/disi-unibo-nlp/medgenie after 10 iterations.


 54%|█████▎    | 107/200 [1:20:22<1:47:56, 69.64s/it]

Number of API call iterations required: 2


 54%|█████▍    | 108/200 [1:21:03<1:33:35, 61.04s/it]

Number of API call iterations required: 3


 55%|█████▍    | 109/200 [1:21:45<1:24:03, 55.43s/it]

Number of API call iterations required: 3


 55%|█████▌    | 110/200 [1:22:07<1:07:47, 45.19s/it]

Number of API call iterations required: 2


 56%|█████▌    | 111/200 [1:22:07<47:01, 31.71s/it]  

Number of API call iterations required: 1


 56%|█████▌    | 112/200 [1:22:48<50:32, 34.46s/it]

Number of API call iterations required: 3


 56%|█████▋    | 113/200 [1:23:50<1:01:52, 42.68s/it]

Number of API call iterations required: 4


 57%|█████▋    | 114/200 [1:24:11<52:03, 36.33s/it]  

Number of API call iterations required: 2


 57%|█████▊    | 115/200 [1:25:33<1:10:53, 50.04s/it]

Number of API call iterations required: 5


 58%|█████▊    | 116/200 [1:26:35<1:15:05, 53.64s/it]

Number of API call iterations required: 4


 58%|█████▊    | 117/200 [1:27:37<1:17:27, 55.99s/it]

Number of API call iterations required: 4


 59%|█████▉    | 118/200 [1:28:18<1:10:28, 51.57s/it]

Number of API call iterations required: 3


 60%|█████▉    | 119/200 [1:28:19<49:07, 36.39s/it]  

Number of API call iterations required: 1


 60%|██████    | 120/200 [1:29:00<50:23, 37.79s/it]

Number of API call iterations required: 3


 60%|██████    | 121/200 [1:30:22<1:07:05, 50.96s/it]

Number of API call iterations required: 5


 61%|██████    | 122/200 [1:33:28<1:58:58, 91.52s/it]

No results found for https://github.com/stevenireeves/amrex after 10 iterations.
Error processing https://github.com/stevenireeves/amrex for adam-m-jcbs: No results found for https://github.com/stevenireeves/amrex after 10 iterations.


 62%|██████▏   | 123/200 [1:35:10<2:01:38, 94.79s/it]

Number of API call iterations required: 6


 62%|██████▏   | 124/200 [1:35:31<1:31:59, 72.63s/it]

Number of API call iterations required: 2


 62%|██████▎   | 125/200 [1:36:13<1:19:09, 63.33s/it]

Number of API call iterations required: 3


 63%|██████▎   | 126/200 [1:36:34<1:02:29, 50.67s/it]

Number of API call iterations required: 2


 64%|██████▎   | 127/200 [1:37:15<58:19, 47.93s/it]  

Number of API call iterations required: 3


 64%|██████▍   | 128/200 [1:37:57<55:09, 45.96s/it]

Number of API call iterations required: 3


 64%|██████▍   | 129/200 [1:39:39<1:14:30, 62.96s/it]

Number of API call iterations required: 6


 65%|██████▌   | 130/200 [1:40:00<58:43, 50.34s/it]  

Number of API call iterations required: 2


 66%|██████▌   | 131/200 [1:40:42<54:47, 47.64s/it]

Number of API call iterations required: 3


 66%|██████▌   | 132/200 [1:41:24<52:13, 46.08s/it]

Number of API call iterations required: 3


 66%|██████▋   | 133/200 [1:42:26<56:41, 50.77s/it]

Number of API call iterations required: 4


 67%|██████▋   | 134/200 [1:44:08<1:12:54, 66.29s/it]

Number of API call iterations required: 6


 68%|██████▊   | 135/200 [1:44:50<1:03:47, 58.89s/it]

Number of API call iterations required: 3


 68%|██████▊   | 136/200 [1:45:31<57:06, 53.54s/it]  

Number of API call iterations required: 3


 68%|██████▊   | 137/200 [1:47:54<1:24:32, 80.51s/it]

Number of API call iterations required: 8


 69%|██████▉   | 138/200 [1:48:35<1:10:58, 68.68s/it]

Number of API call iterations required: 3


 70%|██████▉   | 139/200 [1:48:57<55:36, 54.69s/it]  

Number of API call iterations required: 2


 70%|███████   | 140/200 [1:49:18<44:28, 44.47s/it]

Number of API call iterations required: 2


 70%|███████   | 141/200 [1:49:59<42:44, 43.47s/it]

Number of API call iterations required: 3


 71%|███████   | 142/200 [1:51:42<59:04, 61.11s/it]

Number of API call iterations required: 6


 72%|███████▏  | 143/200 [1:53:44<1:15:40, 79.66s/it]

Number of API call iterations required: 7


 72%|███████▏  | 144/200 [1:54:07<58:14, 62.41s/it]  

Number of API call iterations required: 2


 72%|███████▎  | 145/200 [1:54:28<45:57, 50.14s/it]

Number of API call iterations required: 2


 73%|███████▎  | 146/200 [1:55:50<53:43, 59.70s/it]

Number of API call iterations required: 5


 74%|███████▎  | 147/200 [1:56:11<42:23, 48.00s/it]

Number of API call iterations required: 2


 74%|███████▍  | 148/200 [1:57:12<45:07, 52.07s/it]

Number of API call iterations required: 4


 74%|███████▍  | 149/200 [1:58:14<46:38, 54.88s/it]

Number of API call iterations required: 4


 75%|███████▌  | 150/200 [1:58:34<37:09, 44.60s/it]

Number of API call iterations required: 2


 76%|███████▌  | 151/200 [1:58:55<30:34, 37.44s/it]

Number of API call iterations required: 2


 76%|███████▌  | 152/200 [1:59:36<30:46, 38.47s/it]

Number of API call iterations required: 3


 76%|███████▋  | 153/200 [2:00:17<30:44, 39.23s/it]

Number of API call iterations required: 3


 77%|███████▋  | 154/200 [2:01:18<35:06, 45.79s/it]

Number of API call iterations required: 4


 78%|███████▊  | 155/200 [2:01:59<33:15, 44.34s/it]

Number of API call iterations required: 3


 78%|███████▊  | 156/200 [2:03:00<36:14, 49.41s/it]

Number of API call iterations required: 4


 78%|███████▊  | 157/200 [2:03:21<29:14, 40.79s/it]

Number of API call iterations required: 2


 79%|███████▉  | 158/200 [2:05:24<45:47, 65.41s/it]

Number of API call iterations required: 7


 80%|███████▉  | 159/200 [2:07:46<1:00:30, 88.55s/it]

Number of API call iterations required: 8


 80%|████████  | 160/200 [2:08:49<53:47, 80.70s/it]  

Number of API call iterations required: 4


 80%|████████  | 161/200 [2:09:51<48:54, 75.25s/it]

Number of API call iterations required: 4


 81%|████████  | 162/200 [2:11:35<52:59, 83.66s/it]

Number of API call iterations required: 6


 82%|████████▏ | 163/200 [2:12:16<43:44, 70.92s/it]

Number of API call iterations required: 3


 82%|████████▏ | 164/200 [2:15:03<59:57, 99.92s/it]

Number of API call iterations required: 9


 82%|████████▎ | 165/200 [2:15:46<48:18, 82.82s/it]

Number of API call iterations required: 3


 83%|████████▎ | 166/200 [2:16:27<39:49, 70.27s/it]

Number of API call iterations required: 3


 84%|████████▎ | 167/200 [2:17:49<40:28, 73.60s/it]

Number of API call iterations required: 5


 84%|████████▍ | 168/200 [2:19:34<44:19, 83.11s/it]

Number of API call iterations required: 6


 84%|████████▍ | 169/200 [2:19:55<33:14, 64.33s/it]

Number of API call iterations required: 2


 85%|████████▌ | 170/200 [2:20:15<25:37, 51.24s/it]

Number of API call iterations required: 2


 86%|████████▌ | 171/200 [2:20:56<23:16, 48.17s/it]

Number of API call iterations required: 3


 86%|████████▌ | 172/200 [2:21:37<21:27, 45.97s/it]

Number of API call iterations required: 3


 86%|████████▋ | 173/200 [2:21:58<17:21, 38.56s/it]

Number of API call iterations required: 2


 87%|████████▋ | 174/200 [2:22:19<14:22, 33.18s/it]

Number of API call iterations required: 2


 88%|████████▊ | 175/200 [2:23:41<19:53, 47.75s/it]

Number of API call iterations required: 5


 88%|████████▊ | 176/200 [2:23:42<13:29, 33.73s/it]

Number of API call iterations required: 1


 88%|████████▊ | 177/200 [2:25:03<18:26, 48.09s/it]

Number of API call iterations required: 5


 89%|████████▉ | 178/200 [2:27:06<25:51, 70.54s/it]

Number of API call iterations required: 7


 90%|████████▉ | 179/200 [2:27:48<21:37, 61.78s/it]

Number of API call iterations required: 3


 90%|█████████ | 180/200 [2:28:29<18:32, 55.62s/it]

Number of API call iterations required: 3


 90%|█████████ | 181/200 [2:28:50<14:18, 45.21s/it]

Number of API call iterations required: 2


 91%|█████████ | 182/200 [2:29:51<15:03, 50.17s/it]

Number of API call iterations required: 4


 92%|█████████▏| 183/200 [2:30:33<13:27, 47.48s/it]

Number of API call iterations required: 3


 92%|█████████▏| 184/200 [2:31:14<12:09, 45.62s/it]

Number of API call iterations required: 3


 92%|█████████▎| 185/200 [2:31:17<08:12, 32.80s/it]

Number of API call iterations required: 1


 93%|█████████▎| 186/200 [2:31:58<08:16, 35.43s/it]

Number of API call iterations required: 3


 94%|█████████▎| 187/200 [2:35:03<17:21, 80.12s/it]

No results found for https://github.com/uhh-iss/zeek after 10 iterations.
Error processing https://github.com/uhh-iss/zeek for jrolli: No results found for https://github.com/uhh-iss/zeek after 10 iterations.


 94%|█████████▍| 188/200 [2:36:04<14:52, 74.40s/it]

Number of API call iterations required: 4


 94%|█████████▍| 189/200 [2:36:25<10:41, 58.29s/it]

Number of API call iterations required: 2


 95%|█████████▌| 190/200 [2:37:06<08:51, 53.19s/it]

Number of API call iterations required: 3


 96%|█████████▌| 191/200 [2:39:08<11:05, 73.90s/it]

Number of API call iterations required: 7


 96%|█████████▌| 192/200 [2:39:29<07:43, 57.94s/it]

Number of API call iterations required: 2


 96%|█████████▋| 193/200 [2:39:51<05:29, 47.08s/it]

Number of API call iterations required: 2


 97%|█████████▋| 194/200 [2:40:12<03:55, 39.29s/it]

Number of API call iterations required: 2


 98%|█████████▊| 195/200 [2:40:13<02:19, 27.81s/it]

Number of API call iterations required: 1


 98%|█████████▊| 196/200 [2:40:54<02:07, 31.83s/it]

Number of API call iterations required: 3


 98%|█████████▊| 197/200 [2:41:15<01:26, 28.68s/it]

Number of API call iterations required: 2


 99%|█████████▉| 198/200 [2:41:17<00:41, 20.73s/it]

Number of API call iterations required: 1


100%|█████████▉| 199/200 [2:42:39<00:39, 39.04s/it]

Number of API call iterations required: 5


100%|██████████| 200/200 [2:44:22<00:00, 49.31s/it]

Number of API call iterations required: 6



