In [4]:
import pandas as pd
import numpy as np

# Read the cached domain snapshot into a DataFrame, then summarise it:
# overall shape, column names, and a preview of the leading rows.
df = pd.read_csv("../data/cached_domains_2025-06-12-17-00.csv")

print("Dataset shape:", df.shape)
print("Columns:", df.columns.tolist())
print("\nFirst few rows:")
# Bare last expression so the notebook renders the preview as a rich table.
df.head(5)


Dataset shape: (47951, 3)
Columns: ['domain', 'status', 'ttl_seconds']

First few rows:


Unnamed: 0,domain,status,ttl_seconds
0,a l rm.ooo,free,596104
1,a lopez.ooo,free,596148
2,a yosselin.ooo,free,596081
3,a-mansour.ooo,free,597739
4,aabush.ooo,free,592799


In [None]:
# Filter for domains whose name (the part before the ".ooo" TLD) is exactly
# 3 characters long.
#
# The TLD is stripped with an escaped, end-anchored pattern and an explicit
# `regex=True`. A bare ".ooo" pattern is interpreted as a regular expression by
# pandas < 2.0 (so "." matches ANY character, e.g. "abcdooo.ooo" -> "abc"), and
# in every pandas version it would also delete a ".ooo" occurring in the middle
# of a name. Anchoring removes only the trailing TLD.
# Note: the length test counts any 3-character name, not only alphabetic ones.
domain_names = df["domain"].str.replace(r"\.ooo$", "", regex=True)
three_letter_domains = df[domain_names.str.len() == 3]

print(f"Total number of 3-letter domains in dataset: {len(three_letter_domains)}")
print(f"Total domains in dataset: {len(df)}")
print(
    f"Percentage of 3-letter domains: {len(three_letter_domains) / len(df) * 100:.2f}%"
)

# Check what columns indicate availability/taken status.
# Two schemas are handled: a boolean-like "available" column, or a string
# "status" column in which the value "free" marks an available domain.
print(f"\nColumns available: {list(df.columns)}")
if "available" in df.columns:
    # Element-wise comparisons against the "available" column.
    taken_three_letter = three_letter_domains[
        three_letter_domains["available"] == False
    ]
    available_three_letter = three_letter_domains[
        three_letter_domains["available"] == True
    ]
    print(f"\n3-letter domains that are TAKEN: {len(taken_three_letter)}")
    print(f"3-letter domains that are AVAILABLE: {len(available_three_letter)}")
elif "status" in df.columns:
    print(f"\nUnique status values: {df['status'].unique()}")
    # Any status other than "free" is counted as taken.
    taken_three_letter = three_letter_domains[three_letter_domains["status"] != "free"]
    available_three_letter = three_letter_domains[
        three_letter_domains["status"] == "free"
    ]
    print(f"\n3-letter domains that are TAKEN: {len(taken_three_letter)}")
    print(f"3-letter domains that are AVAILABLE: {len(available_three_letter)}")
else:
    # Neither known column exists; the taken/available frames are not defined.
    print("\nNeed to examine the data structure to determine availability status")


Total number of 3-letter domains in dataset: 1201
Total domains in dataset: 47951
Percentage of 3-letter domains: 2.50%

Columns available: ['domain', 'status', 'ttl_seconds']

Unique status values: ['free' 'taken']

3-letter domains that are TAKEN: 280
3-letter domains that are AVAILABLE: 921


In [13]:
# Preview the filtered frame, then list a few taken and a few available names.
print("Sample of 3-letter domains:")
print(three_letter_domains.head(10))

# The example lists are printed the same way whichever of the two
# availability columns the dataset carries, so one code path is enough.
has_availability_column = any(
    column in df.columns for column in ("available", "status")
)
if has_availability_column:
    separator = "=" * 50
    print("\n" + separator)
    print("Examples of TAKEN 3-letter domains:")
    print(taken_three_letter["domain"].head(10).tolist())
    print("\nExamples of AVAILABLE 3-letter domains:")
    print(available_three_letter["domain"].head(10).tolist())


Sample of 3-letter domains:
      domain status  ttl_seconds
6    aad.ooo   free       597841
24   aan.ooo   free       594023
55   aba.ooo  taken       539495
102  abd.ooo  taken       544940
313  abe.ooo   free       596664
337  abg.ooo  taken       541924
346  abi.ooo   free       592295
395  abo.ooo   free       593622
447  abu.ooo   free       592063
466  aby.ooo  taken       545172

Examples of TAKEN 3-letter domains:
['aba.ooo', 'abd.ooo', 'abg.ooo', 'aby.ooo', 'ace.ooo', 'ach.ooo', 'adi.ooo', 'ado.ooo', 'adv.ooo', 'afa.ooo']

Examples of AVAILABLE 3-letter domains:
['aad.ooo', 'aan.ooo', 'abe.ooo', 'abi.ooo', 'abo.ooo', 'abu.ooo', 'aca.ooo', 'aco.ooo', 'ada.ooo', 'ade.ooo']


In [19]:
# Export the 3-character domains ordered by status and then by domain name,
# without the TTL column and without the DataFrame index.
sorted_domains = three_letter_domains.sort_values(by=["status", "domain"])
export_frame = sorted_domains.drop(columns=["ttl_seconds"])
export_frame.to_csv("three_letter_domains.csv", index=False)
