In [20]:
import pandas as pd
import numpy as np

# Read the cached domain snapshot into a DataFrame
df = pd.read_csv("../data/cached_domains_2025-06-12-19-52.csv")

# Report the frame's dimensions and column names with a single call; the
# newline separator puts each message on its own line
print(
    f"Dataset shape: {df.shape}",
    f"Columns: {list(df.columns)}",
    "\nFirst few rows:",
    sep="\n",
)
# Bare last expression so the notebook renders the preview of the first rows
df.head()


Dataset shape: (64335, 3)
Columns: ['domain', 'status', 'ttl_seconds']

First few rows:


Unnamed: 0,domain,status,ttl_seconds
0,a l rm.ooo,free,585744
1,a lopez.ooo,free,585789
2,a yosselin.ooo,free,585722
3,a-mansour.ooo,free,587379
4,aaa.ooo,free,602233


In [21]:
# Filter for 3-letter domains (excluding the trailing .ooo)
#
# Only a trailing ".ooo" is stripped: the dot is escaped and the pattern is
# anchored with "$". An unanchored, unescaped ".ooo" is unsafe because, when it
# is interpreted as a regular expression, "." matches any character, so a name
# such as "abcdooo.ooo" would be reduced to "abc" and miscounted as 3 characters.
# Even as a literal it would remove every ".ooo" occurrence, not just the suffix.
# NOTE(review): the length test counts any 3-character name (digits, hyphens
# and spaces included), not strictly alphabetic ones; confirm that is intended.
domain_labels = df["domain"].str.replace(r"\.ooo$", "", regex=True)
three_letter_domains = df[domain_labels.str.len() == 3]

print(f"Total number of 3-letter domains in dataset: {len(three_letter_domains)}")
print(f"Total domains in dataset: {len(df)}")
print(
    f"Percentage of 3-letter domains: {len(three_letter_domains) / len(df) * 100:.2f}%"
)

# Check what columns indicate availability/taken status.
# Each branch only builds the two boolean masks; the split and the reporting
# are shared below so the two schemas cannot drift apart.
print(f"\nColumns available: {list(df.columns)}")
if "available" in df.columns:
    # Explicit comparisons (rather than `~`) are kept on purpose: rows whose
    # value is neither True nor False land in neither subset.
    taken_mask = three_letter_domains["available"] == False  # noqa: E712
    available_mask = three_letter_domains["available"] == True  # noqa: E712
elif "status" in df.columns:
    print(f"\nUnique status values: {df['status'].unique()}")
    # Any status other than "free" is treated as taken.
    taken_mask = three_letter_domains["status"] != "free"
    available_mask = three_letter_domains["status"] == "free"
else:
    taken_mask = available_mask = None
    print("\nNeed to examine the data structure to determine availability status")

if taken_mask is not None:
    taken_three_letter = three_letter_domains[taken_mask]
    available_three_letter = three_letter_domains[available_mask]
    print(f"\n3-letter domains that are TAKEN: {len(taken_three_letter)}")
    print(f"3-letter domains that are AVAILABLE: {len(available_three_letter)}")


Total number of 3-letter domains in dataset: 17585
Total domains in dataset: 64335
Percentage of 3-letter domains: 27.33%

Columns available: ['domain', 'status', 'ttl_seconds']

Unique status values: ['free' 'taken']

3-letter domains that are TAKEN: 1240
3-letter domains that are AVAILABLE: 16345


In [22]:
# Show some examples of 3-letter domains
print("Sample of 3-letter domains:")
print(three_letter_domains.head(10))

# `taken_three_letter` / `available_three_letter` are only created when the
# dataset has an "available" or a "status" column, so the examples are guarded
# by that same condition. The examples are printed the same way for either
# column, so a single code path is enough.
if "available" in df.columns or "status" in df.columns:
    print("\n" + "=" * 50)
    print("Examples of TAKEN 3-letter domains:")
    print(taken_three_letter.head(10)["domain"].tolist())
    print("\nExamples of AVAILABLE 3-letter domains:")
    print(available_three_letter.head(10)["domain"].tolist())


Sample of 3-letter domains:
     domain status  ttl_seconds
4   aaa.ooo   free       602233
5   aab.ooo   free       602234
7   aac.ooo   free       602233
9   aad.ooo   free       587481
12  aae.ooo   free       602232
13  aaf.ooo   free       602232
15  aag.ooo   free       602235
17  aah.ooo   free       602235
18  aai.ooo  taken       602234
21  aaj.ooo   free       602234

Examples of TAKEN 3-letter domains:
['aai.ooo', 'aao.ooo', 'aaw.ooo', 'aba.ooo', 'abb.ooo', 'abd.ooo', 'abf.ooo', 'abg.ooo', 'abn.ooo', 'abp.ooo']

Examples of AVAILABLE 3-letter domains:
['aaa.ooo', 'aab.ooo', 'aac.ooo', 'aad.ooo', 'aae.ooo', 'aaf.ooo', 'aag.ooo', 'aah.ooo', 'aaj.ooo', 'aak.ooo']


In [24]:
# Export the 3-letter subset: order rows by domain name, leave out the TTL
# column, and write the result without the index column.
ordered_by_domain = three_letter_domains.sort_values(by=["domain"])
export_frame = ordered_by_domain.drop(columns=["ttl_seconds"])
export_frame.to_csv("three_letter_domains.csv", index=False)
