# Words

[WordsAPI](https://www.wordsapi.com/) (325314)


In [5]:
import concurrent.futures
import json
import urllib
import urllib.parse

import pandas
import requests
from tqdm.notebook import tqdm_notebook

## Utils

In [6]:
def chunks(list, n):
    """Yield consecutive slices of ``list`` containing at most ``n`` items.

    The final slice is shorter when ``len(list)`` is not a multiple of ``n``.
    The first parameter shadows the builtin ``list``; the name is kept so
    that existing callers are unaffected.
    """
    for start in range(0, len(list), n):
        stop = start + n
        yield list[start:stop]

## Constants

In [7]:
# Endpoint plus the fixed query-string values sent with every request below.
BASE_URL = "https://www.wordsapi.com/mashape/words"
WHEN = "2023-03-23T06:42:24.775Z"
ENCRYPTED = "8cfdb189e722909be89207bfe958babbaeb2290937f892b8"
# Value passed as the ``limit`` query parameter of the search request.
LIMIT = 1_000_000
# Leading characters used to partition the word list: digits, then a-z.
CHARACTERS = list("0123456789abcdefghijklmnopqrstuvwxyz")

## Search Words

In [8]:
# Query the search endpoint once per leading character, gather every match,
# then persist the sorted result with one word per line.
words = []

for character in tqdm_notebook(CHARACTERS):
    search_url = f"{BASE_URL}?when={WHEN}&encrypted={ENCRYPTED}&limit={LIMIT}&letterPattern=^{character}"
    payload = requests.get(search_url, timeout=10).json()
    # A response without "results" or "data" contributes no words.
    words.extend(payload.get("results", {}).get("data", []))

words.sort()

with open('./words.txt', 'w') as fp:
    fp.write('\n'.join(words))

  0%|          | 0/36 [00:00<?, ?it/s]

## Get Words

In [10]:
# Reload the word list saved by the search step (one word per line).
# The context manager closes the file handle as soon as the text is read;
# the original left the handle open for the lifetime of the kernel.
with open("./words.txt", "r") as words_file:
    words_string = words_file.read()
words = words_string.split("\n")

def get_word(word : str) -> str:
    """Fetch the raw response body for a single word.

    Args:
        word: The word to look up. It is percent-encoded with no "safe"
            characters, so "/" and spaces are escaped before being placed
            in the URL path.

    Returns:
        The response text when the server answers with HTTP 200, otherwise
        an empty string. Failures are reported with ``print`` and are not
        raised, so a single bad word does not abort a batch of lookups.
    """
    try:
        encoded_word = urllib.parse.quote(word, safe='')
        url = f"{BASE_URL}/{encoded_word}?when={WHEN}&encrypted={ENCRYPTED}"
        response = requests.request("GET", url, timeout=10)
        if response.status_code == 200:
            return response.text
        print(word, "Error")
        return ""
    except Exception:
        # Deliberately broad to keep the lookup best-effort, but unlike a bare
        # ``except:`` this lets KeyboardInterrupt and SystemExit propagate so
        # a long-running download can still be interrupted.
        print(word, "Error")
        return ""

# Count how many loaded words start with each character, append a grand
# total row, and save the table (sorted by ascending count) to characters.csv.
characters_by_count = []

total = 0

for character in tqdm_notebook(CHARACTERS):
    # Guard against empty entries (e.g. an empty or newline-terminated
    # words.txt yields "" after splitting); word[0] alone raises IndexError.
    count = sum(1 for word in words if word and word[0] == character)
    total += count
    characters_by_count.append({
        "character": character,
        "count": count,
    })

characters_by_count.append({
    "character": "total",
    "count": total,
})

sorted_characters_by_count = sorted(characters_by_count, key=lambda d: d['count'])

characters_by_count_data_frame = pandas.DataFrame.from_dict(sorted_characters_by_count)
characters_by_count_data_frame.to_csv('./characters.csv', index=False, header=True)

def get_words_with_x(words, character : str):
    """Return the words whose first character equals ``character``.

    Args:
        words: Iterable of strings to filter.
        character: The leading character to match.

    Returns:
        A new list with the matching words in their original order. Empty
        strings are skipped instead of raising ``IndexError``.
    """
    return [word for word in words if word and word[0] == character]

# Download the details of every word starting with the listed characters.
# Words are fetched concurrently in batches of 1000, and the per-character
# JSONL file is rewritten after each batch with everything gathered so far.
for item in [{"character": "s", "count": 35970}]:
    character = item.get("character", "")
    count = item.get("count", 0)
    words_with_x = get_words_with_x(words, character)
    words_with_x_with_details = []
    print(character, count)
    for chunk_words_with_x in chunks(words_with_x, 1000):
        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [
                executor.submit(get_word, word=word)
                for word in chunk_words_with_x
            ]
            for future in concurrent.futures.as_completed(futures):
                word_details = future.result()
                # get_word returns "" for a failed lookup; keep only real bodies.
                if word_details == "":
                    continue
                words_with_x_with_details.append(word_details)
        # Drop duplicate bodies and keep the file contents in sorted order.
        words_with_x_with_details = sorted(set(words_with_x_with_details))
        with open(f'./jsonl/{character}.jsonl', 'w') as file_open:
            file_open.write('\n'.join(words_with_x_with_details))

  0%|          | 0/36 [00:00<?, ?it/s]

s 35970
saw-leaved Error
saw treesaw tooth Error
 Error
saw wrack Error
sawfly Error


## Check Words

In [None]:
import json

# Tally how many word records were downloaded per character, merge those
# counts with the expected counts computed earlier, and list every word whose
# record has a non-empty "results" field.
collected_characters_by_count = []

collected_total = 0

words_with_results = []

for character in tqdm_notebook(CHARACTERS):
    # Context manager so each handle is closed before the next file is opened;
    # the original left all of them open.
    with open(f"./jsonl/{character}.jsonl", "r") as file_open:
        content : str = file_open.read()
    json_lines : list[str] = [line for line in content.split("\n") if line != ""]
    count = len(json_lines)
    collected_total += count
    collected_characters_by_count.append({
        "character": character,
        "collected": count
    })
    for json_line in json_lines:
        json_object = json.loads(json_line)
        word = json_object.get("word", "")
        results = json_object.get("results", [])
        # Truthiness also covers a JSON null "results", where len() would raise.
        if results:
            words_with_results.append(word)

collected_characters_by_count.append({
    "character": "total",
    "collected": collected_total,
})

sorted_characters_by_count = sorted(collected_characters_by_count, key=lambda d: d['character'])

collected_characters_by_count_data_frame = pandas.DataFrame.from_dict(sorted_characters_by_count)
# Inner join on "character" keeps rows present in both tables.
merged_data_frame = characters_by_count_data_frame.merge(collected_characters_by_count_data_frame, how="inner", on="character")
merged_data_frame.to_csv('./characters.csv', index=False, header=True)

with open('./words_with_results.txt', 'w') as fp:
    fp.write('\n'.join(words_with_results))

  0%|          | 0/36 [00:00<?, ?it/s]