In [107]:
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
import requests
import re
import tdqm

In [133]:
# Output directory for results. Raw string: the backslash is a literal path
# separator, not the start of an escape sequence.
outpath = r'E:\WordAssociations'

def single_associations(word:str) -> set:
    """
    Scrape wordassociations.net for every word associated with a single word.

    The noun, adjective, verb and adverb sections of the result page are
    merged into one set; a section missing from the page contributes nothing.

    :param word: word to be checked
    :return: set of the link texts found in the four part-of-speech sections
        (empty if the page has none of them)
    :raises AssertionError: if the response status code is not 2xx
    """
    # Local import: the notebook's import cell does not bring in urllib.
    from urllib.parse import quote

    base_url = 'https://wordassociations.net/en/words-associated-with/'
    # Percent-encode the word so characters such as '/', '?' or '#' cannot
    # alter the path of the request.
    url = base_url + quote(word, safe='')

    # A timeout keeps a stalled connection from hanging the whole crawl.
    page = requests.get(url, timeout=30)
    # Raised explicitly (not via `assert`) so the check survives `python -O`.
    if not str(page.status_code).startswith('2'):
        raise AssertionError(f'Page failed to load. Error code: {page.status_code}')

    soup = BeautifulSoup(page.content, 'html.parser')

    associated = set()
    for part_of_speech in ('NOUN', 'ADJECTIVE', 'VERB', 'ADVERB'):
        sections = soup.find_all('div', class_=f'section {part_of_speech}-SECTION')
        if sections:
            # Only the first matching section of each kind is read.
            associated.update(tag.text for tag in sections[0].find_all('a'))
    return associated

In [135]:
def double_associations(word1, word2):
    """
    Return the words associated with both of the given words.

    :param word1: first word to look up
    :param word2: second word to look up
    :return: set of words present in the associations of both inputs
    """
    # word1 is looked up first, then word2.
    associated_with_first = single_associations(word1)
    return associated_with_first.intersection(single_associations(word2))

In [136]:
def incept_associations(word1:str, word2:str, levels:int=2):
    """
    Find words linking two words through progressively longer association chains.

    Level 0 holds the words directly associated with both inputs. Each further
    level looks up the associations of the words reachable from only one side,
    adds them to that side, and reports the words that have newly become
    common to both sides.

    :param word1: first word
    :param word2: second word
    :param levels: number of levels to compute (level 0 included); must be > 1
    :return: list of ``levels`` sets; entry ``i`` holds the common words first
        found at level ``i``
    :raises AssertionError: if ``levels`` is not greater than 1
    """
    if not levels > 1:
        raise AssertionError('levels must be greater than 1')

    # Everything reachable so far from each starting word.
    reach1 = set(single_associations(word1))
    reach2 = set(single_associations(word2))
    intersections = [reach1 & reach2]

    # Common words already reported, so each level lists only new ones.
    found = set(intersections[0])
    # Words whose associations were already fetched; never fetched again.
    expanded1, expanded2 = set(), set()

    for _ in range(1, levels):
        # Both frontiers are fixed before either side grows, so the words
        # expanded at this level depend only on the previous level's state.
        frontier1 = reach1 - reach2 - expanded1
        frontier2 = reach2 - reach1 - expanded2
        for w in frontier1:
            reach1 |= single_associations(w)
        for w in frontier2:
            reach2 |= single_associations(w)
        expanded1 |= frontier1
        expanded2 |= frontier2

        newly_common = (reach1 & reach2) - found
        intersections.append(newly_common)
        found |= newly_common
    return intersections

In [143]:
def main():
    """
    Demo: print the words associated with both 'wolf' and 'music', then the
    per-level common words found by a two-level search.
    """
    print(double_associations('wolf','music'))
    common_words = incept_associations('wolf', 'music', levels=2)
    for i, words in enumerate(common_words):
        print(f'Level {i}:\n{words}')

In [144]:
# Run the demo only when this code is executed as the main module.
if __name__ == '__main__':
    main()

set()
Level 0:
set()
Level 1:
{'Tori', 'Jump', 'Seal', 'Playhouse', 'Hercules', 'Whirl', 'Gillian', 'Noise', 'Visionary', 'Lamar', 'Johannes', 'Lore', 'Mickey', 'Barley', 'Fiddle', 'Ives', 'Assortment', 'Swing', 'Fry', 'Heavy', 'Cracker', 'Folder', 'Evaporate', 'Combo', 'Henrik', 'Donkey', 'Celtic', 'Babylon', 'Keats', 'Nyc', 'Royale', 'Techno', 'Rocker', 'Composer', 'Sacred', 'Jakob', 'Robbie', 'Presenter', 'Paced', 'Syriac', 'Trojan', 'Monterey', 'Peterborough', 'Gutter', 'Orpheus', 'Brewing', 'Growl', 'Carole', 'Revelation', 'Sonnet', 'Keynes', 'Spears', 'Ankara', 'Biblical', 'Ninth', 'Audience', 'Karma', 'Thriller', 'Quintet', 'Madonna', 'Cm', 'Avatar', 'Mesopotamia', 'Torrent', 'Cursing', 'Pear', 'Freestyle', 'Resurrection', 'Kingston', 'Van', 'Nickel', 'Cincinnati', 'Sebastian', 'Flat', 'Quixote', 'Nate', 'Monumental', 'Rage', 'Horror', 'Lobo', 'Alejandro', 'Godfather', 'Gee', 'Headlight', 'Jackson', 'Mammal', 'Vladimir', 'Brew', 'Dd', 'Andreas', 'Bully', 'Vapor', 'Superhero', 'C