In [1]:
import numpy as np
import pandas as pd
import networkx as nx
from random import sample

In [2]:
all_words = pd.read_csv("dictionary.csv", names=["word"])
all_words.columns
all_words.astype(str)

Unnamed: 0,word
0,aa
1,aah
2,aahed
3,aahing
4,aahs
...,...
172815,zymotic
172816,zymurgies
172817,zymurgy
172818,zyzzyva


In [3]:
four_letter_locs = all_words["word"].apply(lambda x: len(str(x)) == 4)
four_letters = all_words[four_letter_locs]

In [4]:
four_letters

Unnamed: 0,word
4,aahs
8,aals
46,abas
83,abba
88,abbe
...,...
172694,zoom
172705,zoon
172726,zoos
172746,zori


In [5]:
vowels = ("a", "e", "i", "o", "u")
def get_composition(s : str) -> str:
        
    composition = []
    for i, letter in enumerate(s):
        if letter in vowels:
            composition += "V"
            
        # basic decision making for if y is a vowel
        # y's at the end of words are always vowels
        # y's followed by vowels are consonants
        # all other y's are vowels
        elif letter == "y":
            if i == len(s)-1:
                composition += "V"
            elif s[i+1] in vowels:
                composition += "C"
            else:
                composition += "V"

        else:
            composition += "C"
    
    return "".join(composition)    

In [6]:
four_letters["composition"] = four_letters["word"].apply(lambda x: get_composition(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  four_letters["composition"] = four_letters["word"].apply(lambda x: get_composition(x))


In [7]:
four_letters

Unnamed: 0,word,composition
4,aahs,VVCC
8,aals,VVCC
46,abas,VCVC
83,abba,VCCV
88,abbe,VCCV
...,...,...
172694,zoom,CVVC
172705,zoon,CVVC
172726,zoos,CVVC
172746,zori,CVCV


In [8]:
words = four_letters["word"]

In [9]:
words

4         aahs
8         aals
46        abas
83        abba
88        abbe
          ... 
172694    zoom
172705    zoon
172726    zoos
172746    zori
172801    zyme
Name: word, Length: 3902, dtype: object

In [14]:
small_words = ["pool", 
               "poon", 
               "loon", 
               "swim", 
               "slim", 
               "slam", 
               "slot", 
               "slop", 
               "poop",
               "plot",
               "plop",
               "gray",
               "slip"]

In [21]:
G_small = nx.Graph()
G_small.add_nodes_from(small_words)
G_small.add_edge("pool", "poon")

In [29]:
G_small.edges.items()

ItemsView(EdgeView([('pool', 'poon'), ('pool', 'poop'), ('poon', 'loon'), ('poon', 'poop'), ('swim', 'slim'), ('slim', 'slam'), ('slim', 'slip'), ('slot', 'slop'), ('slot', 'plot'), ('slop', 'plop'), ('slop', 'slip'), ('poop', 'plop'), ('plot', 'plop')]))

In [24]:
def shares_edge(s1: str, s2: str) -> bool: 
    if len(s1) != len(s2):
        raise ValueError('Two strings must have the same length to share an edge')
    
    if s1 == s2: 
        raise ValueError('Two strings must not be the same to share an edge')
    
    dif = 0
    for i in range(len(s1)):
        if s1[i] != s2[i]:
            if dif:
                return False
            else:
                dif = 1
    return True

In [25]:
tests = [("poon", "pool", True), ("poon", "loop", False)]
for s1, s2, res in tests:
    print(s1, s2, shares_edge(s1, s2), "expected: ", res)

poon pool True expected:  True
poon loop False expected:  False


In [28]:
for i, w1 in enumerate(small_words):
    for j, w2 in enumerate(small_words[i+1:]):
        #print(w1, w2, shares_edge(w1, w2))
        if shares_edge(w1, w2):
            G_small.add_edge(w1, w2)

In [32]:
nx.shortest_path_length(G_small, "pool", "swim")
paths = nx.all_shortest_paths(G_small, "pool", "swim")

In [33]:
for p in paths: 
    print(p)

['pool', 'poop', 'plop', 'slop', 'slip', 'slim', 'swim']
