In [1]:
import re
from collections import Counter
from typing import Mapping, List

In [12]:
def parse(log_fname: str) -> Mapping[str, int]:
    """Count how often each word occurs in the query column of a log file.

    Every non-blank line of the file must hold exactly three tab-separated
    fields: timestamp, query string and user id. The query string is split
    on whitespace and each resulting word is tallied.

    Args:
        log_fname: Path of the tab-separated log file to read.

    Returns:
        A plain dict mapping each word to its number of occurrences, keyed
        in order of first appearance.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line does not have exactly three fields.
    """
    word_counts = Counter()

    with open(log_fname) as f:
        for line in f:
            # Skip blank lines (e.g. a trailing empty line) instead of
            # failing on the three-way unpacking below.
            if not line.strip():
                continue

            # Remove only the line terminator: a bare strip() would also eat
            # a leading or trailing tab and so lose an empty first/last field.
            _ts, words_string, _user_id = line.rstrip('\r\n').split('\t')
            word_counts.update(words_string.split())

    # Hand back a plain dict so callers see the same type (and repr) as before.
    return dict(word_counts)

In [13]:
# Run parse() on the sample log; the returned word -> count mapping is the cell output.
parse('../data/parse_logs_amazon.txt')

{'big': 2, 'data': 1, 'models': 1, 'cats': 2, 'video': 1, 'files': 1}

In [26]:
def is_valid_isbn(s: str) -> bool:
    """Check whether ``s`` is formatted like a 10-character ISBN.

    Two layouts are accepted:

    * hyphenated: digit groups of length 1, 5 and 3 followed by a single
      check character, e.g. ``1-23456-789-0``;
    * plain: nine digits followed by a single check character, e.g.
      ``1234567890``.

    The check character is an ASCII digit or a lowercase ``x``. Only the
    format is validated; the check character is not verified arithmetically.

    Args:
        s: Candidate string.

    Returns:
        True if ``s`` matches one of the two layouts, False otherwise.
    """
    # Explicit [0-9] classes rather than str.isnumeric(): isnumeric() is also
    # true for non-ASCII numerics (vulgar fractions, superscripts, full-width
    # digits), which must not be accepted as ISBN digits.
    if '-' in s:
        pattern = r'[0-9]-[0-9]{5}-[0-9]{3}-[0-9x]'
    else:
        pattern = r'[0-9]{9}[0-9x]'

    # fullmatch anchors at both ends, so the number of groups and the length
    # of every group are enforced by the pattern itself.
    return re.fullmatch(pattern, s) is not None

In [27]:
# Well-formed: hyphenated and plain layouts, with a digit or 'x' check character.
assert is_valid_isbn('1-23456-789-0')
assert is_valid_isbn('1-23456-789-x')
assert is_valid_isbn('1234567890')
assert is_valid_isbn('123456789x')

# Malformed: wrong grouping, missing separator, non-digit inside a group.
assert not is_valid_isbn('12345-6789x')
assert not is_valid_isbn('1-23456-7890')
assert not is_valid_isbn('1-2345w6-789-x')

# Boundaries: empty input and a plain string that is one character short.
assert not is_valid_isbn('')
assert not is_valid_isbn('123456789')

In [22]:
def logs_with_isbns(log_fname: str) -> List[str]:
    """Collect the queries from a log file that contain at least one ISBN.

    Every non-blank line of the file must hold exactly three tab-separated
    fields: timestamp, query string and user id. A query is selected when any
    of its whitespace-separated words passes ``is_valid_isbn``.

    Args:
        log_fname: Path of the tab-separated log file to read.

    Returns:
        The matching query strings in file order; a query appears once per
        matching line even if it contains several ISBNs.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a non-blank line does not have exactly three fields.
    """
    queries_with_isbns = []

    with open(log_fname) as f:
        for line in f:
            # Skip blank lines (e.g. a trailing empty line) instead of
            # failing on the three-way unpacking below.
            if not line.strip():
                continue

            # Remove only the line terminator: a bare strip() would also eat
            # a leading or trailing tab and so lose an empty first/last field.
            _ts, query, _user_id = line.rstrip('\r\n').split('\t')

            # any() stops at the first ISBN found, so each line contributes
            # its query at most once.
            if any(is_valid_isbn(word) for word in query.split()):
                queries_with_isbns.append(query)

    return queries_with_isbns

In [23]:
# Run logs_with_isbns() on the sample log; the list of matching queries is the cell output.
logs_with_isbns('../data/parse_logs_amazon.txt')

['paper 1-23456-789-0', 'science 1234567890']

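Mistakes I made while solving these exercises:

 - called `isnum`, which does not exist on `str`, instead of `isnumeric`
 - read only the first line of the file instead of looping over every line
 - first wrote "or" where "and" is needed; the correct check is: `if not parts[-1].isnumeric() and parts[-1] != 'x':`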