# Duplicate Encoder

The goal of this exercise is to convert a string to a new string where each character in the new string is "(" if that character appears only once in the original string, or ")" if that character appears more than once in the original string. Ignore capitalization when determining if a character is a duplicate.

Examples:

`"din"      =>  "((("`

`"recede"   =>  "()()()"`

`"Success"  =>  ")())())"`

`"(( @"     =>  "))(("`

Notes:

Assertion messages may be unclear about what they display in some languages.

If you read "...It Should encode XXX", the "XXX" is the expected result, not the input!

[LINK](https://www.codewars.com/kata/54b42f9314d9229fd6000d9c)

In [1]:
def duplicate_encode(word):
    """Encode ``word`` as a string of "(" and ")" symbols.

    Each character of the lowercased word becomes "(" when it occurs exactly
    once in the lowercased word and ")" when it occurs more than once, so
    capitalization is ignored when looking for duplicates.

    Args:
        word: The string to encode.

    Returns:
        The encoded string; an empty string for empty input.
    """
    from collections import Counter

    lowered = word.lower()
    # Count every character once up front instead of re-scanning the rest of
    # the word for each position.
    occurrences = Counter(lowered)
    return ''.join('(' if occurrences[ch] == 1 else ')' for ch in lowered)

In [2]:
# Sample case: no character of "din" repeats, so every symbol is "(".
answer = "((("
test = duplicate_encode("din")
# Show the actual result, the expected result, and whether they match.
print(test, answer, test == answer, sep="\n")

(((
(((
True


In [3]:
# Sample case: "e" occurs three times in "recede"; the other letters once.
answer = "()()()"
test = duplicate_encode("recede")
# Show the actual result, the expected result, and whether they match.
print(test, answer, test == answer, sep="\n")

()()()
()()()
True


In [4]:
# Sample case: mixed capitalization, "S" and "s" count as the same character.
answer = ")())())"
test = duplicate_encode("Success")
# Show the actual result, the expected result, and whether they match.
print(test, answer, test == answer, sep="\n")

)())())
)())())
True


In [5]:
# Sample case: non-letter input, where only "(" occurs more than once.
answer = "))(("
test = duplicate_encode("(( @")
# Show the actual result, the expected result, and whether they match.
print(test, answer, test == answer, sep="\n")

))((
))((
True


---

# Count by X

Create a function with two arguments that will return an array of the first `n` multiples of `x`.

Assume both the given number and the number of times to count will be positive numbers greater than `0`.

Return the results as an array or list ( depending on language ).

Examples:<br>
`count_by(1,10) #should return [1,2,3,4,5,6,7,8,9,10]`<br>
`count_by(2,5) #should return [2,4,6,8,10]`

[LINK](https://www.codewars.com/kata/5513795bd3fafb56c200049e)

In [6]:
def count_by(x, n):
    """Return the first ``n`` multiples of ``x`` as a list.

    Args:
        x: The number to count by.
        n: How many multiples to produce.

    Returns:
        ``[x * 1, x * 2, ..., x * n]``; an empty list when ``n`` is zero or
        negative.
    """
    # Multiply rather than keep a running sum: each entry is computed directly
    # from x, so float inputs do not accumulate rounding error.
    return [x * k for k in range(1, n + 1)]

In [7]:
# Sample case: the first 10 multiples of 1.
answer = [1,2,3,4,5,6,7,8,9,10]
test = count_by(1, 10)
# Show the actual result, the expected result, and whether they match.
print(test, answer, test == answer, sep="\n")

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
True


In [8]:
# Sample case: the first 5 multiples of 2.
answer = [2,4,6,8,10]
test = count_by(2, 5)
# Show the actual result, the expected result, and whether they match.
print(test, answer, test == answer, sep="\n")

[2, 4, 6, 8, 10]
[2, 4, 6, 8, 10]
True


In [9]:
# Sample case: the first 5 multiples of 1.
answer = [1, 2, 3, 4, 5]
test = count_by(1, 5)
# Show the actual result, the expected result, and whether they match.
print(test, answer, test == answer, sep="\n")

[1, 2, 3, 4, 5]
[1, 2, 3, 4, 5]
True


In [10]:
# Sample case: the first 5 multiples of 3.
answer = [3, 6, 9, 12, 15]
test = count_by(3, 5)
# Show the actual result, the expected result, and whether they match.
print(test, answer, test == answer, sep="\n")

[3, 6, 9, 12, 15]
[3, 6, 9, 12, 15]
True


In [11]:
# Sample case: the first 5 multiples of 50.
answer = [50, 100, 150, 200, 250]
test = count_by(50, 5)
# Show the actual result, the expected result, and whether they match.
print(test, answer, test == answer, sep="\n")

[50, 100, 150, 200, 250]
[50, 100, 150, 200, 250]
True


In [12]:
# Sample case: the first 5 multiples of 100.
answer = [100, 200, 300, 400, 500]
test = count_by(100, 5)
# Show the actual result, the expected result, and whether they match.
print(test, answer, test == answer, sep="\n")

[100, 200, 300, 400, 500]
[100, 200, 300, 400, 500]
True


---

# Most frequently used words in a text

Write a function that, given a string of text (possibly with punctuation and line-breaks), returns an array of the top-3 most occurring words, in descending order of the number of occurrences.

#### Assumptions:
* A word is a string of letters (A to Z) optionally containing one or more apostrophes (`'`) in ASCII.
* Apostrophes can appear at the start, middle or end of a word (`'abc`, `abc'`, `'abc'`, `ab'c` are all valid)
* Any other characters (e.g. `#`, `\`, `/` , `.` ...) are not part of a word and should be treated as whitespace.
* Matches should be case-insensitive, and the words in the result should be lowercased.
* Ties may be broken arbitrarily.
* If a text contains fewer than three unique words, then either the top-2 or top-1 words should be returned, or an empty array if a text contains no words.

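#### Examples:

`top_3_words("a a a  b  c c  d d d d  e e e e e")  =>  ["e", "d", "a"]`

`top_3_words("e e e e DDD ddd DdD: ddd ddd aa aA Aa, bb cc cC e e e")  =>  ["e", "ddd", "aa"]`

`top_3_words("  //wont won't won't ")  =>  ["won't", "wont"]`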

#### Bonus points (not really, but just for fun):
* Avoid creating an array whose memory footprint is roughly as big as the input text.
* Avoid sorting the entire array of unique words.

[LINK](https://www.codewars.com/kata/51e056fe544cf36c410000fb)

In [13]:
import re

def top_3_words(text):
    """Return up to three of the most frequent words in ``text``.

    A word is a run of ASCII letters and apostrophes that contains at least
    one letter; every other character separates words. Words are counted
    case-insensitively and returned lowercased.

    Args:
        text: The string to analyse.

    Returns:
        A list of at most three lowercased words in descending order of
        occurrence count; an empty list when the text contains no words.
    """
    import heapq

    counts = {}
    # Match on the untouched text with an explicit ASCII class and lowercase
    # each word afterwards. Lowercasing the whole text first would let
    # characters such as the Kelvin sign (U+212A), which lowercases to "k",
    # be counted as letters. Iterating matches lazily also avoids building a
    # list of every word in the text.
    for match in re.finditer(r"[A-Za-z']+", text):
        word = match.group().lower()
        # A run made only of apostrophes is not a word.
        if word.strip("'"):
            counts[word] = counts.get(word, 0) + 1

    # Pick the three highest counts without sorting all unique words.
    return heapq.nlargest(3, counts, key=counts.get)

In [14]:
# Sample case: single-letter words where "e", "d" and "a" are the most frequent.
answer = ["e", "d", "a"]
text = "a a a  b  c c  d d d d  e e e e e"
test = top_3_words(text)
# Show the expected result, the actual result, and whether they match.
print(answer, test, answer == test, sep="\n")

['e', 'd', 'a']
['e', 'd', 'a']
True


In [15]:
# Sample case: mixed capitalization and punctuation between words.
answer = ["e", "ddd", "aa"]
text = "e e e e DDD ddd DdD: ddd ddd aa aA Aa, bb cc cC e e e"
test = top_3_words(text)
# Show the expected result, the actual result, and whether they match.
print(answer, test, answer == test, sep="\n")

['e', 'ddd', 'aa']
['e', 'ddd', 'aa']
True


In [16]:
# Sample case: an apostrophe inside a word, with fewer than three unique words.
answer = ["won't", "wont"]
text = "  //wont won't won't "
test = top_3_words(text)
# Show the expected result, the actual result, and whether they match.
print(answer, test, answer == test, sep="\n")

["won't", 'wont']
["won't", 'wont']
True


In [17]:
# Sample case: a single word surrounded by punctuation.
answer = ["e"]
text = "  , e   .. "
test = top_3_words(text)
# Show the expected result, the actual result, and whether they match.
print(answer, test, answer == test, sep="\n")

['e']
['e']
True


In [18]:
# Sample case: punctuation only, so there are no words at all.
answer = []
text = "  ...  "
test = top_3_words(text)
# Show the expected result, the actual result, and whether they match.
print(answer, test, answer == test, sep="\n")

[]
[]
True


In [19]:
# Sample case: a lone apostrophe is not a word.
answer = []
text = "  '  "
test = top_3_words(text)
# Show the expected result, the actual result, and whether they match.
print(answer, test, answer == test, sep="\n")

[]
[]
True


In [20]:
# Sample case: a run of apostrophes without any letter is not a word.
answer = []
text = "  '''  "
test = top_3_words(text)
# Show the expected result, the actual result, and whether they match.
print(answer, test, answer == test, sep="\n")

[]
[]
True


In [21]:
# Sample case: a multi-line prose passage with punctuation and hyphens.
answer = ["a", "of", "on"]
text = """In a village of La Mancha, the name of which I have no desire to call to
mind, there lived not long since one of those gentlemen that keep a lance
in the lance-rack, an old buckler, a lean hack, and a greyhound for
coursing. An olla of rather more beef than mutton, a salad on most
nights, scraps on Saturdays, lentils on Fridays, and a pigeon or so extra
on Sundays, made away with three-quarters of his income."""
test = top_3_words(text)
# Show the expected result, the actual result, and whether they match.
print(answer, test, answer == test, sep="\n")

['a', 'of', 'on']
['a', 'of', 'on']
True


In [22]:
import unittest

class TestTop3Words(unittest.TestCase):
    """Checks top_3_words against a fixed set of sample texts."""

    def test(self):
        """Assert that each sample text yields its expected word ranking."""
        quixote = """In a village of La Mancha, the name of which I have no desire to call to
        mind, there lived not long since one of those gentlemen that keep a lance
        in the lance-rack, an old buckler, a lean hack, and a greyhound for
        coursing. An olla of rather more beef than mutton, a salad on most
        nights, scraps on Saturdays, lentils on Fridays, and a pigeon or so extra
        on Sundays, made away with three-quarters of his income."""

        # (input text, expected result) pairs, checked in this order.
        samples = [
            ("a a a  b  c c  d d d d  e e e e e", ["e", "d", "a"]),
            ("e e e e DDD ddd DdD: ddd ddd aa aA Aa, bb cc cC e e e", ["e", "ddd", "aa"]),
            ("  //wont won't won't ", ["won't", "wont"]),
            ("  , e   .. ", ["e"]),
            ("  ...  ", []),
            ("  '  ", []),
            ("  '''  ", []),
            (quixote, ["a", "of", "on"]),
        ]

        for sample_text, expected in samples:
            self.assertEqual(top_3_words(sample_text), expected)

# Run only the TestTop3Words case with verbose per-test output.
# argv=[''] supplies a bare program name so unittest does not read sys.argv
# (presumably to ignore the notebook kernel's own arguments), and exit=False
# stops unittest.main from calling sys.exit() once the run finishes.
unittest.main(argv=[''], defaultTest='TestTop3Words', verbosity=2, exit=False)

test (__main__.TestTop3Words.test) ... ok

----------------------------------------------------------------------
Ran 1 test in 0.002s

OK


<unittest.main.TestProgram at 0x28372e9acd0>

### Another, amazingly short solution

In [23]:
import re
from collections import Counter

def top_3_words(text):
    """Return up to three of the most frequent words in ``text``.

    A word is a run of ASCII letters and apostrophes that contains at least
    one letter. Words are counted case-insensitively and returned lowercased.

    Args:
        text: The string to analyse.

    Returns:
        A list of at most three lowercased words in descending order of
        occurrence count; an empty list when the text contains no words.
    """
    # Match ASCII letters explicitly on the original text and lowercase each
    # word afterwards: lowercasing the whole text first would turn characters
    # such as the Kelvin sign (U+212A) into "k" and count them as words.
    words = (m.group().lower() for m in re.finditer(r"'*[A-Za-z][A-Za-z']*", text))
    return [word for word, _ in Counter(words).most_common(3)]