## Problem: Write data columns

Using the following data,

    data=[5,4,6,1,9,0,3,9,2,7,10,8,4,7,1,2,7,6,5,2,8,2,0,1,1,1,2,10,6,2]

write a function `write_columns` to write the three following calculated columns to a user-specified comma-separated file:

    data_value, data_value**2, (data_value+data_value**2)/3.
    
Your written floating-point values should be formatted to the hundredths place. Your function can only accept lists of integers/floats as input.

In [1]:
# Sample input for write_columns: the list given in the problem statement.
data=[5,4,6,1,9,0,3,9,2,7,10,8,4,7,1,2,7,6,5,2,8,2,0,1,1,1,2,10,6,2]
def write_columns(data,fname):
    '''
    Write three comma-separated columns derived from data to fname.

    Each value ``x`` in data produces one line holding ``x``, ``x**2`` and
    ``(x + x**2)/3``.  The third column is always written with two decimal
    places.  The first two columns are written as integers when ``x`` is an
    int and with two decimal places when ``x`` is a float, so float inputs
    are no longer truncated.  The file is created or overwritten.

    :param data: non-empty list of ints and/or floats
    :type data: list
    :param fname: name of the output file
    :type fname: str
    :returns: None
    :raises AssertionError: if either argument fails validation
    '''
    # Check the type first so a non-string fname fails with the intended
    # AssertionError instead of an AttributeError from .isspace().
    assert isinstance(fname,str),"confirm name of file must be string"
    assert fname.isspace() == False,"make sure file name is not an empty space"
    assert len(fname) >= 1,"at least 1 character is required for file name"

    assert isinstance(data,list),"input data must be a list"
    assert bool(data) == True,"make sure data is more than 0"
    assert all(isinstance(item, (int, float)) for item in data),"input data must be a list of ints or float"

    # The context manager closes the file even if a write fails part-way.
    with open(fname,"w") as f:
        for value in data:
            square = value**2
            if isinstance(value, int):
                leading = "%d, %d" % (value, square)
            else:
                # %d would silently drop the fractional part of a float.
                leading = "%.2f, %.2f" % (value, square)
            # Dividing by 3.0 forces true division for integer input.
            f.write("%s, %.2f \n" % (leading, (value + square) / 3.0))

## Problem: Text Processing

Download this [corpus of 10,000 common English words](https://storage.googleapis.com/class-notes-181217.appspot.com/google-10000-english-no-swears.txt) and write the indicated functions that answer the following questions:

- What is the longest word?
- What is the longest word that starts with a particular character (e.g., `s`)?
- What is the most common starting letter?
- What is the most common ending letter?

Your functions should only take a list of words as input.

In [2]:
# you can use this bit of code to download the words from the corpus
from collections import Counter
from urllib.request import urlopen

# Location of the word corpus used by the text-processing functions below.
u='https://storage.googleapis.com/class-notes-181217.appspot.com/google-10000-english-no-swears.txt'
# Use the response as a context manager so the HTTP connection is released
# as soon as the lines have been read, even if decoding raises.
with urlopen(u) as response:
    # Each line arrives as bytes; strip surrounding whitespace and decode.
    words = [i.strip().decode('utf8') for i in response.readlines()]

# write a function to compute the longest word
def get_longest_word(words):
    """Find the longest word in a list of words.

    When several words share the maximal length, the one that appears
    first in ``words`` is reported.

    :param words: non-empty list of strings
    :type words: list
    :returns: tuple ``(word, length)`` for the longest word
    :raises AssertionError: if ``words`` is empty, not a list, or holds
        anything other than strings
    """
    assert words, "make sure input words is not empty"
    assert isinstance(words,list), "input words must be type list"
    assert all(isinstance(entry, str) for entry in words),"inout words must be a list of string"

    # max() returns the first maximal element, so ties resolve to the
    # earliest word in the list.
    longest = max(words, key=len)
    return (longest, len(longest))
    
def get_longest_words_startswith(words,starts):
    """Return the longest word in ``words`` whose first character is ``starts``.

    ``starts`` is lower-cased before matching; the words themselves are
    compared as given.  Empty strings in ``words`` are skipped because they
    have no first character.  When several matching words share the maximal
    length, the alphabetically last one is returned.  If no word matches,
    a single space ``" "`` is returned.

    :param words: non-empty list of strings
    :type words: list
    :param starts: the single starting character to match
    :type starts: str
    :returns: the longest matching word, or ``" "`` when nothing matches
    :raises AssertionError: if either argument fails validation
    """
    assert bool(words) == True,"words cannot be empty"
    assert isinstance(words,list),"words type must be type list"
    assert all(isinstance(item, str) for item in words),"words must be a list of string"

    assert isinstance(starts,str),"ensure starting letter type string"
    assert starts.isspace() == False,"ensure it not an empty string"
    assert len(starts) == 1,"string have to be a single letter"
    starts = starts.lower() #convert upper to lower letter

    # Slicing with [:1] yields "" for an empty word instead of raising
    # IndexError the way word[0] would.
    candidates = [word for word in words if word[:1] == starts]
    if not candidates:
        # Kept for backward compatibility with the no-match result.
        return " "
    # Order by length first, then alphabetically, so ties go to the
    # alphabetically last word.
    return max(candidates, key=lambda word: (len(word), word))

def get_most_common_start(words):
    """Return the character that most often begins a word in ``words``.

    Empty strings are ignored because they have no first character.  When
    several characters are equally common, the one encountered first in
    ``words`` is returned.

    :param words: non-empty list of strings
    :type words: list
    :returns: the most common starting character
    :raises AssertionError: if ``words`` fails validation or contains only
        empty strings
    """
    assert bool(words) == True,"words cannot be empty"
    assert isinstance(words,list),"words type must be type list"
    assert all(isinstance(item, str) for item in words),"words must be a list of string"

    # Tally first characters, skipping empty strings that would otherwise
    # raise IndexError on word[0].
    first_letters = Counter(word[0] for word in words if word)
    assert first_letters,"words must contain at least one non-empty string"

    # Counter keeps insertion order and max() returns the first maximal
    # key, so ties resolve to the character seen first.
    return max(first_letters, key=first_letters.get)

def get_most_common_end(words):
    """Return the character that most often ends a word in ``words``.

    Empty strings are ignored because they have no last character.  When
    several characters are equally common, the one encountered first in
    ``words`` is returned.

    :param words: non-empty list of strings
    :type words: list
    :returns: the most common ending character
    :raises AssertionError: if ``words`` fails validation or contains only
        empty strings
    """
    assert bool(words) == True,"words cannot be empty"
    assert isinstance(words,list),"words must be a list"
    assert all(isinstance(item, str) for item in words),"words must be a list of string"

    # Tally last characters, skipping empty strings that would otherwise
    # raise IndexError on word[-1].
    last_letters = Counter(word[-1] for word in words if word)
    assert last_letters,"words must contain at least one non-empty string"

    # Counter keeps insertion order and max() returns the first maximal
    # key, so ties resolve to the character seen first.
    return max(last_letters, key=last_letters.get)

In [3]:
### BEGIN  TESTS
# The longest word in `words` beginning with 's' is expected to be 'sustainability'.
assert get_longest_words_startswith(words,'s')=='sustainability'
### END  TESTS

In [4]:
### BEGIN  TESTS
# The most common final letter across `words` is expected to be 's'.
assert get_most_common_end(words)=='s'
### END  TESTS

In [5]:
# Each text-processing function must carry a docstring longer than one character.
assert len(get_longest_word.__doc__)>1
assert len(get_most_common_start.__doc__)>1
assert len(get_most_common_end.__doc__)>1
assert len(get_longest_words_startswith.__doc__)>1

## Problem: Write chunks of five

Using the same [corpus of 10,000 common English words](https://storage.googleapis.com/class-notes-181217.appspot.com/google-10000-english-no-swears.txt) as before,
create a new file that consists of each
consecutive non-overlapping sequence of five lines merged into one line. Here
are the first 10 lines:

    the of and to a
    in for is on that
    by this with i you
    it not or be are
    from at as your all
    have new more an was
    we will home can us
    about if page my has
    search free but our one
    other do no information time

If the last group has fewer than five words, just write out the last group.

In [6]:
def write_chunks_of_five(words,fname):
    '''
    Write words to fname, five per line, separated by single spaces.

    Consecutive non-overlapping groups of five words each become one line.
    A final group with fewer than five words is written as is.  Lines are
    separated by a newline and carry no trailing space.  The file is
    created or overwritten.

    :param words: non-empty list of strings
    :type words: list
    :param fname: name of the output file
    :type fname: str
    :returns: None
    :raises AssertionError: if either argument fails validation
    '''
    assert isinstance(fname,str),"file name must be of type string"
    assert fname.isspace() == False,"string cannot be empty"
    assert len(fname) >= 1,"string must have a least one char"

    assert isinstance(words,list),"words must be a list"
    assert bool(words) == True,"words cannot be empty"
    assert all(isinstance(item, str) for item in words),"words must be a list of string"

    chunk_size = 5
    # Joining each slice avoids the trailing space that writing
    # "word " one item at a time leaves at the end of every line.
    lines = [
        " ".join(words[start:start + chunk_size])
        for start in range(0, len(words), chunk_size)
    ]

    # The context manager closes the file even if the write fails.
    with open(fname,"w") as f:
        f.write("\n".join(lines))