## Propulsion Academy Data Science curriculum
### Day 2: Python programming challenges

#### Challenge 1
Print the difference between two UNIX timestamps as human-readable time

In [1]:
# Import the libraries (only datetime is actually used below; dateutil and math are not needed)
import datetime
import dateutil.relativedelta
import math
def describe_time_difference(ts_start, ts_end):
    """Describe the gap between two UNIX timestamps as whole hours and minutes.

    Args:
        ts_start: a UNIX timestamp (seconds since the epoch).
        ts_end: another UNIX timestamp (seconds since the epoch).

    Returns:
        A sentence such as "The time difference is 1 hour 28 minutes".
        The order of the arguments does not matter (the absolute gap is
        reported), gaps longer than a day are expressed in hours (e.g.
        "25 hours 5 minutes"), and left-over seconds are dropped.
    """
    # Timezone-aware UTC datetimes: naive local datetimes would give a wrong
    # gap whenever the two instants straddle a daylight-saving change.
    start = datetime.datetime.fromtimestamp(ts_start, tz=datetime.timezone.utc)
    end = datetime.datetime.fromtimestamp(ts_end, tz=datetime.timezone.utc)

    # Work on the number of seconds rather than on str(timedelta): the string
    # form turns into "1 day, 1:05:00" for long gaps and cannot be split on ":".
    total_seconds = int(abs(end - start).total_seconds())
    hours, leftover_seconds = divmod(total_seconds, 3600)
    minutes = leftover_seconds // 60

    hour_label = "hour" if hours == 1 else "hours"
    minute_label = "minute" if minutes == 1 else "minutes"
    return "The time difference is %d %s %d %s" % (hours, hour_label, minutes, minute_label)


# print the time difference for the two timestamps of the challenge
print(describe_time_difference(1493560132, 1493565412))

The time difference is 1 hour 28 minutes


#### Challenge 2
Given a string, print which words appear within which other words

In [3]:
# re is imported here, but the solution below uses plain substring tests (`in`) rather than regular expressions
import re

def find_contained_words(sentence):
    """Find the words of a sentence that appear inside other words of it.

    Args:
        sentence: the text to analyse; it is split on whitespace.

    Returns:
        A list of ``(word, containers)`` tuples in order of first appearance,
        where ``containers`` lists every distinct word of the sentence that
        contains ``word`` as a substring (the word itself included). Words
        that are only contained in themselves are left out. The match is
        case-sensitive.
    """
    # Drop repeated words while keeping their order (dicts preserve insertion
    # order); otherwise a repeated word would "contain" its own duplicate and
    # be reported once per repetition.
    unique_words = list(dict.fromkeys(sentence.split()))

    matches = []
    for word in unique_words:
        containers = [candidate for candidate in unique_words if word in candidate]
        # every word contains itself, so more than one hit means a real match
        if len(containers) > 1:
            matches.append((word, containers))
    return matches


# Named `sentence` rather than `string` so that the standard-library `string`
# module (needed by Challenge 3) is not shadowed in the shared kernel namespace.
sentence = "There is a palm tree on the island"

for word, containers in find_contained_words(sentence):
    print("\"" + word + "\"" + " is in words " + ", ".join(containers))



"is" is in words is, island
"a" is in words a, palm, island


#### Challenge 3
Print all words in a string that are anagrams of each other

In [1]:
import six
import string

def list_anagrams(s):
    """Print the groups of words in ``s`` that are anagrams of each other.

    Punctuation is stripped and the text is lowercased before the words are
    compared, so the check ignores case and punctuation. Repeated words are
    only counted once.

    For each group of anagrams one line is printed, naming the first word of
    the group followed by its anagrams; a final line summarises every word
    that belongs to a group.

    Args:
        s: the sentence to analyse.

    Returns:
        None. The result is printed, not returned.
    """
    # first we remove the punctuation. translate is very fast, and uses a dictionary of
    # known punctuation characters (which is why we have to import string)
    punctuation_remover = str.maketrans('', '', string.punctuation)

    # then we convert the string to lowercase, and split it into words
    # (split() without argument splits on any run of whitespace)
    words = s.translate(punctuation_remover).lower().split()

    # this is our main data structure: a dictionary mapping a 'letter bag' (the sorted
    # characters of a word) to the distinct words sharing it, in order of appearance.
    # Words that land in the same bag are anagrams of each other. Building it in a
    # single pass avoids rescanning every word for each word.
    groups = {}
    for word in words:
        members = groups.setdefault(''.join(sorted(word)), [])
        if word not in members:
            members.append(word)

    cases = []
    for members in groups.values():
        if len(members) < 2:
            # a word is not an anagram of itself
            continue
        word, anagrams = members[0], members[1:]
        cases.extend(members)

        print('%s has %d anagram%s: %s' % (word,
                                           len(anagrams),
                                           's' if len(anagrams) > 1 else '',
                                           ', '.join(anagrams),)
             )
    print('The sentence has %d anagrams: %s' % (len(cases), ', '.join(cases)))

In [2]:
# baseline example: an all-lowercase sentence whose only anagram pair is listen / silent
list_anagrams('how can you listen if you are not silent')

listen has 1 anagram: silent
silent has 1 anagram: listen


In [3]:
# the function is insensitive to case and punctuation: punctuation is stripped and
# the text is lowercased before the words are compared
list_anagrams('how can you LISTEN! if you are not siLent???')

listen has 1 anagram: silent
silent has 1 anagram: listen


In [4]:
# repeated words are only counted once: here 'ACES!' and 'aces' both normalise to 'aces'
list_anagrams('ACES! I have aces in my case and some TOAD dato doat?')

aces has 1 anagram: case
aces has 1 anagram: case
case has 1 anagram: aces
toad has 2 anagrams: dato, doat
dato has 2 anagrams: doat, toad
doat has 2 anagrams: dato, toad


#### Challenge 4
Write an email validation function, returning True or False depending on whether the email address is correctly formatted.

In [7]:
def email_is_valid(s):
    """Tell whether ``s`` looks like a correctly formatted email address.

    The address is validated iteratively against several criteria, and True is
    returned only if all of them pass:

    * it is a string containing exactly one ``@``, with a non-empty part on
      each side;
    * the local part only uses the unquoted characters listed in
      https://en.wikipedia.org/wiki/Email_address#Local-part, and its periods
      are neither leading, trailing, nor consecutive;
    * the domain only uses letters, digits, hyphens and periods, consists of
      at least two period-separated labels, and no label is empty or starts
      or ends with a hyphen.

    Whether the TLD actually exists is beyond the scope of this check. Quoted
    local parts and IP-literal domains are not supported and are rejected.

    Args:
        s: the candidate email address.

    Returns:
        True if the address passes every check, False otherwise (including
        when ``s`` is not a string).
    """
    if not isinstance(s, str):
        # None, bytes, numbers... cannot be a valid address
        return False

    # does the email contain an @?
    try:
        local, domain = s.split('@')
    except ValueError:
        # if there isn't exactly one @ in the string, the unpacking raises
        # ValueError: too many/not enough values to unpack
        return False

    if (not local) or (not domain):
        # this will catch addresses like @domain.com or user@
        return False

    # let's make sure that only legit characters are contained in the local and domain parts
    allowed_chars_local = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789!#$%&'*+-/=?^_`{|}~."
    if any(c not in allowed_chars_local for c in local):
        return False

    # a period is allowed in the local part, but not first, last, or doubled
    if local.startswith('.') or local.endswith('.') or '..' in local:
        return False

    allowed_chars_domain = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-.'
    if any(c not in allowed_chars_domain for c in domain):
        return False

    # there must be at least one period in the domain, i.e. at least two labels
    labels = domain.split('.')
    if len(labels) < 2:
        return False

    for label in labels:
        # an empty label means a leading, trailing or doubled period;
        # hostname labels may not start or end with a hyphen either
        if (not label) or label.startswith('-') or label.endswith('-'):
            return False

    return True

In [8]:
# a well-formed address passes every check
email_is_valid('someuser@somedomain.com')

True

In [9]:
# nothing after the @: the empty domain part is rejected
email_is_valid('someuser@')

False

In [10]:
# the domain starts with a period, so it is rejected
email_is_valid('someuser@.com')

False

In [11]:
# a space is not among the allowed local-part characters, so it is rejected
email_is_valid('some user@somedomain.com')

False