In [1]:
# A tweet modelled as a dictionary; the value under "hashtags" is itself a list.
tweet = dict(
    user="joelgrus",
    text="Data Science is Awesome",
    retweet_count=100,
    hashtags=["#data", "#science", "#datascience", "#awesome", "#yolo"],
)

In [2]:
tweet

{'user': 'joelgrus',
 'text': 'Data Science is Awesome',
 'retweet_count': 100,
 'hashtags': ['#data', '#science', '#datascience', '#awesome', '#yolo']}

## Slicing a list stored in the dictionary

In [5]:
tweet["hashtags"]   # the value stored under "hashtags" is a list, so list indexing/slicing applies

['#data', '#science', '#datascience', '#awesome', '#yolo']

In [6]:
tweet["hashtags"][:3]   # first three elements

['#data', '#science', '#datascience']

In [7]:
tweet["hashtags"][::2]   # every second element, starting at index 0

['#data', '#datascience', '#yolo']

In [8]:
tweet["hashtags"][-2]   # negative index counts from the end: second-to-last element

'#awesome'

In [13]:
tweet["hashtags"][-3:]   # last three elements

['#datascience', '#awesome', '#yolo']

In [21]:
tweet["hashtags"][-3:-1]   # from the third-to-last element up to, but not including, the last

['#datascience', '#awesome']

In [22]:
tweet["hashtags"][0]   # first element

'#data'

## Dictionary keys, values, and items

In [23]:
tweet_keys   = tweet.keys()     # dict_keys view over the keys (iterable, not a list)
tweet_keys

dict_keys(['user', 'text', 'retweet_count', 'hashtags'])

In [24]:
tweet_values = tweet.values()   # dict_values view over the values (iterable, not a list)
tweet_values

dict_values(['joelgrus', 'Data Science is Awesome', 100, ['#data', '#science', '#datascience', '#awesome', '#yolo']])

In [25]:
tweet_items  = tweet.items()    # dict_items view over the (key, value) tuples (iterable, not a list)
tweet_items

dict_items([('user', 'joelgrus'), ('text', 'Data Science is Awesome'), ('retweet_count', 100), ('hashtags', ['#data', '#science', '#datascience', '#awesome', '#yolo'])])

## Two different ways to achieve the same results

In [26]:
# Sample text used by the letter-counting cells below. The two adjacent string
# literals are joined into a single string; the first one ends with a newline.
document = (
    "It is a collection of beliefs and theory that together rationalize the trade-offs and compromises societes make-play a crucial role\n"
    "A ideology shapes the way a society is organized and therefore informs the arguments for an \"inequality regime\" "
)

In [31]:
# First approach: explicit membership test before incrementing.
letter_counts = {}
for letter in document:
    # Seed unseen characters with 0 so the increment below always has a value to add to.
    if letter not in letter_counts:
        letter_counts[letter] = 0
    letter_counts[letter] += 1

In [32]:
print(letter_counts)

{'I': 1, 't': 17, ' ': 37, 'i': 16, 's': 13, 'a': 19, 'c': 7, 'o': 18, 'l': 9, 'e': 27, 'n': 10, 'f': 7, 'b': 1, 'd': 6, 'h': 8, 'r': 14, 'y': 6, 'g': 5, 'z': 2, '-': 2, 'm': 6, 'p': 3, 'k': 1, 'u': 3, '\n': 1, 'A': 1, 'w': 1, '"': 2, 'q': 1}


In [35]:
letter_counts = {}
# iterate over each character in the document
for letter in document:
    # look up the count so far; .get returns 0 when the character has not been seen yet
    previous_count = letter_counts.get(letter, 0)
    # the character is the dictionary key; store its incremented count
    letter_counts[letter] = previous_count + 1
# NOTE(review): the saved output below lists previous_count for every character,
# which this cell does not print — it looks like stale output from an earlier run.

0
0
0
0
0
1
0
2
0
0
0
1
0
1
1
1
1
0
3
2
0
4
0
1
2
2
2
1
1
5
1
1
0
6
2
0
3
3
0
0
7
3
1
2
4
8
5
4
0
4
6
2
5
1
9
2
3
7
3
5
2
4
3
4
0
6
10
8
3
7
11
9
3
5
1
8
0
6
2
3
2
12
6
3
2
13
2
7
0
0
4
8
1
5
3
9
4
14
5
9
3
6
10
10
11
6
15
2
7
0
12
1
1
4
8
1
16
9
17
4
5
0
5
7
10
5
18
6
10
6
13
0
0
19
8
3
14
11
7
12
1
2
20
7
4
11
2
15
8
21
11
5
16
22
0
12
3
23
13
24
9
13
6
9
17
12
4
25
10
10
26
14
7
2
14
4
11
1
18
4
27
15
5
5
28
13
6
19
8
20
4
15
9
21
29
12
6
5
16
10
3
11
30
14
7
22
31
16
11
3
1
4
23
7
15
12
32
6
17
12
33
17
8
34
0
13
9
24
0
2
18
8
14
16
5
35
13
25
4
15
5
26
1
36


In [34]:
print(letter_counts)

{'I': 1, 't': 17, ' ': 37, 'i': 16, 's': 13, 'a': 19, 'c': 7, 'o': 18, 'l': 9, 'e': 27, 'n': 10, 'f': 7, 'b': 1, 'd': 6, 'h': 8, 'r': 14, 'y': 6, 'g': 5, 'z': 2, '-': 2, 'm': 6, 'p': 3, 'k': 1, 'u': 3, '\n': 1, 'A': 1, 'w': 1, '"': 2, 'q': 1}


## Initialize empty list, dict, pair

In [36]:
from collections import defaultdict

In [37]:
dd_list = defaultdict(list)             # list() produces an empty list
dd_list[2].append(1)                    # now dd_list contains {2: [1]}

print(dd_list)                 

defaultdict(<class 'list'>, {2: [1]})


In [38]:
dd_dict = defaultdict(dict)             # dict() produces an empty dict
dd_dict["Joel"]["City"] = "Seattle"     # now dd_dict contains {"Joel": {"City": "Seattle"}}

print(dd_dict) 

defaultdict(<class 'dict'>, {'Joel': {'City': 'Seattle'}})


In [39]:
dd_pair = defaultdict(lambda: [0, 0])  # the lambda produces a fresh [0, 0] pair for each missing key
dd_pair[2][1] = 1                      # now dd_pair contains {2: [0, 1]}

print(dd_pair) 

defaultdict(<function <lambda> at 0x7fca2c6fbbf8>, {2: [0, 1]})


## Counter

In [40]:
from collections import Counter

In [41]:
c = Counter([0, 1, 2, 0])          # c is (basically) {0: 2, 1: 1, 2: 1}

In [42]:
print(c)

Counter({0: 2, 1: 1, 2: 1})


## Count letters more effectively

In [45]:
letter_counts = Counter(document)

In [46]:
print(letter_counts)

Counter({' ': 37, 'e': 27, 'a': 19, 'o': 18, 't': 17, 'i': 16, 'r': 14, 's': 13, 'n': 10, 'l': 9, 'h': 8, 'c': 7, 'f': 7, 'd': 6, 'y': 6, 'm': 6, 'g': 5, 'p': 3, 'u': 3, 'z': 2, '-': 2, '"': 2, 'I': 1, 'b': 1, 'k': 1, '\n': 1, 'A': 1, 'w': 1, 'q': 1})


### Print the 10 most common letters and their counts

In [47]:
# letter_counts is the Counter built from `document` above; most_common(10)
# yields its ten (letter, count) pairs with the highest counts, largest first.
# (The previous name `word_counts` is never defined in this notebook, so the
# cell raised NameError on a fresh kernel.)
for letter, count in letter_counts.most_common(10):
    print(letter, count)

  37
e 27
a 19
o 18
t 17
i 16
r 14
s 13
n 10
l 9


## Sets
Another useful data structure is set, which represents a collection of distinct elements. You can define a set by listing its elements between curly braces:

In [56]:
# A list keeps duplicates ("an" appears twice) and membership tests scan it element by element.
hundreds_of_other_words = ["itamar", "antonio", "tafarello"]
stopwords_list = ["a", "an", "at", *hundreds_of_other_words, "yet", "you", "an"]

"zip" in stopwords_list     # False, but have to check every element
print(stopwords_list)

['a', 'an', 'at', 'itamar', 'antonio', 'tafarello', 'yet', 'you', 'an']


### Notice the "an" shows up only once now

In [58]:
stopwords_set = set(stopwords_list)
"zip" in stopwords_set      # very fast to check
print(stopwords_set)

{'tafarello', 'an', 'a', 'itamar', 'you', 'yet', 'antonio', 'at'}


### Inline control flow

In [62]:
x = 2
parity = "even" if x % 2 == 0 else "odd"
print(parity)

even


In [66]:
for x in range(10):
    if x == 3:
        print("continue to the next iteration")
        continue  # go immediately to the next iteration
    if x == 5:
        print("break the iteration and get out of the for loop")
        break     # quit the loop entirely
    print(x)

0
1
2
continue to the next iteration
4
break the iteration and get out of the for loop


## Assert

In [69]:
x = None
# `assert` is a statement, not an expression, so it cannot be wrapped in print().
# A passing assert produces no output; a failing one raises AssertionError with the message.
assert x == None, "this is the not the Pythonic way to check for None"
assert x is None, "this is the Pythonic way to check for None"

In [81]:
# Truthiness check: an empty string is falsy, so fall back to "." when s is empty.
# (With a non-empty value such as "itamar antonio tafarello" first_char would be "i".)
s = ""
first_char = s[0] if s else "."

In [82]:
print(first_char)

.


In [83]:
first_char = s and s[0]   # `and` short-circuits: for an empty s this yields s itself (""), not "."

In [84]:
print(first_char)




### Make a variable safe

In [97]:
x=""
safe_x = x or 0   # `or` falls back to 0 for any falsy x, so the empty string becomes 0 here

In [98]:
print(safe_x)

0


In [99]:
safe_x = x if x is not None else 0   # only None is replaced by 0; the empty string is kept as-is

In [100]:
print(safe_x)




### Are all elements truthy? (`all` and `any`)

In [102]:
all([True, 1, {3}])   # True, all are truthy


True

## One of the elements is empty (falsy)

In [112]:
all([True, 1, {}])    # False, {} is falsy


False

In [114]:
any([True, 1, {}])    # True, True is truthy


True

In [105]:
all([])               # True, no falsy elements in the list


True

In [106]:
any([])               # False, no truthy elements in the list

False

### Sorting elements

In [120]:
x = [4, 1, 2, 3]
print(x)

[4, 1, 2, 3]


In [121]:
y = sorted(x)     # y is [1, 2, 3, 4], x is unchanged
print(f"x={x}")
print(f"y={y}")

x=[4, 1, 2, 3]
y=[1, 2, 3, 4]


In [122]:
x.sort()          # now x is [1, 2, 3, 4]
print(x)

[1, 2, 3, 4]


## Sort the list by the absolute value of each element, in descending order

In [125]:
# sort the list by absolute value from largest to smallest
x = sorted([-4, 1, -2, 3], key=abs, reverse=True)  # is [-4, 3, -2, 1]

print(x)

[-4, 3, -2, 1]


## Sorting the list by the first element of each (letter, count) tuple, letter_and_count[0] — i.e. by letter, in descending order

In [128]:
# sort the (letter, count) pairs by letter, in reverse (descending) character order
wc = sorted(letter_counts.items(),
            key=lambda letter_and_count: letter_and_count[0],
            reverse=True)
print(wc)

[('z', 2), ('y', 6), ('w', 1), ('u', 3), ('t', 17), ('s', 13), ('r', 14), ('q', 1), ('p', 3), ('o', 18), ('n', 10), ('m', 6), ('l', 9), ('k', 1), ('i', 16), ('h', 8), ('g', 5), ('f', 7), ('e', 27), ('d', 6), ('c', 7), ('b', 1), ('a', 19), ('I', 1), ('A', 1), ('-', 2), ('"', 2), (' ', 37), ('\n', 1)]


## Sorting the list by the second element of each (letter, count) tuple, letter_and_count[1] — i.e. by count, in descending order

In [129]:
# sort the (letter, count) pairs from highest count to lowest
wc = sorted(letter_counts.items(),
            key=lambda letter_and_count: letter_and_count[1],
            reverse=True)
print(wc)

[(' ', 37), ('e', 27), ('a', 19), ('o', 18), ('t', 17), ('i', 16), ('r', 14), ('s', 13), ('n', 10), ('l', 9), ('h', 8), ('c', 7), ('f', 7), ('d', 6), ('y', 6), ('m', 6), ('g', 5), ('p', 3), ('u', 3), ('z', 2), ('-', 2), ('"', 2), ('I', 1), ('b', 1), ('k', 1), ('\n', 1), ('A', 1), ('w', 1), ('q', 1)]


## List Comprehensions

In [130]:
even_numbers = [x for x in range(5) if x % 2 == 0]  # [0, 2, 4]
print(even_numbers)

[0, 2, 4]


In [132]:
squares      = [x * x for x in range(5)]            # [0, 1, 4, 9, 16]
print(squares)

[0, 1, 4, 9, 16]


In [133]:
even_squares = [x * x for x in even_numbers]        # [0, 4, 16]
print(even_squares)

[0, 4, 16]


## Dict and set comprehensions: loop through a range of 5 elements and build a new dict

In [134]:
square_dict = {x: x * x for x in range(5)}  # {0: 0, 1: 1, 2: 4, 3: 9, 4: 16}
print(square_dict)

{0: 0, 1: 1, 2: 4, 3: 9, 4: 16}


In [135]:
square_set  = {x * x for x in [1, -1]}      # {1}
print(square_set)

{1}


### Generate a list using nested inline for loops

In [147]:
pairs = [(x, y)
         for x in range(5)
         for y in range(5)]   # 25 pairs (0,0) (0,1) ... (4,3), (4,4)

In [148]:
print("Generated List\n")
print(pairs)
print("\n")
print(f"Total number of elements in the list {len(pairs)}")

Generated List

[(0, 0), (0, 1), (0, 2), (0, 3), (0, 4), (1, 0), (1, 1), (1, 2), (1, 3), (1, 4), (2, 0), (2, 1), (2, 2), (2, 3), (2, 4), (3, 0), (3, 1), (3, 2), (3, 3), (3, 4), (4, 0), (4, 1), (4, 2), (4, 3), (4, 4)]


Total number of elements in the list 25
