<a href="https://colab.research.google.com/github/inderpreetsingh01/PyMath/blob/main/Python4DA_Ch3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Built-In Data Structures, Functions, and Files

A tuple is a fixed-length, immutable sequence of Python objects which, once assigned, cannot be changed.

In [None]:
tup = (4, 5, 6)

In [None]:
tup

(4, 5, 6)

In [None]:
tup = tuple(['foo', [1, 2], True])

In [None]:
tup[0] = 1

TypeError: 'tuple' object does not support item assignment

In [None]:
# if an element of a tuple is itself mutable (e.g. a list), it can be modified in place
tup[1][0] = 3

In [None]:
tup

('foo', [3, 2], True)

In [None]:
('foo', 'bar') + ('foo', 'bar')

('foo', 'bar', 'foo', 'bar')

In [None]:
('foo', 'bar') * 4

('foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'bar')

In [None]:
# unpacking tuple
tup = (1,2,3)
a,b,c = tup

In [None]:
a

1

In [None]:
b

2

In [None]:
c

3

In [None]:
a, b = 1, 2

In [None]:
b, a = a, b

In [None]:
b

1

In [None]:
a

2

In [None]:
seq = [(1, 2, 3), (4, 5, 6), (7, 8, 9)]

In [None]:
# use case of unpacking: iterating over a sequence of tuples or lists
for a, b, c in seq:
   print(f'a={a}, b={b}, c={c}')

a=1, b=2, c=3
a=4, b=5, c=6
a=7, b=8, c=9


In [None]:
values = 1, 2, 3, 4, 5
a, b, *rest = values

In [None]:
a

1

In [None]:
b

2

In [None]:
rest

[3, 4, 5]

In [None]:
# count method
a = (1, 2, 2, 2, 3, 4, 2)
a.count(2)

4

In [1]:
a = [1, 2, 2, 2, 3, 4, 2]
a.count(2)

4

# List
In contrast with tuples, lists are variable length and their contents can be modified in place. Lists are mutable.

In [None]:
tup = ("foo", "bar", "baz")
b_list = list(tup)

In [None]:
# insert at the end
b_list.append("dwarf")

In [None]:
b_list

['foo', 'bar', 'baz', 'dwarf']

In [None]:
# insert at given index
b_list.insert(1, "red")

In [None]:
b_list

['foo', 'red', 'bar', 'baz', 'dwarf']

In [None]:
# pop from the given index
b_list.pop(2)

'bar'

In [None]:
b_list

['foo', 'red', 'baz', 'dwarf']

In [None]:
# Elements can be removed by value with remove, which locates the first such value and removes it from the list:
b_list.remove("foo")

In [None]:
b_list

['red', 'baz', 'dwarf']

In [None]:
"dwarf" in b_list

True

In [None]:
"dwarf" not in b_list

False

In [None]:
# Checking whether a list contains a value is a lot slower than doing so with dictionaries and sets (to be introduced shortly), as Python makes a linear scan across the values of the list, whereas it can check the others (based on hash tables) in constant time.

In [None]:
# Note that list concatenation by addition is a comparatively expensive operation since a new list must be created and the objects copied over. Using extend to append elements to an existing list, especially if you are building up a large list, is usually preferable.

In [None]:
x = [4, None, "foo"]

In [None]:
x.extend([7, 8, (2, 3)])

In [None]:
x

[4, None, 'foo', 7, 8, (2, 3)]

In [None]:
[4, None, "foo"] + [7, 8, (2, 3)]

[4, None, 'foo', 7, 8, (2, 3)]

In [None]:
# In place Sorting
a = [7, 2, 5, 1, 3]
a.sort()

In [None]:
a

[1, 2, 3, 5, 7]

In [None]:
b = ["saw", "small", "He", "foxes", "six"]

In [None]:
b.sort(key=len)

In [None]:
b

['He', 'saw', 'six', 'small', 'foxes']

In [None]:
seq = [7, 2, 3, 7, 5, 6, 0, 1]

In [None]:
seq[-4:]

[5, 6, 0, 1]

In [None]:
# slicing but with step size
seq[::2]

[7, 3, 5, 0]

In [None]:
# a step of -1 returns a reversed copy of the list
seq[::-1]

[1, 0, 6, 5, 7, 3, 2, 7]

# Dictionary


In [None]:
d1 = {'a': 'some value',
 'b': [1, 2, 3, 4],
 7: 'an integer',
 5: 'some value'}

In [None]:
d1["dummy"] = "another value"

In [None]:
del d1[5]

In [None]:
d1

{'a': 'some value',
 'b': [1, 2, 3, 4],
 7: 'an integer',
 'dummy': 'another value'}

In [None]:
ret = d1.pop("dummy")

In [None]:
ret

'another value'

In [None]:
d1

{'a': 'some value', 'b': [1, 2, 3, 4], 7: 'an integer'}

In [None]:
# You can merge one dictionary into another using the update method:

In [None]:
d1.update({"b": "foo", "c": 12})

In [None]:
# The update method changes dictionaries in place, so any existing keys in the data passed to update will have their old values discarded.

In [None]:
d1

{'a': 'some value', 'b': 'foo', 7: 'an integer', 'c': 12}

In [None]:
value = d1.get('d', 0)

In [None]:
value

0

In [None]:
from collections import defaultdict

by_letter = defaultdict(list)
words = ["apple", "bat", "bar", "atom", "book"]

for word in words:
   by_letter[word[0]].append(word)

In [None]:
by_letter

defaultdict(list, {'a': ['apple', 'atom'], 'b': ['bat', 'bar', 'book']})

In [None]:
# Valid dictionary key types
# While the values of a dictionary can be any Python object, the keys generally have to be immutable objects like scalar types (int, float, string) or tuples (all the objects in the tuple need to be immutable, too). The technical term here is hashability. You can check whether an object is hashable (can be used as a key in a dictionary) with the hash function:

In [None]:
hash("string")

-3034252750761393648

In [None]:
hash((1, 2, [2, 3]))

TypeError: unhashable type: 'list'

# Set
A set is an unordered collection of unique elements. A set can be created in two ways: via the set function or via a set literal with curly braces:

In [None]:
set([2, 2, 2, 1, 3, 3])

{1, 2, 3}

In [None]:
{2, 2, 2, 1, 3, 3}

{1, 2, 3}

In [None]:
a = {1, 2, 3, 4, 5}
b = {3, 4, 5, 6, 7, 8}

In [None]:
a.union(b)

{1, 2, 3, 4, 5, 6, 7, 8}

In [None]:
# union
a | b

{1, 2, 3, 4, 5, 6, 7, 8}

In [None]:
a.intersection(b)

{3, 4, 5}

In [None]:
# intersection
a & b

{3, 4, 5}

In [None]:
a.symmetric_difference(b)

In [None]:
a ^ b

{1, 2, 6, 7, 8}

In [None]:
a.issubset(b)

False

In [None]:
a.issuperset(b)

False

In [None]:
a.isdisjoint(b)

False

In [None]:
# Like dictionary keys, set elements generally must be immutable, and they must be hashable (which means that calling hash on a value does not raise an exception).

In [None]:
# Sets are equal if and only if their contents are equal:

In [None]:
{1, 2, 3} == {3, 2, 1}

True

# Built-In Sequence Functions
1. enumerate
2. sorted
3. zip: zip can take an arbitrary number of sequences, and the number of elements it produces is determined by the shortest sequence. A common use of zip is simultaneously iterating over multiple sequences.
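4. reversed: reversed iterates over the elements of a sequence in reverse order. It returns a lazy iterator (much like a generator), so the reversed sequence is not created until it is materialized, e.g. with list or a for loop.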



In [None]:
list(reversed(range(10)))

[9, 8, 7, 6, 5, 4, 3, 2, 1, 0]

# List, Dict and Set Comprehension

In [None]:
# list
strings = ["a", "as", "bat", "car", "dove", "python"]
[x.upper() for x in strings if len(x) > 2]

['BAT', 'CAR', 'DOVE', 'PYTHON']

In [None]:
[x.upper() if len(x) > 2 else 0 for x in strings]

[0, 0, 'BAT', 'CAR', 'DOVE', 'PYTHON']

In [None]:
# set
unique_lengths = {len(x) for x in strings}
unique_lengths

{1, 2, 3, 4, 6}

In [None]:
set(map(len, strings))

{1, 2, 3, 4, 6}

In [None]:
all_data = [["John", "Emily", "Michael", "Mary", "Steven"],
             ["Maria", "Juan", "Javier", "Natalia", "Pilar"]]

In [None]:
# Nested List Comprehension
# Keep in mind that the order of the for expressions would be the same if you wrote a nested for loop instead of a list comprehension:
# outer loop and then inner loop
result = [name for names in all_data for name in names if name.count("a") >= 2]

In [None]:
result

['Maria', 'Natalia']

# Functions

In [None]:
# The main restriction on function arguments is that keyword arguments must follow the positional arguments (if any).

In [2]:
states = ["   Alabama ", "Georgia!", "Georgia", "georgia", "FlOrIda", "south   carolina##", "West virginia?"]

# Functions Are Objects

In [7]:
def clean_strings(strings, ops):
    """Run every value in *strings* through a pipeline of functions.

    Each callable in *ops* is applied in order, receiving the result of the
    previous one. The cleaned values are returned as a new list in input
    order.
    """
    def _run_pipeline(text):
        # Feed the value through each operation, left to right.
        for step in ops:
            text = step(text)
        return text

    return [_run_pipeline(item) for item in strings]

In [6]:
import re
def remove_punctuation(value):
    """Return *value* with every '!', '#' and '?' character removed."""
    unwanted = re.compile("[!#?]")
    return unwanted.sub("", value)

clean_ops = [str.strip, remove_punctuation, str.title]

In [8]:
clean_strings(states, clean_ops)

['Alabama',
 'Georgia',
 'Georgia',
 'Georgia',
 'Florida',
 'South   Carolina',
 'West Virginia']

In [9]:
for x in map(remove_punctuation, states):
  print(x)

   Alabama 
Georgia
Georgia
georgia
FlOrIda
south   carolina
West virginia


# Anonymous (Lambda) Functions

In [10]:
def apply_to_list(some_list, f):
    """Return a new list holding f(x) for each x in some_list, in order."""
    mapped = []
    for item in some_list:
        mapped.append(f(item))
    return mapped

ints = [4, 0, 1, 5, 6]

out = apply_to_list(ints, lambda x: x * 2)

In [11]:
out

[8, 0, 2, 10, 12]

In [12]:
strings = ["foo", "card", "bar", "aaaa", "abab"]

In [13]:
strings.sort(key=lambda x: len(set(x)))

In [14]:
strings

['aaaa', 'foo', 'abab', 'bar', 'card']

# Generators

In [None]:
# A generator is a convenient way, similar to writing a normal function, to construct a new iterable object. Whereas normal functions execute and return a single result at a time,
# generators can return a sequence of multiple values by pausing and resuming execution each time the generator is used.
# To create a generator, use the yield keyword instead of return in a function:
# Since generators produce output one element at a time versus an entire list all at once, it can help your program use less memory.

In [15]:
def squares(n=10):
    """Generate the squares 1**2, 2**2, ..., n**2 one value at a time.

    Because this is a generator function, the banner below is printed when
    iteration starts, not when squares() is called.
    """
    print(f"Generating squares from 1 to {n ** 2}")
    for base in range(1, n + 1):
        squared = base ** 2
        yield squared

In [16]:
gen = squares()

## Generator expressions

In [None]:
# Another way to make a generator is by using a generator expression. This is a generator analogue to list, dictionary, and set comprehensions. To create one, enclose what would otherwise be a list comprehension within parentheses instead of brackets:

In [20]:
gen = (x ** 2 for x in range(100))

In [18]:
def _make_gen():
    """Yield the squares of 0 through 99, one at a time."""
    yield from (x ** 2 for x in range(100))

In [19]:
sum(x ** 2 for x in range(100))

328350

In [None]:
sum(x ** 2 for x in range(100))

In [24]:
for i in gen:
  print(i)

# itertools module

In [None]:
# The standard library itertools module has a collection of generators for many common data algorithms. For example, groupby takes any sequence and a function, grouping consecutive elements in the sequence by return value of the function.

In [25]:
import itertools
def first_letter(x):
    """Return the first element of x (the leading character of a string)."""
    initial = x[0]
    return initial

In [26]:
names = ["Alan", "Adam", "Wes", "Will", "Albert", "Steven"]

In [27]:
# groupby only groups *consecutive* items that share a key, which is why
# "Albert" ends up in its own "A" group; sort by the key first to merge them.
# A distinct loop variable keeps the names list from being rebound to the
# last group iterator, so the cell can be re-run safely.
for letter, group in itertools.groupby(names, first_letter):
    print(letter, list(group))

A ['Alan', 'Adam']
W ['Wes', 'Will']
A ['Albert']
S ['Steven']


In [28]:
def attempt_float(x):
    """Convert x to a float, returning x unchanged if it cannot be converted.

    Only TypeError and ValueError from float() are handled; any other
    exception propagates to the caller.
    """
    try:
        converted = float(x)
    except (TypeError, ValueError):
        # Not convertible: hand the input back untouched.
        return x
    return converted

In [None]:
# The with statement closes f automatically when the block exits, even if
# write_to_file raises -- the same guarantee as try/finally with f.close().
with open(path, mode="w") as f:
    write_to_file(f)