# Python Data Structure

### Loading Libraries

In [123]:
# Math
import math
from math import hypot

# Numerical Computing
import numpy as np

# Data Manipulation
import pandas as pd

# Data Visualization
import seaborn
import matplotlib.pyplot as plt

#
from pprint import pprint

# OS
import re
import sys
import abc
import time
import queue
import string
import random
import bisect
import operator
import datetime
from decimal import Decimal

# Types & Annotations
import collections
from __future__ import annotations
from collections import defaultdict, Counter
from collections.abc import Container, Mapping, Hashable
from typing import Hashable, Mapping, TypeVar, Any, overload, Union, Sequence, Dict, Deque
from typing import List, Protocol, NoReturn, Union, Set, Tuple, Optional, Iterable, Iterator, cast, NamedTuple
# from typing import 

# Functional Tools
from functools import wraps, total_ordering

# Files & Path
import logging
import zipfile
import fnmatch
from pathlib import Path
from urllib.request import urlopen

#
from dataclasses import dataclass

In [2]:
Comparable = TypeVar('Comparable')

BaseMapping = Mapping[Comparable, Any]

### Empty Objects

In [3]:
o = object()

o.x = 5

AttributeError: 'object' object has no attribute 'x'

In [None]:
class MyObject:
    ...

In [None]:
m = MyObject()

m.x = "hello"
m.x

### Tuples & Named Tuples

In [4]:
stock = "APPL", 123.52, 53.15, 137.98

stock2 = ("APPL", 123.52, 53.15, 137.98)

In [5]:
def middle(stock, date):
    symbol, current, high, low = stock
    return (((high + low) / 2), date)

In [6]:
middle(("APPL", 123.52, 53.15, 137.98), datetime.date(2020, 12, 4))

(95.565, datetime.date(2020, 12, 4))

In [7]:
a = 42
a

42

In [8]:
b = (42, 3.14), (2.718, 2.618),
b

((42, 3.14), (2.718, 2.618))

In [9]:
s = "APPL", 132.76, 134.80, 130.53

In [10]:
high = s[2]
high

134.8

In [11]:
s[1:3]

(132.76, 134.8)

In [12]:
def high(stock):
    symbol, current, high, low = stock
    return high

In [13]:
high(s)

134.8

### Named Tuples via: `typing.NamedTuple`

In [14]:
class Stock(NamedTuple):
    symbol: str
    current: float
    high:float
    low: float

In [15]:
Stock("APPL", 123.52, 137.98, 53.15)

Stock(symbol='APPL', current=123.52, high=137.98, low=53.15)

In [16]:
s2 = Stock("APPL", 123.52, high=137.98, low=53.15)

In [17]:
s2.high

137.98

In [18]:
s[2]

134.8

In [19]:
symbol, current, high, low = s

In [20]:
current

132.76

In [21]:
s.current = 122.25

AttributeError: 'tuple' object has no attribute 'current'

In [22]:
t = ("Relayer", ["Gates of Delirium", "Sound of Chaser"])

t[1].append("To Be Over")

In [23]:
t

('Relayer', ['Gates of Delirium', 'Sound of Chaser', 'To Be Over'])

In [24]:
hash(t)

TypeError: unhashable type: 'list'

In [25]:
class Stock(NamedTuple):
    symbol: str
    current: float
    high: float
    low: float
    @property
    def middle(self) -> float:
        return (self.high + self.low) / 2

In [26]:
s = Stock("APPL", 123.52, 137.98, 53.15)
s.middle

95.565

### Dataclasses

In [27]:
@dataclass
class Stock:
    symbol: str
    current: float
    high: float
    low:float

In [28]:
s = Stock("AAPL", 123.52, 137.98, 53.15)

In [29]:
s.current

123.52

In [30]:
s

Stock(symbol='AAPL', current=123.52, high=137.98, low=53.15)

In [31]:
s.unexpected_attribute = 'allowed'
s.unexpected_attribute

'allowed'

In [32]:
class StockOrdinary:
    def __init__(self, name: str, current: float, high: float, low: float) -> None:
        self.name = name
        self.current = current
        self.high = high
        self.low = low

In [33]:
s_ord = StockOrdinary("APPL", 123.52, 137.98, 53.15)

In [34]:
s_ord

<__main__.StockOrdinary at 0x1661d3210>

In [35]:
s_ord_2 = StockOrdinary("APPL", 123.52, 137.98, 53.15)

In [36]:
s_ord == s_ord_2

False

In [37]:
stock2 = Stock(symbol='AAPL', current=122.25, high=137.98, low=53.15)

In [38]:
s == stock2

False

In [39]:
@dataclass
class StockDefaults:
    name: str
    current: float = 0.0
    high: float = 0.0
    low: float = 0.0

In [40]:
StockDefaults("GOOG")

StockDefaults(name='GOOG', current=0.0, high=0.0, low=0.0)

In [41]:
StockDefaults("GOOG", 1826.77, 1847.20, 1013.54)

StockDefaults(name='GOOG', current=1826.77, high=1847.2, low=1013.54)

In [42]:
@dataclass(order=True)
class StockOrdered:
    name: str
    current: float = 0.0
    high: float = 0.0
    low: float = 0.0

In [43]:
stock_ordered1 = StockOrdered("GOOG", 1826.77, 1847.20, 1013.54)

In [44]:
stock_ordered2 = StockOrdered("GOOG")

In [45]:
stock_ordered3 = StockOrdered("GOOG", 1728.28, high=1733.18, low=1666.33)

In [46]:
stock_ordered1 < stock_ordered2

False

In [47]:
stock_ordered1 > stock_ordered2

True

In [48]:
pprint(sorted([stock_ordered1, stock_ordered2, stock_ordered3]))

[StockOrdered(name='GOOG', current=0.0, high=0.0, low=0.0),
 StockOrdered(name='GOOG', current=1728.28, high=1733.18, low=1666.33),
 StockOrdered(name='GOOG', current=1826.77, high=1847.2, low=1013.54)]


### Dictionaries

In [49]:
stocks = {
    "GOOG": (1235.20, 1242.54, 1231.06),
    "MSFT": (110.41, 110.45, 109.45),
}

In [50]:
stocks["GOOG"]

(1235.2, 1242.54, 1231.06)

In [51]:
stocks["RIMM"]

KeyError: 'RIMM'

In [52]:
print(stocks.get("RIMM"))

None


In [53]:
stocks.get("RIMM", "NOT FOUND")

'NOT FOUND'

In [54]:
stocks.setdefault("GOOG", "INVALID")

(1235.2, 1242.54, 1231.06)

In [55]:
stocks.setdefault("BB", (10.87, 10.76, 10.90))

(10.87, 10.76, 10.9)

In [56]:
stocks["BB"]

(10.87, 10.76, 10.9)

In [57]:
for stock, values in stocks.items():
    print(f"{stock} last value is {values[0]}")

GOOG last value is 1235.2
MSFT last value is 110.41
BB last value is 10.87


In [58]:
stocks["GOOG"] = (1245.21, 1252.64, 1245.18)

In [59]:
stocks['GOOG']

(1245.21, 1252.64, 1245.18)

In [60]:
random_keys = {}

random_keys["astring"] = "somestring"
random_keys[5] = "aninteger"
random_keys[25.2] = "floats work too"
random_keys[("abc", 123)] = "so do tuples"

In [61]:
class AnObject:
    def __init__(self, avalue):
        self.avalue = avalue

In [62]:
my_object = AnObject(14)

In [63]:
random_keys[my_object] = "We can even store objects"

In [64]:
my_object.avalue = 12

In [65]:
random_keys[[1, 2, 3]] = "We can't use lists as keys"

TypeError: unhashable type: 'list'

In [66]:
for key in random_keys:
    print(f"{key!r} has value {random_keys[key]!r}")

'astring' has value 'somestring'
5 has value 'aninteger'
25.2 has value 'floats work too'
('abc', 123) has value 'so do tuples'
<__main__.AnObject object at 0x1661fe1d0> has value 'We can even store objects'


In [67]:
x = 2020
y = 2305843009213695971

In [68]:
hash(x) == hash(y)

True

In [69]:
x == y

False

### Using `defaultdict`

In [70]:
def letter_frequency(sentence: str) -> dict[str, int]:
    frequencies: dict[str, int] = {}
    for letter in sentence:
        frequency = frequencies.setdefault(letter, 0)
        frequencies[letter] = frequency + 1
    return frequencies

In [71]:
def letter_frequency_2(sentence: str) -> defaultdict[str, int]:
    frequencies: defaultdict[str, int] = default(int)
    for letter in sequence:
        frequencies[letter] += 1
    return frequencies

In [72]:
@dataclass
class Prices:
    current: float = 0.0
    high: float = 0.0
    low: float = 0.0
    

In [73]:
Prices()

Prices(current=0.0, high=0.0, low=0.0)

In [74]:
portfolio = collections.defaultdict(Prices)

In [75]:
portfolio["GOOG"]

Prices(current=0.0, high=0.0, low=0.0)

In [76]:
portfolio["AAPL"] = Prices(current=122.25, high=137.98, low=53.15)

In [77]:
pprint(portfolio)

defaultdict(<class '__main__.Prices'>,
            {'AAPL': Prices(current=122.25, high=137.98, low=53.15),
             'GOOG': Prices(current=0.0, high=0.0, low=0.0)})


In [78]:
def make_defaultdict():
    return collections.defaultdict(Prices) 

In [79]:
by_month = collections.defaultdict(lambda: collections.defaultdict(Prices))

In [80]:
by_month["APPL"]["Jan"] = Prices(current=122.25, high=137.98, low=53.15)

### Counter

In [81]:
responses = [
    "vanilla",
    "chocolate",
    "vanilla",
    "vanilla",
    "caramel",
    "strawberry",
    "vanilla"
]

In [82]:
favorites = collections.Counter(responses).most_common(1)

In [83]:
name, frequency = favorites[0]

In [84]:
name

'vanilla'

### List

In [85]:
CHARACTERS = list(string.ascii_letters) + [" "]

In [86]:
def letter_frequency(sentence: str) -> list[tuple[str, int]]:
    frequencies = [(c, 0) for c in CHARACTERS]
    for letter in sentence:
        index = CHARACTERS.index(letter)
        frequencies[index] = (letter, frequencies[index][1] + 1)
    non_zero = [
        (letter, count)
        for letter, count in frequencies if count > 0
    ]
    return non_zero

### Sorting List

In [87]:
@dataclass(frozen=True)
class MultiItem:
    data_source: str
    timestamp: Optional[str]
    creation_date: Optional[str]
    name: str
    owner_etc: str

    def __lt__(self, other: Any) -> bool:
        if self.data_source == "Local":
            self_datetime = datetime.datetime.fromtimestamp(cast(float, self.timestamp))
        else:
            self_datetime = datetime.datetime.fromisoformat(cast(str, self.creation_date))
        if other.data_source == "Loca":
            other_datetime = datetime.datetime.froisostamp(cast(float, other.timestamp))
        else:
            other_datetime = datetime.datetime.fromisoformat(cast(str, other.creation_date))
        return self_datetime < other_datetime               

In [88]:
mi_0 = MultiItem("Local", 16077280522.68012, None, "Some File", "etc. 0")

In [89]:
mi_1 = MultiItem("Remote", None, "2020-12-06T13:47:52.849153", "Another File", "etc. 1")

In [90]:
mi_2 = MultiItem("Local", 1579373292.452993, None, "This File", "etc. 2")

In [91]:
mi_3 = MultiItem("Remote", None, "2020-01-18T13:48:12.452993", "That File", "etc. 3")

In [92]:
file_list = [mi_0, mi_1, mi_2, mi_3]

In [93]:
# file_list.sort()

In [94]:
pprint(file_list)

[MultiItem(data_source='Local',
           timestamp=16077280522.68012,
           creation_date=None,
           name='Some File',
           owner_etc='etc. 0'),
 MultiItem(data_source='Remote',
           timestamp=None,
           creation_date='2020-12-06T13:47:52.849153',
           name='Another File',
           owner_etc='etc. 1'),
 MultiItem(data_source='Local',
           timestamp=1579373292.452993,
           creation_date=None,
           name='This File',
           owner_etc='etc. 2'),
 MultiItem(data_source='Remote',
           timestamp=None,
           creation_date='2020-01-18T13:48:12.452993',
           name='That File',
           owner_etc='etc. 3')]


In [95]:
@total_ordering
@dataclass(frozen=True)
class MultiItem:
    data_source: str
    timestamp: Optional[float]
    creation_date: Optional[str]
    name: str
    owner_etc: str

    def __lt__(self, other: "MultiItem") -> bool:
        Exercise: rewrite this to follow the example of __eq__

    def __eq__(self, other: object) -> bool:
        return self.datetime == cast(MultiItem, other).datetime

    @property
    def datetime(self) -> datetime.datetime:
        if self.data_source == "Local":
            return datetime.datetime.fromtimestamp(cast(float, self.timestamp))
        else:
            return datetime.datetime.fromisoformat(cast(str, self.creation_date))

SyntaxError: invalid syntax (1674962030.py, line 11)

In [96]:
@dataclass(frozen=True)
class SimpleMultiItem:
    data_source: str
    timestamp: Optional[float]
    creation_date: Optional[str]
    name: str
    owner_etc: str

def by_timestamp(item: SimpleMultiItem) -> datetime.datetime:
    if item.data_source == "Local":
        return datetime.datetime.fromtimestamp(cast(float, item.timestamp))
    elif item.data_source == "Remote":
        return datetime.datetime.fromisoformat(cast(str, item.creation_date))
    else:
        raise ValueError(f"Unknown data_source in {item!r}")

In [97]:
file_list.sort(key=by_timestamp)

In [98]:
file_list.sort(key=lambda item: item.name)

In [99]:
file_list.sort(key=operator.attrgetter("name"))

### Sets

In [100]:
song_library = [
    ("Phantom of The Opera", "Sarah Brightman"),
    ("Knocking on Heaven's Door", "Gun N' Roses"),
    ("Captain Nemo", "Sarah Brightman"),
    ("Patterns In The Ivy", "Opeth"),
    ("November Rain", "Gun N' Roses"),
    ("Beautiful", "Sarah Brightman"),
    ("Mal's Song", "Vixy and Tony"),
]

In [101]:
artists = set()

In [102]:
for song, artist in song_library:
    artists.add(artist)

In [103]:
"Opeth" in artists

True

In [104]:
alphabetical = list(artists)

In [105]:
alphabetical.sort()

In [106]:
alphabetical

["Gun N' Roses", 'Opeth', 'Sarah Brightman', 'Vixy and Tony']

In [107]:
for artist in artists:
    print(f"{artist} plays good music")

Opeth plays good music
Gun N' Roses plays good music
Vixy and Tony plays good music
Sarah Brightman plays good music


In [108]:
dusty_artists = {
    "Sarah Brightman",
    "Gun N' Roses",
    "Opeth",
    "Vixy and Tony",
}

In [109]:
steve_artists = {"Yes", "Gun N' Roses", "Genesis"}

In [110]:
print(f"All {dusty_artists | steve_artists}")

All {'Yes', "Gun N' Roses", 'Vixy and Tony', 'Sarah Brightman', 'Genesis', 'Opeth'}


In [111]:
print(f"Both: {dusty_artists.intersection(steve_artists)}")

Both: {"Gun N' Roses"}


In [112]:
print(f"Either but not both: {dusty_artists ^ steve_artists}")

Either but not both: {'Vixy and Tony', 'Genesis', 'Sarah Brightman', 'Opeth', 'Yes'}


In [113]:
artists = {"Gun N' Roses", 'Vixy and Tony', 'Sarah Brightman', 'Opeth'}

In [114]:
bands = {"Opeth", "Gun N' Roses"}

In [115]:
artists.issuperset(bands)

True

In [116]:
artists.issubset(bands)

False

In [117]:
artists - bands

{'Sarah Brightman', 'Vixy and Tony'}

In [118]:
bands.issuperset(artists)

False

In [119]:
bands.issubset(artists)

True

In [120]:
bands.difference(artists)

set()

### Three Types of Queues

In [121]:
class ListQueue(List[Path]):
    def put(self, item: Path) -> None:
        self.append(item)

    def get(self) -> Path:
        return self.pop(0)

    def empty(self) -> bool:
        return len(self) == 0

In [124]:
class DeQueue(Deque[Path]):
    def put(self, item: Path) -> None:
        self.append(item)

    def get(self) -> Path:
        return self.popleft()

    def empty(self) -> bool:
        return len(self) == 0