In [1]:
# This notebook contains the details of NamedTuple RP tutorial https://realpython.com/python-namedtuple/#using-namedtuple-to-write-pythonic-code
# Python’s namedtuple() is a factory function available in collections. 
# You can access the values in a given named tuple using dot notation and the field names, as in obj.attr. This is useful because accessing values through indices can be confusing.

# Features of NamedTuple

- Are immutable data structures

- Have a consistent hash value

- Can work as dictionary keys

- Can be stored in sets

- Have a helpful docstring based on the type and field names

- Provide a helpful string representation that prints the tuple content in a name=value format

- Support indexing

- Provide additional methods and attributes, such as ._make(), ._asdict(), ._fields, and so on

- Are backward compatible with regular tuples

- Have similar memory consumption to regular tuples


In [2]:
# regular tuple
point = (5, 8)
point[1] = 8

TypeError: 'tuple' object does not support item assignment

In [3]:
from collections import namedtuple

Point = namedtuple("Point", "x y") 
issubclass(Point, tuple)

True

In [4]:
newpt = Point(2, 3)
# dot notation access
newpt.x

2

In [6]:
# access through indices
newpt[1]

newpt.y = 57  # it's still immutable: assigning to a field raises AttributeError

AttributeError: can't set attribute

In [8]:
# The values held inside the tuple can be mutable. Note that tuples or named tuples with mutable values aren’t hashable, as the hash(john) call at the end of this cell shows.

from collections import namedtuple

Person = namedtuple("Person", "name children")  # the instantiation doesn't say anything about the attr types

john = Person("John Doe", ["Timmy", "Jimmy"])

print(john)

id(john.children)

john.children.append("Tina")

print(id(john.children))

hash(john)

Person(name='John Doe', children=['Timmy', 'Jimmy'])
1916921884160


TypeError: unhashable type: 'list'

To create a new namedtuple class, you need to provide two positional arguments to the namedtuple() factory function:

typename provides the class name for the namedtuple returned by namedtuple(). You need to pass a string with a valid Python identifier to this argument.
field_names provides the field names that you’ll use to access the values in the tuple. You can provide the field names using:

An iterable of strings, such as ["field1", "field2", ..., "fieldN"]

A string with each field name separated by whitespace, such as "field1 field2 ... fieldN"

A string with each field name separated by commas, such as "field1, field2, ..., fieldN"

In [9]:
# A list of strings for the field names
Point = namedtuple("Point", ["x", "y"])
Point(2, 4)
# same as Point(x=2, y=4)

# A string with comma-separated field names
Point = namedtuple("Point", "x, y")
Point

Point(4, 8)

# A generator expression for the field names
Point = namedtuple("Point", (field for field in "xy"))
Point
Point(8, 16)

Point(x=8, y=16)

When you use an unordered iterable to provide the fields to a namedtuple, you can get unexpected results. In the example below, the coordinate names are swapped, which might not be right for your use case.

```
>>> from collections import namedtuple

>>> Point = namedtuple("Point", {"x", "y"})
>>> Point(2, 4)
Point(y=2, x=4)
```

In [10]:
# Not having any defaults in namedTuple

Dev = namedtuple("Developer", "name level lang")
john = Dev('john')
john

TypeError: Developer.__new__() missing 2 required positional arguments: 'level' and 'lang'

In [11]:
# using defaults in namedTuple

Dev = namedtuple("Developer", "name level lang", defaults=["junior", "JavaScript"])
john = Dev('john')
john

Developer(name='john', level='junior', lang='JavaScript')

In [12]:
# If you pass the module argument, the .__module__ attribute of the resulting namedtuple class is set to that value. This attribute holds the name of the module in which a given function or callable is defined:
modPoint = namedtuple("Point", "x y", module='custom')
modPoint

custom.Point

Besides the methods inherited from tuple, such as .count() and .index(), namedtuple classes also provide three additional methods and two attributes. To prevent name conflicts with custom fields, the names of these attributes and methods start with an underscore. 

In [14]:
person = namedtuple('newPerson', "name age height weight")

person._make(['doe',25, 87, 86])

newPerson(name='doe', age=25, height=87, weight=86)

In [17]:
# Regular instantiation with positional arguments (as opposed to ._make(), which takes an iterable)
aneu = person('aneu', 65, 86, 75)

In [18]:
# convert to dictionaries
aneu._asdict()

{'name': 'aneu', 'age': 65, 'height': 86, 'weight': 75}

In [19]:
# ._replace() does not modify the tuple in place; it returns a new instance with the given fields replaced ;)
aneu._replace(weight=26)

newPerson(name='aneu', age=65, height=86, weight=26)

In [20]:
aneu._field_defaults

{}

In [21]:
aneu._fields

('name', 'age', 'height', 'weight')

Opportunities to create Pythonic Namedtuple code

In [22]:
pen = (5, 'liquid', False)
# line weight, style and beveled edges

In [23]:
Pen = namedtuple('Pen', 'weight style edges')
newPen = Pen(weight=5, style='liquid', edges=False)
newPen

Pen(weight=5, style='liquid', edges=False)

In [24]:
# when you need to return multiple values from a given function

# Build the result type once, at definition time. Creating it inside the
# function would generate a brand-new class on every call, so two results
# would never share a type and each call would pay the class-creation cost.
DivMod = namedtuple('DivMod', 'quot remd')


def custom_divmod(a, b):
    """Return the quotient and remainder of ``a`` divided by ``b``.

    Args:
        a: Dividend; any value accepted by the built-in ``divmod()``.
        b: Divisor; any value accepted by the built-in ``divmod()``.

    Returns:
        A ``DivMod`` named tuple with fields ``quot`` and ``remd``.

    Raises:
        ZeroDivisionError: Propagated from ``divmod()`` when ``b`` is zero.
    """
    return DivMod(*divmod(a, b))

In [25]:
custom_divmod(8, 6)

DivMod(quot=1, remd=2)

Reducing the number of arguments a function can take is considered a best programming practice. This makes your function’s signature more concise and optimizes your testing process because of the reduced number of arguments and possible combinations between them.

In [26]:
User = namedtuple("User", "username client_name plan")
user = User("john", "John Doe", "Premium")

def create_user(db, user):
    """Register ``user`` in ``db`` and then fill in the user's profile.

    Args:
        db: Object exposing ``add_user(username)`` and
            ``complete_user_profile(username, client_name, plan)``.
        user: Object with ``username``, ``client_name`` and ``plan``
            attributes (for example a ``User`` named tuple).
    """
    db.add_user(user.username)
    # Bundle the profile fields first so the second call reads as one step.
    profile_fields = (user.username, user.client_name, user.plan)
    db.complete_user_profile(*profile_fields)

In [None]:
import csv

from collections import namedtuple

# Read employees.csv and expose each row as an Employee named tuple.
# newline="" is what the csv module documentation asks for when opening files:
# it lets the reader handle line endings itself, so newlines embedded in quoted
# fields are parsed correctly.
with open("employees.csv", "r", newline="") as csv_file:
    reader = csv.reader(csv_file)
    # The header row supplies the field names; rename=True replaces names that
    # are not valid identifiers with positional ones (_0, _1, ...).
    Employee = namedtuple("Employee", next(reader), rename=True)
    for row in reader:
        employee = Employee(*row)
        # Requires the header to contain "name", "job" and "email" columns.
        print(employee.name, employee.job, employee.email)

In this section, you’ll take a general look at the similarities and differences between namedtuple classes and other Python data structures, such as dictionaries, data classes, and typed named tuples. You’ll compare named tuples with other data structures regarding the following characteristics:

Readability

Mutability

Memory usage

Performance

### shortcomings of namedTuples compared to Dictionary 

You can update the value of an existing key in a dictionary, but you can’t do something similar in a named tuple. You can add new key-value pairs to existing dictionaries, but you can’t add field-value pairs to existing named tuples.


### shortcoming of namedTuples compared to DataClasses

Data Classes can be thought of as “mutable namedtuples with defaults.” 
Mutability-wise, data classes are mutable by definition, so you can change the value of their attributes when needed. However, they have an ace up their sleeve. You can set the dataclass() decorator’s frozen argument to True and make them immutable.

### namedTuples Vs typing.NamedTuple
Python 3.5 introduced a provisional module called typing to support function type annotations or type hints. This module provides NamedTuple, which is a typed version of namedtuple. With NamedTuple, you can create namedtuple classes with type hints.

With NamedTuple, you can create tuple subclasses that support type hints and attribute access through the dot notation. Since the resulting class is a tuple subclass, it’s immutable as well.

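A subtle detail to notice is that NamedTuple subclasses, which are declared with a class statement and annotated fields (for example, `class Person(NamedTuple):` followed by `name: str` and `age: int`), look even more similar to data classes than named tuples.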

When it comes to memory consumption, both namedtuple and NamedTuple instances use the same amount of memory.

In [28]:
# namedtuple_dict_time.py

from collections import namedtuple
from time import perf_counter

def average_time(structure, test_func, repeats=1_000_000):
    """Return the mean wall-clock time of ``test_func(structure)`` in nanoseconds.

    Args:
        structure: Object handed to ``test_func`` on every iteration.
        test_func: Callable taking ``structure`` as its only argument.
        repeats: Number of timed calls to average over. Defaults to
            1_000_000, the value this benchmark originally hard-coded.

    Returns:
        The average duration of one call, as a float number of nanoseconds.

    Raises:
        ValueError: If ``repeats`` is smaller than 1 (no samples to average).
    """
    if repeats < 1:
        raise ValueError("repeats must be at least 1")
    time_measurements = []
    for _ in range(repeats):
        start = perf_counter()
        test_func(structure)
        end = perf_counter()
        time_measurements.append(end - start)
    # perf_counter() reports seconds; scale the mean to nanoseconds.
    return sum(time_measurements) / len(time_measurements) * int(1e9)

def time_dict(dictionary):
    """Benchmark workload: run a fixed set of lookups against a dictionary.

    Every expression result is discarded on purpose; only the cost of the
    operations matters. ``dictionary`` must contain the key ``"y"``,
    otherwise the final lookup raises ``KeyError``.
    """
    # Key membership: one key the benchmark input contains, one it does not.
    "x" in dictionary
    "missing_key" in dictionary
    # Value membership: scans the values view.
    2 in dictionary.values()
    "missing_value" in dictionary.values()
    # Direct access by key.
    dictionary["y"]

def time_namedtuple(named_tuple):
    """Benchmark workload: the named-tuple counterpart of ``time_dict``.

    Every expression result is discarded on purpose; only the cost of the
    operations matters. ``named_tuple`` must have a field named ``y``.
    """
    # Field-name membership, checked against the _fields tuple of names.
    "x" in named_tuple._fields
    "missing_field" in named_tuple._fields
    # Value membership: scans the tuple's values.
    2 in named_tuple
    "missing_value" in named_tuple
    # Direct access by field name.
    named_tuple.y

# Benchmark input: the same three values, once as a named tuple and once as
# the dictionary produced by its ._asdict() method.
Point = namedtuple("Point", "x y z")
point = Point(x=1, y=2, z=3)

namedtuple_time = average_time(point, time_namedtuple)
dict_time = average_time(point._asdict(), time_dict)
# Ratio above 1 means the named-tuple workload was the faster one.
gain = dict_time / namedtuple_time

print(f"namedtuple: {namedtuple_time:.2f} ns ({gain:.2f}x faster)")
print(f"dict:       {dict_time:.2f} ns")

namedtuple: 921.25 ns (1.18x faster)
dict:       1088.13 ns


In [29]:
from datetime import date

BasePerson = namedtuple(
    "BasePerson",
    "name birthdate country",
    defaults=["Canada"]
)


class Person(BasePerson):
    """A namedtuple subclass to hold a person's data.

    Fields (inherited from ``BasePerson``):
        name: The person's name.
        birthdate: A ``datetime.date`` with the person's date of birth.
        country: Country of the person; defaults to ``"Canada"``.
    """

    # No extra instance attributes: keeps instances free of a per-instance __dict__.
    __slots__ = ()

    def __repr__(self):
        return f"Name: {self.name}, age: {self.age} years old."

    def age_on(self, reference):
        """Return the person's age in completed years on the date ``reference``.

        The age is derived from calendar fields rather than ``days // 365``,
        which ignores leap days and reports a birthday a few days too early.

        Args:
            reference: A ``datetime.date`` on which to evaluate the age.

        Returns:
            The number of birthdays that have occurred on or before ``reference``.
        """
        birth = self.birthdate
        # True (counts as 1) while this year's birthday is still ahead.
        birthday_pending = (reference.month, reference.day) < (birth.month, birth.day)
        return reference.year - birth.year - birthday_pending

    @property
    def age(self):
        """The person's age in completed years as of today."""
        return self.age_on(date.today())


jane = Person("Jane", date(1996, 3, 5))

In [30]:
jane

Name: Jane, age: 27 years old.

In [31]:
# tuple_namedtuple_time.py

from collections import namedtuple
from time import perf_counter

def average_time(test_func, repeats=1_000):
    """Return the mean wall-clock time of ``test_func()`` in nanoseconds.

    Note: this redefines the two-argument ``average_time(structure, test_func)``
    helper from the dict-vs-namedtuple benchmark cell earlier in this notebook;
    after this cell runs, that earlier cell's calls no longer match.

    Args:
        test_func: Zero-argument callable to time.
        repeats: Number of timed calls to average over. Defaults to 1_000,
            the value this benchmark originally hard-coded.

    Returns:
        The average duration of one call, as a float number of nanoseconds.

    Raises:
        ValueError: If ``repeats`` is smaller than 1 (no samples to average).
    """
    if repeats < 1:
        raise ValueError("repeats must be at least 1")
    time_measurements = []
    for _ in range(repeats):
        start = perf_counter()
        test_func()
        end = perf_counter()
        time_measurements.append(end - start)
    # perf_counter() reports seconds; scale the mean to nanoseconds.
    return sum(time_measurements) / len(time_measurements) * int(1e9)

def time_tuple():
    """Benchmark workload: build a regular 1000-item tuple from a list of ones.

    The resulting tuple is discarded; only the construction cost matters.
    """
    tuple([1] * 1000)

# A named tuple class with 1000 fields (a0 ... a999), matching the size of the
# plain tuple built by time_tuple().
fields = [f"a{n}" for n in range(1000)]
TestNamedTuple = namedtuple("TestNamedTuple", fields)

def time_namedtuple():
    """Benchmark workload: instantiate the 1000-field named tuple with ones.

    Note: this zero-argument function replaces the one-argument
    ``time_namedtuple(named_tuple)`` defined in the dict-vs-namedtuple
    benchmark cell earlier in this notebook.
    """
    TestNamedTuple(*([1] * 1000))

namedtuple_time = average_time(time_namedtuple)
tuple_time = average_time(time_tuple)
# Ratio above 1 means creating the plain tuple was the faster operation.
gain = namedtuple_time / tuple_time

print(f"tuple:      {tuple_time:.2f} ns ({gain:.2f}x faster)")
print(f"namedtuple: {namedtuple_time:.2f} ns")

tuple:      11784.00 ns (7.01x faster)
namedtuple: 82586.20 ns
