# Python Tidbits: Small Python tips, tricks, and packages you wish you knew about yesterday
> by Nick Hodgskin





## Native Python Tricks


### f-strings

In [None]:
# String concatenation
name = "John"
age = 25
print("Hello, " + name + "! You are " + str(age) + " years old.")


# Python 2: % syntax
name = "Alice"
age = 30
greeting = "Hello, %s! You are %d years old." % (name, age)
print(greeting)

# Python 3: .format() syntax
name = "Bob"
age = 25
greeting = "Hello, {}! You are {} years old.".format(name, age)
print(greeting)

# Python 3.6+: f-strings (the best!)
name = "Charlie"
age = 28
greeting = f"Hello, {name}! You are {age} years old."
print(greeting)

In [None]:
# Bonus: f-strings can evaluate expressions inline
a = 5
b = 10
result = f"The sum of {a} and {b} is {a + b}."
print(result)


def multiply(x, y):
    """Return the product of ``x`` and ``y``."""
    product = x * y
    return product

a = 5
b = 10
result = f"The product of {a} and {b} is {multiply(a, b)}."
print(result)

In [None]:
# Bonus: f-strings support formatting options
pi = 3.14159265
formatted_pi = f"Pi rounded to 2 decimal places: {pi:.2f}"
print(formatted_pi)

radius = 6_371_000  # 6,371 km in meters
circumference = 2 * pi * radius
print(f"Earth's circumference (4 decimal places): {circumference:.4e} meters")
print(f"Earth's circumference (4 significant digits): {circumference:.4g} meters")

You can find out more about formatting options at [W3Schools: Python String Formatting](https://www.w3schools.com/python/python_string_formatting.asp).

Quick reference of format specifiers (listed in the article):
```
:<		Left aligns the result (within the available space)
:>		Right aligns the result (within the available space)
:^		Center aligns the result (within the available space)
:=		Places the sign to the left most position
:+		Use a plus sign to indicate if the result is positive or negative
:-		Use a minus sign for negative values only
: 		Use a space to insert an extra space before positive numbers (and a minus sign before negative numbers)
:,		Use a comma as a thousand separator
:_		Use an underscore as a thousand separator
:b		Binary format
:c		Converts the value into the corresponding Unicode character
:d		Decimal format
:e		Scientific format, with a lower case e
:E		Scientific format, with an upper case E
:f		Fixed point number format
:F		Fixed point number format, in uppercase format (show inf and nan as INF and NAN)
:g		General format
:G		General format (using an upper case E for scientific notations)
:o		Octal format
:x		Hex format, lower case
:X		Hex format, upper case
:n		Number format
:%		Percentage format
```




### enumerate and zip

In [None]:
# Use enumerate to loop over an iterable while keeping track of the index.

# Without enumerate
fruits = ['apple', 'banana', 'cherry']
for i in range(len(fruits)):
    print(i, fruits[i])

# With enumerate
for i, fruit in enumerate(fruits):
    print(i, fruit)

# Bonus: Start indexing at a custom number
for i, fruit in enumerate(fruits, start=1):
    print(i, fruit)

In [None]:
# under the hood
print(enumerate(fruits))
print(list(enumerate(fruits)))

In [None]:
# Use zip to loop over multiple iterables in parallel.

# Without zip
names = ['Alice', 'Bob', 'Charlie']
scores = [85, 90, 95]
for i in range(len(names)):
    print(names[i], scores[i])

# With zip
for name, score in zip(names, scores):
    print(name, score)

In [None]:
# Bonus: Unzipping
pairs = list(zip(names, scores))
print('pairs:', pairs)
names_unzipped, scores_unzipped = zip(*pairs)
print("names_unzipped:", names_unzipped)
print("scores_unzipped:", scores_unzipped)

### list comprehensions


In [None]:
numbers = [1, 2, 3, 4, 5]

# Example 1: Basic list comprehension
# Squaring numbers in a list

# using a for loop
squares = []
for x in numbers:
    squares.append(x**2)
print(squares)

# using a list comprehension
squares = [x**2 for x in numbers]
print(squares)


In [None]:
# Example 2: Using `if` to filter elements
# Keeping only even numbers

# Using a for loop
evens = []
for x in numbers:
    if x % 2 == 0:
        evens.append(x)
print(evens)

# list comprehension
evens = [x for x in numbers if x % 2 == 0]
print(evens)

In [None]:
# Example 3: Using `if` and `else` in a list comprehension
# Replacing odd numbers with -1

# Using a for loop
processed = []
for x in numbers:
    if x % 2 == 0:
        processed.append(x)
    else:
        processed.append(-1)
print(processed)

# list comprehension
processed = [x if x % 2 == 0 else -1 for x in numbers]
print(processed)

In [None]:
# Bonus: Filtering out negative values from data
data = [3.2, -1.5, 0.0, 4.7, -2.3, 5.6]
cleaned_data = [x for x in data if x >= 0]
print(cleaned_data)

### sets

In [None]:
# Creating a set
unique_numbers = {1, 2, 3, 4, 5}
print(unique_numbers)  # Output: {1, 2, 3, 4, 5}

# Adding elements to a set
unique_numbers.add(6)
print(unique_numbers)  # Output: {1, 2, 3, 4, 5, 6}

# Sets automatically handle duplicates
unique_numbers.add(3)
print(unique_numbers)  # Output: {1, 2, 3, 4, 5, 6} (no change)

# Using sets to remove duplicates from a list
data_with_duplicates = [5, 1, 2, 2, 3, 4, 4]
unique_data = list(set(data_with_duplicates))
print(unique_data)  # Output: [1, 2, 3, 4, 5] (note the order isn't preserved)

In [None]:
# Set operations

# Define two sets
set_a = {1, 2, 3, 4, 5}
set_b = {4, 5, 6, 7, 8}
print("A:", set_a)
print("B:", set_b)

# Union: Combine elements from both sets (no duplicates)
union_set = set_a | set_b  # or set_a.union(set_b)
print("Union (set_a | set_b):", union_set)  # Output: {1, 2, 3, 4, 5, 6, 7, 8}

# Difference: Elements in set_a but not in set_b
difference_set = set_a - set_b  # or set_a.difference(set_b)
print("Difference (set_a - set_b):", difference_set)  # Output: {1, 2, 3}

# Intersection: Elements common to both sets
intersection_set = set_a & set_b  # or set_a.intersection(set_b)
print("Intersection (set_a & set_b):", intersection_set)  # Output: {4, 5}

# Symmetric Difference: Elements in either set but not in both
symmetric_diff_set = set_a ^ set_b  # or set_a.symmetric_difference(set_b)
print("Symmetric Difference (set_a ^ set_b):", symmetric_diff_set)  # Output: {1, 2, 3, 6, 7, 8}

In [None]:
# Practical example: Finding unique elements in two datasets
data_1 = {10, 20, 30, 40, 50}
data_2 = {30, 40, 50, 60, 70}

# Unique elements in either dataset
unique_elements = data_1 ^ data_2
print("Unique elements in either dataset:", unique_elements)  # Output: {10, 20, 60, 70}

### getting help straight from Python (dir(), help(), locals())




In [None]:
# 1. Listing available methods and attributes with `dir()`
my_list = [1, 2, 3]
dir(my_list)  # Shows all methods and attributes of the list object

In [None]:
# 2. Getting detailed help with `help()`
help(my_list.pop)  # Displays documentation for the `pop` method

In [None]:
print("List before:", my_list)
popped = my_list.pop(0)
print("List after:", my_list)
print("Popped item:", popped)

In [None]:

# 3. Inspecting local variables with `locals()`
def example_function():
    """Demonstrate `locals()` by printing this function's local variables.

    Prints the dict ``{'x': 10, 'y': 20}``.
    """
    x = 10
    y = 20
    print(locals())  # Shows all local variables in the current scope, as a name -> value dict
    # `globals()` returns the equivalent dict for global (module-level) variables

example_function()

### advanced sorting using keys

In [None]:
# normal sorting
lst = [2, 1, 3, 6, 5, 4]
print("list (unsorted):", lst)
lst.sort()
print("list (sorted):", lst)

In [None]:
# Example: Sorting a list of tuples by the second element
def return_second_element(x):
    """Sort key helper: return the item stored at index 1 of ``x``."""
    second = x[1]
    return second
data = [(1, 20), (3, 15), (2, 25), (4, 10)]
print("data (unsorted):", data)
sorted_data = sorted(data, key=return_second_element)
print("data (sorted by the second element):", sorted_data)


# ...using an inline lambda function
data = [(1, 20), (3, 15), (2, 25), (4, 10)]
print("data (unsorted):", data)
sorted_data = sorted(data, key=lambda x: x[1])
print("data (sorted by the second element):", sorted_data)

In [None]:
# Example: Sorting a list of dictionaries by a specific key
data = [{'name': 'Alice', 'age': 25}, {'name': 'Bob', 'age': 20}, {'name': 'Charlie', 'age': 30}]
print("data (unsorted):", data)
# `sorted()` returns a new list and leaves `data` untouched, so the sorted
# result lives in `sorted_data` - that is what we need to print.
sorted_data = sorted(data, key=lambda x: x['age'])
print("data (sorted by age):", sorted_data)

# Example: Sorting strings by their length
words = ['apple', 'banana', 'kiwi', 'cherry']
print("words (unsorted):", words)
sorted_words = sorted(words, key=len)
print("words (sorted by length):", sorted_words)
sorted_words = sorted(words, key=len, reverse=True)
print("words (sorted reverse by length):", sorted_words)

### filter and map
*You don't strictly need to know `filter` and `map` - you can get away with for loops - but they offer an alternative way of doing things that may be more readable/faster for your use case.*


In [None]:
# Example: Filter even numbers from a list
numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# Using filter with a lambda function
evens = filter(lambda x: x % 2 == 0, numbers)
print(list(evens))  # note it's `filter(function, iterable)`, and note that `filter` returns an iterator (not a list - hence the `list()` call)

In [None]:
# Example: Square all numbers in a list
numbers = [1, 2, 3, 4, 5]

# Using map with a lambda function
squared = map(lambda x: x**2, numbers)
print(list(squared))

In [None]:
# Example: Square only even numbers
numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

# Filter even numbers, then square them
result = map(lambda x: x**2, filter(lambda x: x % 2 == 0, numbers))
print(list(result))

In [None]:
# A more complex example: Converting Celsius to Fahrenheit

# Raw data: Some values are invalid (None or outliers)
data = [22.5, None, 18.3, 1000, 25.0, None, 19.8, 30.2, -999]

# Step 1: Filter out invalid values (None and outliers)
valid_data = filter(lambda x: x is not None and -50 <= x <= 50, data)

# Step 2: Convert Celsius to Fahrenheit
def c_to_f(celsius):
    """Convert a temperature from degrees Celsius to degrees Fahrenheit."""
    scaled = celsius * 9 / 5
    return scaled + 32

fahrenheit_data = map(c_to_f, valid_data)

# Step 3: Round to 2 decimal places
rounded_data = map(lambda x: round(x, 2), fahrenheit_data)

# Final result
print(list(rounded_data))

Why is this powerful?:

- Readability: Each step is clearly separated and easy to understand.
- Lazy Evaluation: filter and map process data on-demand, which is memory-efficient for large datasets.
- Functional Style: Avoids mutable state and side effects, making the code more predictable.

## Python packages: Standard Library

### pprint


In [None]:
# Example: A messy nested data structure
data = [[{
    "experiment": {
        "name": "North Atlantic",
        "samples": [
            {"id": 1, "temperature": 298.15, "results": [0.1, 0.2, 0.3]},
            {"id": 2, "temperature": 310.15, "results": [0.15, 0.25, 0.35]},
        ],
        "metadata": {
            "author": "Dr. Smith",
            "date": "2023-10-01",
            "tags": ["biophysics", "simulation"],
        },
    }
}]]

# Standard print output (hard to read)
print(data)


In [None]:
from pprint import pprint

# Pretty-printed output (clean and readable)
pprint(data)

### pathlib
See [pathlib docs](https://docs.python.org/3/library/pathlib.html) for more info.

In [None]:
from pathlib import Path

# Create a Path object
data_dir = Path("data")  # Represents a directory named "data"

# Check if the directory exists
if not data_dir.exists():
    data_dir.mkdir()  # Create the directory if it doesn't exist

# Create a file path
data_file = data_dir / "experiment_results.csv"  # Use / to join paths

# Write to the file
data_file.write_text("Sample data\n")  # Write text to the file

# Read from the file
print(data_file.read_text())  # Read text from the file

# Iterate over files in a directory
for file in data_dir.glob("*.csv"):  # Find all CSV files
    print(f"Found file: {file.name}")

print("if you want the full path:", data_file.resolve())
print("if you want the stem:", data_file.stem)
print("if you want the extension:", data_file.suffix)


In [None]:
# Path objects can be passed to many functions from external libraries.
# If they *need* a string, you can do
print(str(data_file))

In [None]:
# let's look at what methods are available
print(dir(Path)) # hmm, a bit difficult to read...

In [None]:
def is_public(name):
    """Return True unless ``name`` starts with an underscore."""
    return not name.startswith("_")

list(filter(is_public, dir(Path)))

# or
[name for name in dir(Path) if is_public(name)]

### datetime

In [None]:
from datetime import datetime, timedelta

# 1. Parsing a string into a datetime object
date_str = "2023-10-15 14:30:00"
parsed_date = datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S")
print(f"Parsed Date: {parsed_date} (object of type {type(parsed_date)})")

# 2. Formatting a datetime object into a string
formatted_date = parsed_date.strftime("%A, %B %d, %Y at %I:%M %p")
print(f"Formatted Date: {formatted_date} (object of type {type(formatted_date)})")

# 3. Calculating time differences
future_date = parsed_date + timedelta(days=7, hours=3)
time_diff = future_date - parsed_date
print(f"Time Difference: {time_diff} (object of type {type(time_diff)})")

# 4. Getting the current time
# Note: `datetime.now()` returns a *naive* datetime in the system's local time (not UTC).
# For a timezone-aware UTC timestamp use `datetime.now(timezone.utc)` (after `from datetime import timezone`).
now = datetime.now()
print(f"Current Time: {now}")

In [None]:
# Bonus: Working with timezones (requires `pytz` or `zoneinfo` in Python 3.9+)
from zoneinfo import ZoneInfo  # Python 3.9+
ny_time = now.astimezone(ZoneInfo("America/New_York"))
print(f"New York Time: {ny_time}")

### functools - tools to work with functions
*Here we just cover `partial`, `lru_cache`, and `reduce`. See [docs](https://docs.python.org/3/library/functools.html) for more.*


In [None]:
import functools

#### functools.partial
- Simplifies repetitive function calls with fixed parameters (e.g., fitting curves, transformations).
- Makes code cleaner and more reusable.

In [None]:
help(functools.partial)

In [None]:
# Original function
def power(base, exponent):
    """Raise ``base`` to the power ``exponent`` and return the result."""
    return pow(base, exponent)

# Create new functions with `exponent` fixed to 2 (square) and 3 (cube)
square = functools.partial(power, exponent=2)
cube = functools.partial(power, exponent=3)

print(square(5))  # 25
print(cube(3))    # 27

#### functools.lru_cache
- Speeds up recursive or repetitive computations (e.g., dynamic programming, simulations)
- Reduces redundant calculations in expensive functions
- Should only be used on functions that are deterministic and free of side effects (a cache hit skips the function body entirely)

In [None]:
help(functools.lru_cache)

In [None]:
from time import time, sleep

@functools.lru_cache(maxsize=None)
def some_long_running_function(a, b):
    sleep(2)  # Simulate a long computation
    return a + b

print("first call with 1, 2:", some_long_running_function(1, 2))  # Takes 2 seconds

In [None]:
print("second call with 1, 2:", some_long_running_function(1, 2))  # Returns immediately

In [None]:
print("first call with 2, 4:", some_long_running_function(2, 4))  # Takes 2 seconds: new arguments, so nothing is cached yet


In [None]:
# A more real-world example
def fibonacci(n):
    """Inefficient recursive function to compute the n-th Fibonacci number.

    fibonacci(5) calls fibonacci(4) and fibonacci(3), but fibonacci(4) also calls fibonacci(3).
    This repeated work leads to an exponential number of function calls: exactly
    2 * fibonacci(n + 1) - 1 calls in total (roughly 1.618^n), e.g. 331,160,281
    calls for n = 40.
    """
    if n < 2:
        return n
    return fibonacci(n - 1) + fibonacci(n - 2)

n = 40
t = time()
fib = fibonacci(n)
print(f"Time taken: {time() - t:.2f} seconds")
print(f"Fibonacci({n}): {fib}")

# The naive recursion makes exactly 2 * Fibonacci(n + 1) - 1 calls
# (calls(n) = 1 + calls(n - 1) + calls(n - 2), with calls(0) = calls(1) = 1).
# Compute Fibonacci(n + 1) iteratively so that counting the calls stays cheap.
prev, curr = 0, 1
for _ in range(n):
    prev, curr = curr, prev + curr
print(f"Number of function calls: {2 * curr - 1}")

In [None]:

@functools.lru_cache(maxsize=None)  # maxsize=None -> cache every result (the default maxsize is 128)
def fibonacci(n):
    """Return the n-th Fibonacci number; results are memoised by `lru_cache`."""
    return n if n < 2 else fibonacci(n - 1) + fibonacci(n - 2)

n = 40
t = time()
fib = fibonacci(n)
print(f"Time taken: {time() - t:.2f} seconds")
print(f"Fibonacci({n}): {fib}")


#### functools.reduce

In [None]:
help(functools.reduce)

In [None]:
# Multiply all numbers in a list
numbers = [1, 2, 3, 4, 5]
product = functools.reduce(lambda x, y: x * y, numbers)

print(product)

In [None]:
# interested in other functools stuff? You can Google the public API for usecases...
[name for name in dir(functools) if is_public(name)]

### itertools - tools to work with iterators
*See [docs](https://docs.python.org/3/library/itertools.html) for more.*

What is an iterator?:
> An iterator is an object that contains a countable number of values.

In Python, an iterator is an object which implements the iterator protocol (i.e., it tells Python how to get from the current value to the next value). Iterators allow for efficient looping and processing of large datasets. 

In [None]:
import itertools

#### itertools.chain
Use chain to seamlessly combine multiple iterables into a single iterator.

In [None]:
list1 = [1, 2, 3]
list2 = ['a', 'b', 'c']
combined = itertools.chain(list1, list2)

print(list(combined))  # Output: [1, 2, 3, 'a', 'b', 'c']

#### itertools.product – Cartesian Product
Generate all possible combinations (Cartesian product) of input iterables.

In [None]:
colors = ['red', 'green']
sizes = ['S', 'M', 'L']

combinations = itertools.product(colors, sizes)
print(list(combinations))

#### itertools.combinations – Generate Combinations
Generate all possible combinations of a specific length from an iterable.

In [None]:
data = ['a', 'b', 'c']
combinations = itertools.combinations(data, 2)

print(list(combinations))

#### itertools.permutations - Generate Permutations
Generate all possible permutations of an iterable.

In [None]:
data = ['a', 'b', 'c']
perms = itertools.permutations(data)

print(list(perms))

#### itertools.islice – Slice Iterators
Slice an iterator without converting it to a list first.

In [None]:
data = range(10)
sliced = itertools.islice(data, 2, 6)  # Start at index 2, stop *before* index 6 (the stop index is exclusive)

print(list(sliced))  # Output: [2, 3, 4, 5]

#### itertools.groupby – Group Data

In [None]:
# Note: groupby only groups *consecutive* items that share the same key, so the
# input should already be sorted by that key (as it is here).
data = [('a', 1), ('a', 2), ('b', 3), ('b', 4), ('c', 5)]
grouped = itertools.groupby(data, key=lambda x: x[0])

# Each `group` is itself an iterator over the matching items, hence the `list()` call
for key, group in grouped:
    print(key, list(group))

#### itertools.cycle – Infinite Cycling
Cycle through an iterable indefinitely.

In [None]:
import itertools

colors = ['red', 'green', 'blue']
cycled = itertools.cycle(colors)

for _ in range(5):
    print(next(cycled))

#### itertools.tee – Duplicate an Iterator
Split an iterator into multiple independent iterators.

In [None]:
import itertools

data = iter(range(5))
iter1, iter2 = itertools.tee(data, 2)

print(list(iter1))
print(list(iter2))

#### more itertools

In [None]:
[name for name in dir(itertools) if is_public(name)]

## Python packages: 3rd Party


### tqdm

After installing it using `conda install tqdm` or `pip install tqdm`...

In [None]:
from tqdm import tqdm

def run_calculations():
    sleep(0.1)  # Simulate a long computation

for _ in tqdm(range(100)):
    run_calculations()

In [None]:
# Bonus tip!: Use `_` when assigning variables you don't care about. Good for for loops and unpacking.
# Example 1: Unpacking values
data = (1, 2, 3)
_, y, _ = data
print(y)

## Topics not discussed, and further reading

Things not mentioned in this talk:
- Testing (using Pytest)
  - this is quite a large topic and could be a talk in itself
- Jupyter Notebook tips and tricks (+using markdown)
  - this is quite a large topic and could be a talk in itself
- logging
  - this is a topic that could form part of a talk in itself


Check out the rest of the Python standard library for more interesting packages!
- [Python | The Python Standard Library](https://docs.python.org/3/library/index.html)
- [Python | Brief tour of the standard library](https://docs.python.org/3/tutorial/stdlib.html)
