## Documentation

### Documentation in Python
- Comments

In [None]:
# Square the number x

- Docstrings

In [None]:
"""Square the number x  

    :param x: number to square   
    :return: x squared    
    
    >>> square(2)  
    4   
    """

In [None]:
# This is a valid comment
x = 2

In [None]:
y = 3 # This is also a valid comment

In [None]:
# You can't see me unless you look at the source code
# Hi future collaborators!!

### Effective comments
Commenting `'what'`

In [None]:
# Define people as 5
people = 5

# Multiply people by 3
people * 3

Commenting `'why'`

In [None]:
# There will be 5 people attending the party
people = 5

# We need 3 pieces of pizza per person
people * 3

### Docstrings

In [None]:
def function(x):
    """High level description of function  
    
    :param x: description of parameter x   
    :return: description of return value

    >>> # Example function usage  
    Expected output of example function usage 
    """
    
    # function code

### Example docstring

In [None]:
def square(x):
    """Square the number x

    :param x: number to square
    :return: x squared

    >>> square(2)
    4
    """
    # Plain multiplication is used instead of `x ** 2` because it is faster;
    # reference: https://stackoverflow.com/a/29055266/5731525
    squared = x * x
    return squared

In [None]:
# Example docstring output
help(square)

In [None]:
# Practice 1

import re

def extract_0(text):
    # match and extract dollar amounts from the text
    return re.findall(r'\$\d+\.\d\d', text)

def extract_1(text):
    # return all matches to regex pattern
    return re.findall(r'\$\d+\.\d\d', text)

# Print the text
print(text)

# Print the results of the function with better commenting
print(extract_1(text))

In [None]:
# Practice 2

# Complete the function's docstring
def tokenize(text, regex=r'[a-zA-Z]+'):
    """Split text into tokens using a regular expression

    The default pattern matches runs of ASCII letters only, so digits,
    whitespace and punctuation (including ``_`` and ``^``) separate tokens.
    Matching is always case-insensitive because ``re.IGNORECASE`` is applied.

    :param text: text to be tokenized
    :param regex: regular expression used to match tokens using re.findall
    :return: a list of resulting tokens

    >>> tokenize('the rain in spain')
    ['the', 'rain', 'in', 'spain']
    >>> tokenize('snake_case')
    ['snake', 'case']
    """
    # NOTE: the character class must end at upper-case `Z`; the range `A-z`
    # would also match the punctuation characters between `Z` and `a`.
    return re.findall(regex, text, flags=re.IGNORECASE)

# Print the docstring
help(tokenize)

### The Zen of Python

In [None]:
import this

### Descriptive naming

In [None]:
# Poor naming
def check(x, y=100):
    """Report whether ``x`` is greater than or equal to ``y``

    :param x: value to compare
    :param y: value to compare against (defaults to 100)
    :return: result of ``x >= y``
    """
    at_least_y = x >= y
    return at_least_y

In [None]:
# Descriptive naming
def is_boiling(temp, boiling_point=100):
    """Report whether a temperature has reached the boiling point

    :param temp: temperature to check
    :param boiling_point: threshold temperature (defaults to 100)
    :return: True when temp is at or above boiling_point, otherwise False
    """
    reached_boiling_point = temp >= boiling_point
    return reached_boiling_point

In [None]:
# Going overboard
def check_if_temperature_is_above_boiling_point(temperature_to_check, celsius_water_boiling_point=100):
    """Report whether a temperature is at or above the boiling point

    :param temperature_to_check: temperature to compare
    :param celsius_water_boiling_point: threshold temperature (defaults to 100)
    :return: result of ``temperature_to_check >= celsius_water_boiling_point``
    """
    is_at_or_above = temperature_to_check >= celsius_water_boiling_point
    return is_at_or_above

In [None]:
# Making a pizza - complex

def make_pizza(ingredients):
    """Prepare pizza dough and sauce from raw ingredients

    Every step is spelled out inline: the dough is mixed, kneaded and
    proved, then the sauce base is sauteed, combined with the remaining
    sauce ingredients and simmered. The rest of the recipe is elided.

    :param ingredients: container indexed by ingredient name ('yeast',
        'flour', 'water', 'salt', 'shortening', 'onion', 'garlic',
        'olive oil', 'tomato_paste', 'spices')
    """
    # Make dough
    dough = mix(ingredients['yeast'],
                ingredients['flour'],
                ingredients['water'],
                ingredients['salt'],
                ingredients['shortening'])

    kneaded_dough = knead(dough)
    risen_dough = prove(kneaded_dough)

    # Make sauce
    sauce_base = sautee(ingredients['onion'],
                        ingredients['garlic'],
                        ingredients['olive oil'])

    sauce_mixture = combine(sauce_base,
                            ingredients['tomato_paste'],
                            ingredients['water'],
                            ingredients['spices'])

    sauce = simmer(sauce_mixture)
    ...

In [None]:
# Making a pizza - simple

def make_pizza(ingredients):
    """Turn raw ingredients into a baked pizza

    :param ingredients: ingredients handed to the dough, sauce and assembly steps
    :return: the result of baking the assembled pizza
    """
    pizza_dough = make_dough(ingredients)
    pizza_sauce = make_sauce(ingredients)

    return bake(assemble_pizza(pizza_dough, pizza_sauce, ingredients))

In [None]:
# Practice 3

import math


def hypotenuse_length(leg_a, leg_b):
    """Find the length of a right triangle's hypotenuse

    :param leg_a: length of one leg of triangle
    :param leg_b: length of other leg of triangle
    :return: length of hypotenuse as a float

    >>> hypotenuse_length(3, 4)
    5.0
    """
    # math.hypot is the standard-library form of sqrt(leg_a**2 + leg_b**2)
    return math.hypot(leg_a, leg_b)


# Print the length of the hypotenuse with legs 6 & 8
print(hypotenuse_length(6, 8))

In [None]:
# Practice 4

from statistics import mean

# Sample measurements of pupil diameter in mm
pupil_diameter = [3.3, 6.8, 7.0, 5.4, 2.7]

# Average pupil diameter from sample
mean_diameter = mean(pupil_diameter)

print(mean_diameter)

In [None]:
# Practice 5

import math


def polygon_perimeter(n_sides, side_len):
    """Find the perimeter of a polygon whose sides all share one length

    :param n_sides: number of sides
    :param side_len: length of each side
    :return: total length of all sides

    >>> polygon_perimeter(6, 10)
    60
    """
    return n_sides * side_len


def polygon_apothem(n_sides, side_len):
    """Find the apothem of a regular polygon

    :param n_sides: number of sides
    :param side_len: length of each side
    :return: side_len divided by ``2 * tan(pi / n_sides)``
    """
    denominator = 2 * math.tan(math.pi / n_sides)
    return side_len / denominator


def polygon_area(n_sides, side_len):
    """Find the area of a regular polygon

    :param n_sides: number of sides
    :param side_len: length of each side
    :return: half the product of the perimeter and the apothem
    """
    perimeter = polygon_perimeter(n_sides, side_len)
    apothem = polygon_apothem(n_sides, side_len)

    return perimeter * apothem / 2


# Print the area of a hexagon with sides of length 10
print(polygon_area(n_sides=6, side_len=10))

## Unit Testing

### Why testing?
- Confirm code is working as intended
- Ensure changes in one function don't break another
- Protect against changes in a dependency

### Testing in Python
- `doctest`
- `pytest`

### Using doctest

In [None]:
def square(x):
    """Square the number x

    :param x: number to square
    :return: x squared

    >>> square(3)
    9
    """
    # Multiply x by itself so the result matches the documented doctest
    return x * x

import doctest
doctest.testmod()

### pytest Structure
![Screenshot 2025-08-23 at 11.35.24 AM.png](attachment:8fc4e2a4-82e0-4c84-a8d2-d712ea45538f.png)

![Screenshot 2025-08-23 at 11.35.42 AM.png](attachment:aed77945-a113-42cb-8149-9bdd121e6b59.png)

### Writing unit tests
Working in `work_dir/tests/test_document.py`

In [None]:
from text_analyzer import Document
from collections import Counter


# Test tokens attribute on Document object
def test_document_tokens():
    """A simple document is split into one token per word."""
    doc = Document('a e i o u')

    assert doc.tokens == ['a', 'e', 'i', 'o', 'u']


# Test edge case of blank document
# (defined at module level so that pytest collects and runs it)
def test_document_empty():
    """A blank document has no tokens and an empty word count."""
    doc = Document('')

    assert doc.tokens == []
    assert doc.word_counts == Counter()


In [None]:
# Create 2 identical Document objects
doc_a = Document('a e i o u')
doc_b = Document('a e i o u')

# Check if objects are ==
print(doc_a == doc_b)

# Check if attributes are ==
print(doc_a.tokens == doc_b.tokens)

print(doc_a.word_counts == doc_b.word_counts)

### Running pytest
Working in the terminal

In [None]:
~/work_dir $ pytest

In [None]:
~/work_dir $ pytest tests/test_document.py

In [None]:
# Practice 6

from collections import Counter


def sum_counters(counters):
    """Aggregate collections.Counter objects by summing counts

    :param counters: list/tuple of counters to sum
    :return: aggregated counters with counts summed

    >>> d1 = text_analyzer.Document('1 2 fizz 4 buzz fizz 7 8')
    >>> d2 = text_analyzer.Document('fizz buzz 11 fizz 13 14')
    >>> sum_counters([d1.word_counts, d2.word_counts])
    Counter({'fizz': 4, 'buzz': 2})
    """
    # An empty Counter is the start value so an empty input yields Counter()
    return sum(counters, Counter())

doctest.testmod()

In [None]:
# Practice 7

from collections import Counter
from text_analyzer import SocialMedia

# Create an instance of SocialMedia for testing
test_post = 'learning #python & #rstats is awesome! thanks @datacamp!'
sm_post = SocialMedia(test_post)

# Test hashtag counts are created properly
def test_social_media_hashtags():
    """Each hashtag in the sample post is counted exactly once."""
    actual_hashtag_counts = sm_post.hashtag_counts
    assert actual_hashtag_counts == Counter({'#python': 1, '#rstats': 1})

### Documenting Classes

In [None]:
class Document:
    """Analyze text data

    :param text: text to analyze

    :ivar text: text originally passed to the instance on creation
    :ivar tokens: Parsed list of words from text
    :ivar word_counts: Counter containing counts of words used in text
    """

    def __init__(self, text):
        ...

### Links and additional tools
- Sphinx - Generate beautiful documentation
- Travis CI - Continuously test your code
- GitHub & GitLab - Host your projects with git
- Codecov - Discover where to improve your project's tests
- Code Climate - Analyze your code for improvements in readability

In [None]:
# Practice 8

from text_analyzer import Document

class SocialMedia(Document):
    """Analyze text data from social media
    
    :param text: social media text to analyze

    :ivar hashtag_counts: Counter object containing counts of hashtags used in text
    :ivar mention_counts: Counter object containing counts of @mentions used in text
    """
    def __init__(self, text):
        Document.__init__(self, text)
        self.hashtag_counts = self._count_hashtags()
        self.mention_counts = self._count_mentions()