# <font color=#FF1F58> Python Basics (Part IV): functions </font>


In [None]:
## This command clones the course repository so that its folders and files are available for us to work with
!git clone https://github.com/aralittle/intro_to_python.git

In [None]:
## do not run this, just for cleanup
!rm -rf intro_to_python/

### Python functions

A function is a block of code that encapsulates a specific task.

In [None]:
# built-in functions: Python provides these, so they can be called without defining them first
print('hello world')  # writes the text to the output
type(3)  # evaluates to the type of its argument (int for 3); a notebook displays the value of a cell's last expression

User-defined functions: if you want to create your own function, you need to define:
- which arguments a python function takes (if any)
- which values a python function returns (if any)



In [None]:
# Schematic template of a function definition -- not runnable as written:
# the `...` stands for any further parameters and is not valid Python inside a parameter list.
def name_of_the_function(argument1, argument2, ...):
  # perform the task
  return something, something_else  # values separated by commas are returned together as one tuple

In [None]:
# this function prints its result instead of returning it
def calculate_price(product_name, quantity, price_per_kilo):
  """Print the cost of a purchase.

  The cost is quantity multiplied by price_per_kilo; product_name is only
  used in the printed message. Nothing is returned, so the call itself
  evaluates to None.
  """
  total_cost = quantity * price_per_kilo
  print(f"The cost of {product_name} is {total_cost}")

In [None]:
# positional call: 'bananas' -> product_name, 3 -> quantity, 1.50 -> price_per_kilo
calculate_price('bananas', 3, 1.50)

In [None]:
# this function returns a number
def sum_numbers(first_number, second_number):
  """Return the result of adding first_number and second_number.

  The result is returned directly instead of being stored in a local
  variable called `sum`, which would hide the built-in sum() function
  inside this function.
  """
  return first_number + second_number

In [None]:
# the value returned by the function is stored in a variable so it can be reused; here it is just printed (2 + 5 -> 7)
result = sum_numbers(2,5)
print(result)

In general, the arguments in a function call must appear in the same order as the parameters in the function definition, unless you use keyword arguments.

In [None]:
# keyword arguments are matched to parameters by name, so their order in the call does not matter
calculate_price(product_name='apples', quantity=2, price_per_kilo=2.5)
calculate_price(quantity=2, product_name='grapes', price_per_kilo=4.5)

In [None]:
# parameters can be given default values, which are used whenever the caller omits that argument
def calculate_price(product_name='pears', quantity=1, price_per_kilo=1.90):
  """Print the cost of a purchase, falling back to defaults for omitted arguments.

  The cost is quantity multiplied by price_per_kilo; product_name is only
  used in the printed message. Nothing is returned.
  """
  total_cost = quantity * price_per_kilo
  print(f"The cost of {product_name} is {total_cost}")


# no arguments: every parameter takes its default value
calculate_price()
# quantity is omitted, so it takes its default value (1)
calculate_price(product_name='red pears', price_per_kilo=2000)

Why should you write functions?
- Divide the job into manageable steps: break long scripts that perform complex processes into units that are easier to define, understand and debug. Ideally, each function should only **perform one task**
- Reuse code: you can write functions in such a way that you can reuse them if needed



In [None]:
# Flat-script version (no functions): opens an original Word document,
# goes through its paragraphs and table cells,
# replaces every occurrence of 'placeholder' with 'new text',
# replaces every occurrence of 'date' with the current date,
# and then saves the modified document to a new file.
import docx
import datetime

original_doc = docx.Document('original.docx')
today = datetime.date.today()
# full month name, zero-padded day, four-digit year; formatted once and reused below
formatted_today = today.strftime("%B %d, %Y")

# pass 1: 'placeholder' in paragraphs
for para in original_doc.paragraphs:
    if 'placeholder' not in para.text:
        continue
    para.text = para.text.replace('placeholder', 'new text')

# pass 2: 'placeholder' in table cells
for table in original_doc.tables:
    for row in table.rows:
        for cell in row.cells:
            if 'placeholder' not in cell.text:
                continue
            cell.text = cell.text.replace('placeholder', 'new text')

# pass 3: 'date' in paragraphs
for para in original_doc.paragraphs:
    if 'date' not in para.text:
        continue
    para.text = para.text.replace('date', formatted_today)

# pass 4: 'date' in table cells
for table in original_doc.tables:
    for row in table.rows:
        for cell in row.cells:
            if 'date' not in cell.text:
                continue
            cell.text = cell.text.replace('date', formatted_today)

original_doc.save('modified.docx')

In [None]:
# Opens an original Word document, iterates through its paragraphs and tables,
# replaces every occurrence of 'placeholder' with 'new text',
# replaces every occurrence of 'date' with the current date,
# and then saves the modified document to a new file.

import datetime
import docx

# Function to open Word document
def open_doc(file):
    """Load the Word document at `file` with docx.Document and return the loaded document."""
    return docx.Document(file)

# Function to replace text
def replace_text(doc, old_text, new_text):
    """Replace old_text with new_text in every paragraph and table cell of doc.

    The document is modified in place and the same object is returned, so
    calls can be chained. Elements that do not contain old_text are not
    written to.
    """
    def _swap(element):
        # works for anything exposing a readable/writable `text` attribute
        current = element.text
        if old_text in current:
            element.text = current.replace(old_text, new_text)

    for paragraph in doc.paragraphs:
        _swap(paragraph)

    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                _swap(cell)

    return doc

# Function to save Word document
def save_doc(doc, file):
    """Write `doc` to `file` by calling the document's own save() method; nothing is returned."""
    doc.save(file)

# Main function
def main():
    """Run the whole task: open, replace placeholders, replace the date, save.

    Reads 'original.docx', replaces 'placeholder' with 'new text' and 'date'
    with today's date, and writes the result to 'modified.docx'.
    """
    # str.replace() only accepts strings as the replacement, so the date object
    # is formatted first (full month name, zero-padded day, four-digit year)
    today = datetime.date.today().strftime("%B %d, %Y")
    original_doc = open_doc('original.docx')
    modified_doc = replace_text(original_doc, 'placeholder', 'new text')
    modified_doc_with_date = replace_text(modified_doc, 'date', today)
    save_doc(modified_doc_with_date, 'modified.docx')


main()

### Activity to practice with functions and opening files, working with the contents and saving those contents.

Remember:
- How to work with files: https://colab.research.google.com/drive/1CZCirGQevXg71lG_fTA5nxVydEdcYIXH?usp=sharing
- How to work with strings: https://colab.research.google.com/drive/1rLFDBzZtFDtAdVX4W0SIxbWHwCUFA16-?usp=sharing

In [None]:
# 1. open activities_python_4.txt using the "latin-1" or "iso-8859-1" encoding (they're the same) so that the contents are loaded as a list
# 2. Complete the text cleaning function
# 2.1 remove the initial numeration --> this will be easier once we learn how to do it with regex
# 2.2 remove punctuation and symbols such as "*","$>".... --> this will be easier once we learn how to do it with regex
# 2.3 make sure there are no spaces at the beginning or end of the lines
# 2.4 make sure there are no double spaces between the words
# 2.5 convert the text to lowercase
# 3. save the cleaned text contents to a new document with an encoding of your choosing

In [None]:
def clean_lines(list_of_corpus_lines):
  """Clean every line of the corpus and return the result as one string.

  For each line: the leading numbering is cut off, the symbols '.', '?',
  '<$', '*' and '$>' are removed, whitespace at both ends is dropped, runs of
  whitespace between words are reduced to a single space, and the text is
  lowercased. Lines that end up empty are skipped. The remaining lines are
  joined with newline characters; an empty input gives an empty string.

  Each cleaned line is also printed, along with a message showing that the
  function was called.
  """
  print("I'm being called!")
  cleaned_lines = []

  for line in list_of_corpus_lines:
    # --> Your code for corpus cleaning goes here
    # assumes the numbering always occupies the first 7 characters -- TODO confirm against the data file
    line = line[7:]
    for symbol in ('.', '?', '<$', '*', '$>'):
      line = line.replace(symbol, '')
    # split() without arguments splits on any run of whitespace, so re-joining
    # with one space trims both ends and collapses repeated spaces of any length
    line = ' '.join(line.split())
    line = line.lower()
    print(line)

    # --> Your code for corpus cleaning ends here
    if line:
      cleaned_lines.append(line)
  # convert the list to a string, one cleaned line per text line
  cleaned_lines_string = '\n'.join(cleaned_lines)

  return cleaned_lines_string

# Your code for corpus opening goes here
# the with-block closes the file as soon as its lines have been read
with open('/content/intro_to_python/data/activities_python_4.txt', encoding='latin-1') as corpus_file:
  list_of_corpus_lines = corpus_file.readlines()
cleaned_lines = clean_lines(list_of_corpus_lines)
# Your code for saving the corpus to a file goes here

