# Introduction
It's time to put your new skills to the test! In this lab, you'll use regular expressions to find the users in a large list who are still using an old email domain. To do so, you'll need to write a script that includes:

- Replacing the old domain name (abc.edu) with a new domain name (xyz.edu).
- Storing all email addresses, including the updated ones, in a new file.

In [None]:
#!/usr/bin/env python3

# import csv and regex module
import re
import csv

# This function uses regex to identify the domain of the user email addresses in the user_emails.csv file.
def contains_domain(address, domain):
  """Return True if the email address has the given domain in the domain position.

  The address must start with one or more word characters, dots or hyphens,
  followed by '@' and then exactly `domain` at the end of the string.
  `domain` is treated as literal text, not as a regular expression.

  Args:
    address: Email address to test.
    domain: Domain name to look for, e.g. 'abc.edu'.

  Returns:
    True if the address matches, False if not.
  """
  # re.escape stops the '.' in the domain from matching any character,
  # so 'user@abcxedu' is no longer reported as being on 'abc.edu'.
  pattern = r'[\w\.-]+@' + re.escape(domain) + '$'
  return re.match(pattern, address) is not None

"""The replace_domain function takes in one email address at a time, 
as well as the email's old domain name and its new domain name."""
def replace_domain(address, old_domain, new_domain):
  """Replace the old domain with the new domain in the received address.

  Only an occurrence of `old_domain` at the very end of `address` is replaced.
  Both domains are treated as literal text. This function does not itself
  check that `old_domain` directly follows the '@'.

  Args:
    address: Email address to rewrite.
    old_domain: Domain name to look for at the end of the address.
    new_domain: Domain name to put in its place.

  Returns:
    The address with the trailing old domain replaced, or the address
    unchanged if it does not end with the old domain.
  """
  # Escape the old domain so that its '.' only matches a literal dot.
  old_domain_pattern = re.escape(old_domain) + '$'
  # A function replacement inserts new_domain verbatim; a plain string would
  # have backslash escapes and group references interpreted by re.sub.
  return re.sub(old_domain_pattern, lambda _match: new_domain, address)

# Call the functions defined above, contains_domain() and replace_domain(), from main().
# This will allow us to find the email addresses with the old domain, replace it with the new one,
# and write the updated list to a CSV file in the data directory.

def main():
  """Processes the list of emails, replacing any instances of the old domain with the new domain.

  Reads every row of the CSV file at csv_file_location, rewrites the value in
  the ' Email Address' column of each data row whose address is on the old
  domain, and writes all rows (changed and unchanged) to report_file.

  Raises:
    ValueError: If the header row has no ' Email Address' column.
  """
  old_domain, new_domain = 'abc.edu', 'xyz.edu'
  csv_file_location = '<csv_file_location>'
  report_file = '<path_to_home_directory>' + '/updated_user_emails.csv'

  # Read the whole input file into memory. The csv module asks for files to be
  # opened with newline='' so that it can handle line endings itself.
  # The with-statement closes the file; no explicit close() is needed.
  with open(csv_file_location, 'r', newline='') as f:
    user_data_list = list(csv.reader(f))

  # An empty input file has no header to inspect; it yields an empty report.
  if user_data_list:
    # The lookup key includes a leading space; presumably the input file has a
    # space after each comma -- verify against the actual CSV.
    email_key = ' ' + 'Email Address'
    email_index = user_data_list[0].index(email_key)

    # Single pass over the data rows: the same column is used both to read the
    # address and to write the updated one back.
    for user in user_data_list[1:]:
      # Blank lines and short rows have no email field to update.
      if len(user) <= email_index:
        continue
      field = user[email_index]
      email_address = field.strip()
      if contains_domain(email_address, old_domain):
        replaced_email = replace_domain(email_address, old_domain, new_domain)
        # Swap only the address itself so that any whitespace around it in
        # the field is written back unchanged.
        user[email_index] = field.replace(email_address, replaced_email, 1)

  # Now write the rows to the output file declared in report_file.
  # newline='' keeps the csv writer's own line endings from being translated
  # a second time (which doubles the carriage return on Windows).
  with open(report_file, 'w', newline='') as output_file:
    writer = csv.writer(output_file)
    writer.writerows(user_data_list)

# Run main() only when this file is executed as a script, so that importing
# it (for example, to reuse contains_domain) does not trigger any file I/O.
if __name__ == "__main__":
  main()