# Introduction to Python: Basics

## Print Statements

In [None]:
# Print a simple string literal to standard output
print("I'm excited to learn Python.")

In [None]:
# Add two integers and report their sum
x, y = 4, 5
z = x + y

message = "Four plus five equals {0}.".format(z)
print(message)

In [None]:
# Combine lists two ways: concatenation and position-by-position pairing
A = [1, 2, 3, 4]
B = ["first", "second", "third", "fourth"]

# Concatenation: every element of A followed by every element of B
C = A + B
# Pairing: one (number, label) tuple per position
D = [(number, label) for number, label in zip(A, B)]

report = "List A: {0}\nList B: {1}\nList C: {2}\nList D: {3}"
print(report.format(A, B, C, D))

## Numbers

In [None]:
# Integers: raise a whole number to the second power
x = 9
squared = pow(x, 2)
print("9^2 = {0}".format(squared))

In [None]:
# Floating-point numbers: divide two floats and show the quotient with three decimal places
print("8.3/2.7 = {0:.3f}".format(8.3/2.7))

In [None]:
from math import exp, log, sqrt

In [None]:
# A few functions from the math module: exponential, natural log, square root.
# Each pair holds the output template and the value it should display.
for template, value in (
    ("e^3 = {0:.4f}", exp(3)),
    ("ln(4) = {0:.3f}", log(4)),
    ("sqrt(81) = {0:.1f}", sqrt(81)),
):
    print(template.format(value))

## Strings

In [None]:
# The string is delimited by single quotes, so the inner single quote is escaped with a backslash
print('I\'m enjoying learning Python')

In [None]:
# Triple single quotes or triple double quotes let a string literal span
# several lines; the line break inside the literal is kept in the string
print('''You can use triple single quotes
for multi-line comment strings''')

print("""You can also use triple double quotes
for multi-line comment strings""")

In [None]:
# Combine strings: glue two pieces together into one sentence
string1 = "This is a "
string2 = "short string."

sentence = "".join([string1, string2])
print("{0:s}".format(sentence))

In [None]:
# Repeat a string: multiplying a string by an integer concatenates that many copies
print("{0:s} {1:s}{2:s}".format("She is", "very "*4, "beautiful."))

In [None]:
# len() returns the number of characters in a string; spaces and punctuation count too.
# 'sentence' is the string built in the "Combine strings" cell.
m = len(sentence)
print("The string labeled 'sentence' is {0:d} characters long.".format(m))

In [None]:
# split(): break a string into a list of pieces
my_string = "My deliverable is due in May"

# No arguments: split on runs of whitespace
my_string_seq1 = my_string.split()
# Explicit separator with a cap: at most two splits, so at most three pieces
my_string_seq2 = my_string.split(" ", maxsplit=2)

report = "Split string on whitespace: {0}\nSplit only two times: {1}"
print(report.format(my_string_seq1, my_string_seq2))

In [None]:
# split() with an explicit separator: break the string at every comma
my_string2 = "Your,deliverable,is,due,in,June"
my_string2_seq = my_string2.split(sep=',')

message = "Split string on comma: {0}".format(my_string2_seq)
print(message)

In [None]:
# join(): called on the separator string, it concatenates the elements of a
# sequence with that separator between them. my_string_seq1 is the word list
# produced in the split() cell.
print("Join sequence elements into a string with comma separator: {0}".format(','.join(my_string_seq1)))

In [None]:
# strip(), lstrip(), rstrip(): remove whitespace from both ends, the left end,
# or the right end of a string
string_to_strip = "   Remove unwanted characters from this string\t\t    \n"

string_lstrip = string_to_strip.lstrip()
string_rstrip = string_to_strip.rstrip()
string_strip = string_to_strip.strip()

# Show the untouched string first, then each stripped variant
for template, text in (
    ("string_to_strip: {0:s}", string_to_strip),
    ("lstrip: {0:s}", string_lstrip),
    ("rstrip: {0:s}", string_rstrip),
    ("strip: {0:s}", string_strip),
):
    print(template.format(text))

In [None]:
# replace(): swap every occurrence of one substring for another
string_to_replace = "Let's replace the spaces in this sentence with other characters."

old, new = " ", ","
string_replace = string_to_replace.replace(old, new)

message = "Replace spaces with commas: {0:s}".format(string_replace)
print(message)

In [None]:
# lower(), upper(), capitalize()
# lower() converts every cased character to lowercase
string_to_lower = "Here's WHAT Happens WHEN You Use lower."
print("lowercase all characters: {0:s}".format(string_to_lower.lower()))

# upper() converts every cased character to uppercase
string_to_upper = "Here's what Happens when You Use UPPER."
print("uppercase all characters: {0:s}".format(string_to_upper.upper()))

# capitalize() uppercases the first character and lowercases all the others
string_to_capitalize = "here's WHAT Happens WHEN You Use capitalize."
print("capitalize first character: {0:s}".format(string_to_capitalize.capitalize()))

## Regular Expressions / Pattern Matching

In [None]:
import re

In [None]:
# Print the matched text for every word in which the pattern is found
string = "The quick brown fox jumps over the lazy dog."
string_seq = string.split()

# re.I makes the match case-insensitive; the named group exposes the matched text
pattern = re.compile(r"(?P<matches>The)", re.I)

for word in string_seq:
    match = pattern.search(word)
    if match is None:
        continue
    print("{:s}".format(match.group('matches')))

In [None]:
# Substitute the letter "a" for the word "the" in the string
string = "The quick brown fox jumps over the lazy dog."

# \b anchors the match at word boundaries so only the standalone word is
# replaced; without it, "the" inside longer words (e.g. "other") would match too
string_to_find = r"\bthe\b"
# re.I makes the match case-insensitive, so both "The" and "the" are replaced
pattern = re.compile(string_to_find, re.I)

print("{:s}".format(pattern.sub("a", string)))

## Dates

In [None]:
from datetime import date, datetime

In [None]:
# Show today's date, then its year, month, and day components
today = date.today()
print("today's date: {0!s}".format(today))

for template, component in (
    ("year: {0!s}", today.year),
    ("month: {0!s}", today.month),
    ("day: {0!s}", today.day),
):
    print(template.format(component))

# A datetime carries the time of day in addition to the date
current_datetime = datetime.today()
print("today's datetime: {0!s}".format(current_datetime))

In [None]:
# Render the same date (today, from the previous cell) in four different layouts
date1 = today.strftime('%m/%d/%Y')
date2 = today.strftime('%b %d, %Y')
date3 = today.strftime('%Y-%m-%d')
date4 = today.strftime('%B %d, %Y')

for formatted in (date1, date2, date3, date4):
    print(formatted)

In [None]:
# Parse the strings from the previous cell back into DATETIME objects;
# each string is paired with the layout it was written in
for text, layout in ((date1, '%m/%d/%Y'), (date2, '%b %d, %Y')):
    print("{!s}".format(datetime.strptime(text, layout)))

# Parse into DATE objects: strptime() yields a datetime, .date() drops the time part
for text, layout in ((date3, '%Y-%m-%d'), (date4, '%B %d, %Y')):
    print("{!s}".format(datetime.strptime(text, layout).date()))

## Lists

In [None]:
# Square brackets create a list.
# len() gives the number of elements; max() and min() give the largest and
# smallest values of a numeric list.
a_list = [1, 2, 3]

element_count = len(a_list)
largest, smallest = max(a_list), min(a_list)

print("{}".format(a_list))
print("a_list has {} elements.".format(element_count))
print("the maximum value in a_list is {}.".format(largest))
print("the minimum value in a_list is {}.".format(smallest))

In [None]:
# A list may mix types: strings, numbers, nested lists, and tuples.
# count() reports how many times a value appears in the list.
another_list = [
    'printer',
    5,
    ['star', 'circle', 9],
    5,
    ['sin', 'cos', 'tan'],
    (3, 5, 7),
    'go fish',
    5,
]

print("{}".format(another_list))

size = len(another_list)
print("another_list has {} elements.".format(size))

fives = another_list.count(5)
print("5 is in another_list {} times.".format(fives))

In [None]:
# Index into a list to read a single value.
# Index 0 is the first value; negative indices count back from the end,
# so -1 is the last value.
for index in (0, 1, 2, -1, -2, -3):
    print("{}".format(a_list[index]))

for index in (2, -1):
    print("{}".format(another_list[index]))

In [None]:
# Use list slices to access a subset of list values
# Omit the starting index to start from the beginning
# Omit the ending index to go all of the way to the end
print("{}".format(a_list[:2]))
print("{}".format(another_list[4:]))

In [None]:
# append() adds one value to the end of the list; it modifies the list in place.
# (remove() deletes a specific value and pop() removes a value from the end;
# neither is exercised in this cell.)
for new_value in (4, 5, 6):
    a_list.append(new_value)

print("{}".format(a_list))

In [None]:
# reverse() flips a list in place, i.e. it changes the list it is called on.
# Reversing a copy leaves the original list untouched.
a_list_copy = list(a_list)
a_list_copy.reverse()

print("{}".format(a_list_copy))

In [None]:
# sort() orders a list in place, i.e. it changes the list it is called on.
# Sorting a copy leaves the original list untouched.
unordered_list = [3, 5, 1, 7, 2, 8, 4, 9, 0, 6]
print("unordered: {}".format(unordered_list))

list_copy = list(unordered_list)
list_copy.sort()
print("sorted copy: {}".format(list_copy))

## Tuples

In [None]:
# Parentheses create a tuple; len() and indexing work as they do for lists
my_tuple = ('x', 'y', 'z')

size = len(my_tuple)
second = my_tuple[1]

print("{}".format(my_tuple))
print("my_tuple has {} elements".format(size))
print("{}".format(second))

In [None]:
# Combine tuples: the + operator builds a new tuple holding the elements of
# the left operand followed by the elements of the right operand
longer_tuple = my_tuple + my_tuple
print("{}".format(longer_tuple))

In [None]:
# Unpack a tuple: list one name per element on the left-hand side of the
# assignment, and each name receives the element in the same position
one, two, three = my_tuple
print("{0} {1} {2}".format(one, two, three))

In [None]:
# Swap the values of two variables with a single tuple assignment
var1, var2 = 'red', 'robin'
print("original values: {} {}".format(var1, var2))

# The right-hand side is evaluated first, so no temporary variable is needed
var2, var1 = var1, var2
print("swapped values: {} {}".format(var1, var2))

In [None]:
# tuple() turns a list into a tuple; list() turns a tuple into a list
my_list = [1, 2, 3]
my_tuple = ('x', 'y', 'z')

as_tuple = tuple(my_list)
as_list = list(my_tuple)

print("list into tuple: {}".format(as_tuple))
print("tuple into list: {}".format(as_list))

## Dictionaries

In [None]:
# Curly braces create a dictionary; a colon separates each key from its value.
# len() gives the number of key-value pairs.
empty_dict = {}

a_dict = {
    'one': 1,
    'two': 2,
    'three': 3,
}
pair_count = len(a_dict)

print("{}".format(a_dict))
print("a_dict has {!s} elements".format(pair_count))

In [None]:
# Dictionary values may be of different types: here a string, a number, and a list
another_dict = {
    'x': 'printer',
    'y': 5,
    'z': ['star', 'circle', 9],
}
pair_count = len(another_dict)

print("{}".format(another_dict))
print("another_dict has {!s} elements".format(pair_count))

In [None]:
# Put a key in square brackets to look up the value stored under it.
# a_dict and another_dict are the dictionaries created in the two cells above.
print("{}".format(a_dict['two']))
print("{}".format(another_dict['z']))

In [None]:
# keys(), values(), and items() expose a dictionary's keys, its values,
# and its key-value pairs, respectively
for template, view in (
    ("keys: {}", a_dict.keys()),
    ("values: {}", a_dict.values()),
    ("items: {}", a_dict.items()),
):
    print(template.format(view))

In [None]:
# Iterate through key-value pairs and print them.
# items() yields (key, value) tuples, which the for statement unpacks into two names.
for key, value in a_dict.items():
    print(key, value)

## Control Flow

In [None]:
# if-else statement: exactly one of the two branches runs
x = 5
if x <= 4 or x == 9:
    print("x is not greater than 4 or equals 9.")
else:
    # Reached only when x is greater than 4 and is not 9
    print("the value of x is {}.".format(x))

In [None]:
# if-elif-else statement: conditions are tested top to bottom and only the
# first branch whose condition holds is executed
y = 7
if y <= 3:
    print("y is not greater than 3.")
elif y <= 6:
    print("y^2 = {}".format(y*y))
else:
    print("y is greater than six.")

In [None]:
# while loop: repeat the body for as long as the condition stays true
x = 0
limit = 6
while x < limit:
    print("{!s}".format(x))
    # Advance the counter; without this the loop would never end
    x = x + 1

In [None]:
# for loop: visit every element of a sequence in order
months = [
    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec',
]

for month in months:
    label = "{!s}".format(month)
    print(label)

In [None]:
# enumerate() pairs each element with its position, counting from zero;
# months is the list defined in the for-loop cell above
for i, month in enumerate(months):
    print("{}: {!s}".format(i, month))

In [None]:
# Compact for loops: list, set, and dictionary comprehensions

# List comprehension: keep only the rows whose third value exceeds the cutoff
my_data = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
cutoff = 5

rows_to_keep = [record for record in my_data if record[2] > cutoff]
print("list comprehension: {}".format(rows_to_keep))

In [None]:
# Set comprehension: collect the distinct tuples of a list (duplicates collapse)
my_data = [
    (1, 2, 3),
    (4, 5, 6),
    (7, 8, 9),
    (7, 8, 9),
]

set_of_tuples = {record for record in my_data}
print("set comprehension: {}".format(set_of_tuples))

In [None]:
# Dictionary comprehension: keep only the pairs whose value exceeds the cutoff
my_dictionary = {
    'customer1': 7,
    'customer2': 9,
    'customer3': 11,
}
cutoff = 10

my_results = {
    customer: amount
    for customer, amount in my_dictionary.items()
    if amount > cutoff
}
print("dictionary comprehension: {}".format(my_results))

## Functions

In [None]:
def print_birthplace(country="USA"):
    """Print a sentence stating which country the speaker is from.

    Args:
        country: Value shown after "I am from "; defaults to "USA".
            It is rendered with str.format(), so non-string values are
            accepted as well.
    """
    print("I am from {}".format(country))


# Call once with the default argument and once with an explicit one
print_birthplace()
print_birthplace("Japan")

In [None]:
# Calculate the mean of a sequence of numeric values
def calc_mean(numeric_values):
    """Return the arithmetic mean of numeric_values, or NaN when it is empty."""
    count = len(numeric_values)
    if count > 0:
        return sum(numeric_values) / count
    # An empty sequence has no mean; NaN signals that without raising
    return float('nan')


my_list = [2, 2, 4, 4, 6, 6, 8, 8]
mean_value = calc_mean(my_list)
print("mean: {!s}".format(mean_value))

## Exceptions

In [None]:
empty_list = []


def calcMean(numericValues):
    """Return the arithmetic mean of numericValues.

    There is no guard for an empty sequence: dividing by a length of zero
    raises ZeroDivisionError, which the try statements below rely on.
    """
    total = sum(numericValues)
    count = len(numericValues)
    return total/count


# Short version: try/except only
try:
    print("{}".format(calcMean(empty_list)))
except ZeroDivisionError as detail:
    # 'detail' is the exception object; formatting it shows its message
    print("Error: {}".format(detail))

In [None]:
# Long version: a try statement with all four clauses
try:
    # Only the call that can raise sits inside the try body
    result = calcMean(empty_list)
except ZeroDivisionError as detail:
    # Runs only if the try body raised ZeroDivisionError
    # (empty_list has length zero, so calcMean divides by zero)
    print("Error: {}".format(detail))
else:
    # Runs only if the try body finished without raising
    print("The mean is: {}".format(result))
finally:
    # Runs whether or not an exception occurred
    print("Finally: The finally block is executed every time")

## Reading CSV File

In [None]:
import csv

In [None]:
# Relative path of the CSV file read by the cell below
CSV_IN_FILE = '../inFiles/randomuser5000.csv'

In [None]:
# Print only the header (first) row of the CSV file.
# next() pulls a single row from the reader, so the remaining rows are never
# read; the None default keeps an empty file from raising StopIteration.
with open(CSV_IN_FILE, 'r', newline='') as f:
    reader = csv.reader(f)
    header_row = next(reader, None)
    if header_row is not None:
        print(header_row)

In [None]:
# Relative path of the flights CSV file whose rows and columns are counted below
FLIGHTS_FILE = '../inFiles/flights.csv'

In [None]:
# Count the rows (the header row included) and the columns of the flights file.
# Both counters stay at zero when the file has no rows at all.
nrows = 0
ncols = 0

with open(FLIGHTS_FILE, 'r', newline='') as f:
    # enumerate(..., start=1) leaves nrows holding the number of rows seen so far
    for nrows, row in enumerate(csv.reader(f), start=1):
        if nrows == 1:
            # The first row is the header; its length is the column count
            ncols = len(row)

print('Rows: {}  Columns: {}'.format(nrows, ncols))

## Writing CSV File

In [None]:
import csv

In [None]:
# Relative path of the CSV file that is filtered below
CSV_IN_FILE = '../inFiles/randomuser5000.csv'

In [None]:
# Relative path of the CSV file that receives the filtered rows
CSV_OUT_FILE = '../outFiles/randomuser5000_females.csv'

In [None]:
# Copy the header row plus every row whose gender column is 'female'
# (compared case-insensitively) from the input CSV to the output CSV.

# Position of the gender column within each row
gender_column_index = 0

with open(CSV_IN_FILE, 'r', newline='') as in_file, \
        open(CSV_OUT_FILE, 'w', newline='') as out_file:
    reader = csv.reader(in_file)
    writer = csv.writer(out_file)

    # The first row is the header; None means the input file is empty
    header = next(reader, None)
    if header is not None:
        writer.writerow(header)

    for row in reader:
        # csv.reader yields an empty list for a blank line and short rows may
        # lack the gender column; skip them instead of raising IndexError
        if len(row) <= gender_column_index:
            continue
        gender = row[gender_column_index].lower()
        if gender == 'female':
            writer.writerow(row)

print('Finished writing file.')

## Reading JSON File

In [None]:
import json

In [None]:
# Relative path of the JSON file read by the cell below
JSON_IN_FILE = '../inFiles/randomuser5000.json'

In [None]:
# Load the JSON document and print the first ten records, one key-value pair per line.
# The file is decoded explicitly as UTF-8 (the encoding JSON interchange files
# are expected to use) instead of the platform's default encoding, so
# non-ASCII text reads the same on every system.
with open(JSON_IN_FILE, 'r', encoding='utf-8') as in_file:
    data = json.load(in_file)

# The file can be closed before printing: json.load() has already read all of it
records = data['results']
for record in records[:10]:  # only print the first ten records
    for key, value in record.items():
        print('{}: {}'.format(key, value))
    print()  # blank line between records

## Writing JSON File

In [None]:
import json

In [None]:
# Relative path of the JSON file that is filtered below
JSON_IN_FILE = '../inFiles/randomuser5000.json'

In [None]:
# Relative path of the JSON file that receives the filtered records
JSON_OUT_FILE = '../outFiles/randomuser5000_females.json'

In [None]:
# Copy the records whose 'gender' is 'female' into a new JSON file, stored
# under the key 'records'.

# Read and parse the input first: if this step fails, the output file has not
# been opened yet and therefore is not truncated.
with open(JSON_IN_FILE, 'r', encoding='utf-8') as in_file:
    input_data = json.load(in_file)

records = input_data['results']
output_data = {
    'records': [record for record in records if record['gender'] == 'female'],
}

# Always write the result, even when no record matched, so the output file is
# valid JSON in every case. (The dict always contains the 'records' key, so a
# truthiness check on it could never skip the write anyway.)
with open(JSON_OUT_FILE, 'w', encoding='utf-8') as out_file:
    json.dump(output_data, out_file)

print('Finished writing file.')

## Reading Excel File

In [None]:
from datetime import date
from xlrd import open_workbook, xldate_as_tuple

In [None]:
# Relative path of the Excel workbook read by the cells below
# NOTE(review): xlrd 2.0 and later read only .xls files; opening this .xlsx
# presumably requires an older xlrd release - confirm the installed version
EXCEL_IN_FILE = '../inFiles/randomuser5000.xlsx'

In [None]:
# Introspect an Excel workbook, i.e. print the number of worksheets and, for
# each worksheet, its name and its row and column counts.
# Opening the workbook in a with statement releases it when the block ends,
# matching how the other Excel cells in this notebook open workbooks.
with open_workbook(EXCEL_IN_FILE) as workbook:
    print('Number of worksheets:', workbook.nsheets)
    for worksheet in workbook.sheets():
        print("Worksheet name:", worksheet.name, "\tRows:", worksheet.nrows, "\tColumns:", worksheet.ncols)

In [None]:
# Print every row of the worksheet as a list, with date cells rendered as MM/DD/YYYY
with open_workbook(EXCEL_IN_FILE) as workbook:
    worksheet = workbook.sheet_by_name('randomuser5000_all')
    for row_index in range(worksheet.nrows):
        output_row = []
        for col_index in range(worksheet.ncols):
            value = worksheet.cell_value(row_index, col_index)
            # Cell type 3 is the code xlrd uses for date cells (xlrd.XL_CELL_DATE)
            if worksheet.cell_type(row_index, col_index) == 3:
                # xldate_as_tuple needs the workbook's datemode to decode the
                # stored number; only the year, month, and day are kept
                year, month, day = xldate_as_tuple(value, workbook.datemode)[0:3]
                value = date(year, month, day).strftime('%m/%d/%Y')
            output_row.append(value)
        print(output_row)

## Writing Excel File

In [None]:
from datetime import date
from xlrd import open_workbook, xldate_as_tuple
from xlwt import Workbook

In [None]:
# Relative path of the Excel workbook that is filtered below
EXCEL_IN_FILE = '../inFiles/randomuser5000.xlsx'

In [None]:
# Relative path of the .xls workbook that receives the filtered rows
EXCEL_OUT_FILE = '../outFiles/randomuser5000_females.xls'

In [None]:
# Copy the header row plus every row whose gender cell equals 'female' into a
# new workbook, rendering date cells as MM/DD/YYYY strings.
output_workbook = Workbook()
output_worksheet = output_workbook.add_sheet('females')

# Position of the gender column within each row
gender_column_index = 0

with open_workbook(EXCEL_IN_FILE) as workbook:
    worksheet = workbook.sheet_by_name('randomuser5000_all')

    # Row 0 is the header and is always kept
    header = worksheet.row_values(0)
    output_data = [header]

    for row_index in range(1, worksheet.nrows):
        gender = worksheet.cell_value(row_index, gender_column_index)
        if gender != 'female':
            continue  # only female rows are copied

        output_row = []
        for column_index in range(worksheet.ncols):
            cell_value = worksheet.cell_value(row_index, column_index)
            # Cell type 3 is the code xlrd uses for date cells (xlrd.XL_CELL_DATE)
            if worksheet.cell_type(row_index, column_index) == 3:
                date_parts = xldate_as_tuple(cell_value, workbook.datemode)[0:3]
                cell_value = date(*date_parts).strftime('%m/%d/%Y')
            output_row.append(cell_value)
        output_data.append(output_row)

    # Write the collected rows cell by cell: (row, column, value)
    for list_index, output_list in enumerate(output_data):
        for element_index, element in enumerate(output_list):
            output_worksheet.write(list_index, element_index, element)

output_workbook.save(EXCEL_OUT_FILE)

print('Finished writing file.')