# Types of variable assignments in Python

In [2596]:
import collections
import operator

# simple assignment: bind one name to one value
w = 1
print(f'w = {w}')
# tuple unpacking: several names are assigned in one statement, position by position
x,y,z = 2,3,4
print(f'x = {x}, y = {y}, z = {z}')

w = 1
x = 2, y = 3, z = 4


In [2597]:
# the assignment target may also be written in list form; the right-hand list is unpacked item by item
[x,y,z] = [2,3,4]
print(f'x = {x}, y = {y}, z = {z}')

x = 2, y = 3, z = 4


In [2598]:
# a list can be unpacked into a comma-separated target as well
x,y,z = [2,3,4]
print(f'x = {x}, y = {y}, z = {z}')

x = 2, y = 3, z = 4


In [2599]:
# starred target: x takes the first item, y collects all remaining items as a list
x,*y = [2,3,4]
print(f'x = {x}, y = {y}')

x = 2, y = [3, 4]


In [2600]:
# unpacking works on any iterable: a string is split into its characters
x,*y = 'Hello'
print(f'x = {x}, y = {y}')

x = H, y = ['e', 'l', 'l', 'o']


In [2601]:
# chained assignment: both names are bound to the same value
x = y = 5
print(f'x = {x}, y = {y}')

x = 5, y = 5


In [2602]:
# augmented assignment: shorthand for x = x + 1 and y = y - 1
x += 1
y -= 1
print(f'x = {x}, y = {y}')

x = 6, y = 4


# Error Handling

In [2603]:
class RecordNotFoundError(Exception):
    """Custom exception whose string form is always "record not found"."""

    # fixed text returned by __str__, independent of constructor arguments
    _MESSAGE = "record not found"

    def __str__(self):
        return self._MESSAGE

def read_record(id):
    """Return the record for ``id`` as a ``{'key': id}`` dict.

    Raises:
        ValueError: if ``id`` is not an ``int`` (built-in exception).
        RecordNotFoundError: if ``id`` is greater than 100 (custom exception).
    """
    if isinstance(id, int):
        if id <= 100:
            return {'key': id}
        # custom exception
        raise RecordNotFoundError()
    # built-in exception
    raise ValueError("invalid id")

In [2604]:
# Walk through the full try / except / else / finally flow for several ids.
# The loop variable is named record_id (not `id`) so the built-in id() is not
# shadowed at module level for the rest of the session.
for record_id in ['1', 2, 10, 1000]:
    try:
        r = read_record(record_id)
        # everything is fine here
        print("found record: {} for id: {}".format(r, record_id))
    except RecordNotFoundError:
        # catch specific exceptions (the exception object itself is not needed)
        print("record: {} not found".format(record_id))
    except Exception as e:
        # catch all other unpredicted exceptions
        print("{} for record: {}".format(e, record_id))
    else:
        # runs only if the try body raised no exception
        print("successful fetch from database for record: {}".format(record_id))
    finally:
        # always runs: close resources (e.g. database connection) whether or not an exception occurred
        print("finish fetching record: {}\n".format(record_id))

invalid id for record: 1
finish fetching record: 1

found record: {'key': 2} for id: 2
successful fetch from database for record: 2
finish fetching record: 2

found record: {'key': 10} for id: 10
successful fetch from database for record: 10
finish fetching record: 10

record: 1000 not found
finish fetching record: 1000



# Data structures

### List

In [2605]:
# sample list of city names used by the list examples
cities_list = ["Ottawa", "Calgary", "Edmonton", "Victoria", "Winnipeg", "Toronto", "Montreal"]

In [2606]:
# iterate over values, in list order
for city in cities_list:
    print(f"city : {city}")

city : Ottawa
city : Calgary
city : Edmonton
city : Victoria
city : Winnipeg
city : Toronto
city : Montreal


In [2607]:
# iterate over index and values: enumerate() yields (index, value) pairs
for index, city in enumerate(cities_list):
    print(f"{index} - city : {city}")

0 - city : Ottawa
1 - city : Calgary
2 - city : Edmonton
3 - city : Victoria
4 - city : Winnipeg
5 - city : Toronto
6 - city : Montreal


In [2608]:
# iterate over index and values, this time driving the loop by index
for index in range(len(cities_list)):
    print(f"{index} - city : {cities_list[index]}")

0 - city : Ottawa
1 - city : Calgary
2 - city : Edmonton
3 - city : Victoria
4 - city : Winnipeg
5 - city : Toronto
6 - city : Montreal


In [2609]:
# all values except the first two: start the index range at 2
for index in range(2, len(cities_list)):
    print(f"{index} - city : {cities_list[index]}")

2 - city : Edmonton
3 - city : Victoria
4 - city : Winnipeg
5 - city : Toronto
6 - city : Montreal


In [2610]:
# alternative by sublist: slice off the first two items, then index into the slice
# (the sublist is a new list, so its indexes start again at 0)
cities_list_sublist = cities_list[2:]
for index in range(len(cities_list_sublist)):
    print(f"{index} - city : {cities_list_sublist[index]}")

0 - city : Edmonton
1 - city : Victoria
2 - city : Winnipeg
3 - city : Toronto
4 - city : Montreal


In [2611]:
# all items in the list except the last two (a negative stop index counts from the end)
print(cities_list[:-2])

['Ottawa', 'Calgary', 'Edmonton', 'Victoria', 'Winnipeg']


In [2612]:
# get every second item (slice step of 2, starting with the first item)
print(cities_list[::2])

['Ottawa', 'Edmonton', 'Winnipeg', 'Montreal']


In [2613]:
# reversed copy of the list (slice step of -1); cities_list itself is not modified
print(cities_list[::-1])

['Montreal', 'Toronto', 'Winnipeg', 'Victoria', 'Edmonton', 'Calgary', 'Ottawa']


In [2614]:
# check if Vancouver is in the list by calling the magic method directly
print(cities_list.__contains__('Vancouver'))
# idiomatic alternative: the `in` operator, which relies on __contains__
print('Vancouver' in cities_list)

False
False


In [2615]:
# add Vancouver to the list
cities_list.append('Vancouver')
print(cities_list)
# or: extend the list in place with a one-item list
# NOTE: both statements run in this cell, so Vancouver ends up in the list twice
cities_list += ['Vancouver']
print(cities_list)

['Ottawa', 'Calgary', 'Edmonton', 'Victoria', 'Winnipeg', 'Toronto', 'Montreal', 'Vancouver']
['Ottawa', 'Calgary', 'Edmonton', 'Victoria', 'Winnipeg', 'Toronto', 'Montreal', 'Vancouver', 'Vancouver']


### Set

In [2616]:
# building a set from the list keeps each distinct value once
cities_set = set(cities_list)
# adding a value that is already present leaves the set unchanged
cities_set.add("Ottawa")

In [2617]:
# iterate over values (a set does not keep the list's order)
for city in cities_set:
    print(f"city : {city}")

city : Victoria
city : Toronto
city : Montreal
city : Calgary
city : Edmonton
city : Vancouver
city : Ottawa
city : Winnipeg


### Tuple

In [2618]:
# a tuple built from the list keeps its order and its duplicates
cities_tuple = tuple(cities_list)
for city in cities_tuple:
    print(f"city : {city}")

city : Ottawa
city : Calgary
city : Edmonton
city : Victoria
city : Winnipeg
city : Toronto
city : Montreal
city : Vancouver
city : Vancouver


### Dict

In [2619]:
# map each city name to the name of its province
cities_to_province_dict = {"Ottawa": "Ontario", "Calgary": "Alberta", "Edmonton": "Alberta", "Victoria": "British Columbia", "Winnipeg": "Manitoba", "Toronto": "Ontario", "Montreal": "Quebec", "Vancouver": "British Columbia"}

In [2620]:
# iterating a dict yields its keys; each province is looked up by key
for index, city in enumerate(cities_to_province_dict):
    print(f"{index} - city : {city}, province = {cities_to_province_dict[city]}")

0 - city : Ottawa, province = Ontario
1 - city : Calgary, province = Alberta
2 - city : Edmonton, province = Alberta
3 - city : Victoria, province = British Columbia
4 - city : Winnipeg, province = Manitoba
5 - city : Toronto, province = Ontario
6 - city : Montreal, province = Quebec
7 - city : Vancouver, province = British Columbia


In [2621]:
# items() yields one (key, value) tuple per entry
for t in cities_to_province_dict.items():
    print(f"type = {type(t)} - city : {t[0]}, province = {t[1]}")

type = <class 'tuple'> - city : Ottawa, province = Ontario
type = <class 'tuple'> - city : Calgary, province = Alberta
type = <class 'tuple'> - city : Edmonton, province = Alberta
type = <class 'tuple'> - city : Victoria, province = British Columbia
type = <class 'tuple'> - city : Winnipeg, province = Manitoba
type = <class 'tuple'> - city : Toronto, province = Ontario
type = <class 'tuple'> - city : Montreal, province = Quebec
type = <class 'tuple'> - city : Vancouver, province = British Columbia


In [2622]:
# keys() exposes the dict's keys; values are fetched by subscript
for city in cities_to_province_dict.keys():
    print(f"{city}: {cities_to_province_dict[city]}")

Ottawa: Ontario
Calgary: Alberta
Edmonton: Alberta
Victoria: British Columbia
Winnipeg: Manitoba
Toronto: Ontario
Montreal: Quebec
Vancouver: British Columbia


# String operations

### Access characters

In [2623]:
# sample string for the character-access example
s = "Hello"

In [2624]:
# a string is iterable: enumerate() pairs each character with its index
for i, char in enumerate(s):
    print(f"{i} - {char}")

0 - H
1 - e
2 - l
3 - l
4 - o


### Convert

In [2625]:
# a string holding an integer converts directly with int()
s = "1"
i = int(s)
print(f"{i} : {type(i)}")

# a string holding a decimal number: parse it with float() first,
# then int() drops the fractional part
s = "1.2"
f = float(s)
i = int(f)
print(f"{i} : {type(i)}, {f} : {type(f)}")

1 : <class 'int'>
1 : <class 'int'>, 1.2 : <class 'float'>


In [2626]:
# list to string: join the items, using ", " as the separator
s = ", ".join(cities_list)
print(s)

Ottawa, Calgary, Edmonton, Victoria, Winnipeg, Toronto, Montreal, Vancouver, Vancouver


### Search

In [2627]:
s = "This is a sample test. This is a sample test."
key = "test"

# first occurrence: index() returns the position of the first match
i1 = s.index(key)
print(f"first occurrence = {i1}")

# second occurrence: resume the search just past the start of the first match
i2 = s.index(key, i1 + 1)
print(f"second occurrence = {i2}")

# no occurrence: find() returns -1 when the substring is absent
i3 = s.find("no")
print(f"no occurrence = {i3}")

first occurrence = 17
second occurrence = 40
no occurrence = -1


In [2628]:
# all occurrences
# start at -1 so the first search begins at index 0; starting at 0 would begin
# the search at index 1 and miss a match at the very start of the string
i = -1
while True:
    # search from just past the previous match to the end of the string
    i = s.find(key, i + 1)
    print("next occurrence = {}".format(i))
    # find() returns -1 once there are no further matches
    if i < 0:
        break

next occurrence = 17
next occurrence = 40
next occurrence = -1


### Preparation

In [2629]:
# trim: strip() removes any of the given characters from both ends
s = ",, Hello "
trimmed = s.strip(", ")
print(f"trimmed = '{trimmed}'")

trimmed = 'Hello'


In [2630]:
# a sample from a dataset
s = "The recorded videos are not accessible in the video logs tab,0.16"

# get score
# split on the LAST comma only: the score is the final field, and a comma
# inside the text must not break the two-value unpacking
text, score = s.rsplit(",", 1)
score = float(score)
print("text = {}, score = {}".format(text, score))

text = The recorded videos are not accessible in the video logs tab, score = 0.16


In [2631]:
# split lines of text
# a sample from a dataset
s = """
The recorded videos are not accessible in the video logs tab,0.16
Backup scenario and restore for dynamodb,-0.01
Add Search function to the parent directory table,0.06
"""
for line in s.splitlines():
    line = line.strip()
    # an empty string is falsy, so blank lines are not printed
    if line:
        print(f"line = '{line}'")

line = 'The recorded videos are not accessible in the video logs tab,0.16'
line = 'Backup scenario and restore for dynamodb,-0.01'
line = 'Add Search function to the parent directory table,0.06'


In [2632]:
# Regex
# a sample from a dataset
s = """
[ISSUE-891] The recorded videos are not accessible in the video logs tab,0.16
[ISSUE-86] Backup scenario and restore for dynamodb,-0.01
[ISSUE-1290] Add Search function to the parent directory table,0.06
"""

import re

# raw string: in a plain literal "\[" is an invalid escape sequence, which
# current Python versions warn about; the pattern is compiled once, outside
# the loop
issue_prefix = re.compile(r"^\[ISSUE-[0-9]*\] ")
for line in s.splitlines():
    # remove a leading "[ISSUE-<digits>] " tag, if present
    line = issue_prefix.sub("", line)
    # skip blank lines
    if not line:
        continue
    print("line = {}".format(line))

line = The recorded videos are not accessible in the video logs tab,0.16
line = Backup scenario and restore for dynamodb,-0.01
line = Add Search function to the parent directory table,0.06


read more at: https://www.w3schools.com/python/python_regex.asp

# Functional programming

In [2633]:
# list comprehensions
print(f"dict = {cities_to_province_dict}")

# get list of provinces, first with an explicit loop
provinces = []
for city in cities_to_province_dict:
    provinces.append(cities_to_province_dict[city])
print(f"provinces = {provinces}")

# alternative: the same list built by a comprehension
provinces = [
    cities_to_province_dict[city]
    for city in cities_to_province_dict
]
print(f"provinces = {provinces}")

dict = {'Ottawa': 'Ontario', 'Calgary': 'Alberta', 'Edmonton': 'Alberta', 'Victoria': 'British Columbia', 'Winnipeg': 'Manitoba', 'Toronto': 'Ontario', 'Montreal': 'Quebec', 'Vancouver': 'British Columbia'}
provinces = ['Ontario', 'Alberta', 'Alberta', 'British Columbia', 'Manitoba', 'Ontario', 'Quebec', 'British Columbia']
provinces = ['Ontario', 'Alberta', 'Alberta', 'British Columbia', 'Manitoba', 'Ontario', 'Quebec', 'British Columbia']


In [2634]:
# get provinces of cities with population more than 1000000
cities_population = {
    'Ottawa': 900000,
    'Calgary': 1300000,
    'Edmonton': 981000,
    'Victoria': 92000,
    'Winnipeg': 750000,
    'Toronto': 2900000,
    'Montreal': 1800000,
    "Vancouver": 700000,
}

# comprehension with a condition: keep only cities above the threshold
provinces_of_metropolitan_cities = [
    cities_to_province_dict[city]
    for city in cities_to_province_dict
    if cities_population[city] > 1000000
]
print(f"provinces_of_metropolitan_cities = {provinces_of_metropolitan_cities}")

provinces_of_metropolitan_cities = ['Alberta', 'Ontario', 'Quebec']


### Example

In [2635]:
# sample input: a list with one entry per city; each entry holds the city name
# and a list of temperature readings (date, time, value).
# Note that every "value" is stored as a string, not a number.
data = [
    {
        "city": "Calgary",
        "temperatures": [
            {
                "date": "2022-09-06",
                "time": "00:00",
                "value": "12",
            },
            {
                "date": "2022-09-06",
                "time": "06:00",
                "value": "17",
            },
            {
                "date": "2022-09-06",
                "time": "12:00",
                "value": "30",
            },
            {
                "date": "2022-09-06",
                "time": "18:00",
                "value": "25",
            },
            {
                "date": "2022-09-07",
                "time": "00:00",
                "value": "18",
            },
            {
                "date": "2022-09-07",
                "time": "06:00",
                "value": "22",
            },
            {
                "date": "2022-09-07",
                "time": "12:00",
                "value": "27",
            },
            {
                "date": "2022-09-07",
                "time": "18:00",
                "value": "21",
            }
        ]
    }
]

In [2636]:
# convert to dict: city name -> that city's list of temperature readings
city_to_temperature = {
    entry["city"]: entry["temperatures"]
    for entry in data
}
print(city_to_temperature)

{'Calgary': [{'date': '2022-09-06', 'time': '00:00', 'value': '12'}, {'date': '2022-09-06', 'time': '06:00', 'value': '17'}, {'date': '2022-09-06', 'time': '12:00', 'value': '30'}, {'date': '2022-09-06', 'time': '18:00', 'value': '25'}, {'date': '2022-09-07', 'time': '00:00', 'value': '18'}, {'date': '2022-09-07', 'time': '06:00', 'value': '22'}, {'date': '2022-09-07', 'time': '12:00', 'value': '27'}, {'date': '2022-09-07', 'time': '18:00', 'value': '21'}]}


In [2637]:
# convert temperatures to float
# every reading is rebuilt as a new dict, so the source readings stay untouched
city_to_temperature_norm = {
    city_name: [
        {'date': reading['date'], 'time': reading['time'], 'value': float(reading['value'])}
        for reading in readings
    ]
    for city_name, readings in city_to_temperature.items()
}
print(city_to_temperature_norm)

{'Calgary': [{'date': '2022-09-06', 'time': '00:00', 'value': 12.0}, {'date': '2022-09-06', 'time': '06:00', 'value': 17.0}, {'date': '2022-09-06', 'time': '12:00', 'value': 30.0}, {'date': '2022-09-06', 'time': '18:00', 'value': 25.0}, {'date': '2022-09-07', 'time': '00:00', 'value': 18.0}, {'date': '2022-09-07', 'time': '06:00', 'value': 22.0}, {'date': '2022-09-07', 'time': '12:00', 'value': 27.0}, {'date': '2022-09-07', 'time': '18:00', 'value': 21.0}]}


In [2638]:
# filter temperatures of Calgary higher than 20
calgary_temp = city_to_temperature_norm["Calgary"]
# filter() keeps the readings for which the lambda returns True
calgary_temp_higher_than_20 = list(
    filter(lambda reading: reading["value"] > 20, calgary_temp)
)
print(calgary_temp_higher_than_20)

[{'date': '2022-09-06', 'time': '12:00', 'value': 30.0}, {'date': '2022-09-06', 'time': '18:00', 'value': 25.0}, {'date': '2022-09-07', 'time': '06:00', 'value': 22.0}, {'date': '2022-09-07', 'time': '12:00', 'value': 27.0}, {'date': '2022-09-07', 'time': '18:00', 'value': 21.0}]


In [2639]:
# Alternative solution: a named predicate function instead of a lambda
def filter_by_temp(item):
    """Return True when the reading's "value" is greater than 20."""
    return 20 < item["value"]
calgary_temp_higher_than_20 = list(
    filter(filter_by_temp, calgary_temp)
)
print(calgary_temp_higher_than_20)

[{'date': '2022-09-06', 'time': '12:00', 'value': 30.0}, {'date': '2022-09-06', 'time': '18:00', 'value': 25.0}, {'date': '2022-09-07', 'time': '06:00', 'value': 22.0}, {'date': '2022-09-07', 'time': '12:00', 'value': 27.0}, {'date': '2022-09-07', 'time': '18:00', 'value': 21.0}]


In [2640]:
# average of temperature in Calgary
from functools import reduce

# pull the numeric value out of every reading
calgary_all_temp = list(map(operator.itemgetter('value'), calgary_temp))
# fold the values into one sum, then divide by how many there are
sum_temp = reduce(operator.add, calgary_all_temp)
num_temp = len(calgary_all_temp)
avg_temp = sum_temp / num_temp
print(f"avg = {avg_temp}")

avg = 21.5


In [2641]:
# maximum temperature in Calgary
# the reducer keeps whichever of the two values is larger
max_temp = reduce(lambda best, cur: best if best > cur else cur, calgary_all_temp)
print(f"max = {max_temp}")

max = 30.0


In [2642]:
# alternative: the built-in max() accepts two arguments, so it can be passed to reduce() directly
max_temp = reduce(max, calgary_all_temp)
print("max = {}".format(max_temp))

max = 30.0


In [2643]:
# sort Calgary temperatures in ascending order of value
# sorted() returns a new list, so calgary_temp keeps its original order
sorted_calgary_temp = sorted(calgary_temp, key=operator.itemgetter('value'))
print(f"sorted_calgary_temp = {sorted_calgary_temp}")

sorted_calgary_temp = [{'date': '2022-09-06', 'time': '00:00', 'value': 12.0}, {'date': '2022-09-06', 'time': '06:00', 'value': 17.0}, {'date': '2022-09-07', 'time': '00:00', 'value': 18.0}, {'date': '2022-09-07', 'time': '18:00', 'value': 21.0}, {'date': '2022-09-07', 'time': '06:00', 'value': 22.0}, {'date': '2022-09-06', 'time': '18:00', 'value': 25.0}, {'date': '2022-09-07', 'time': '12:00', 'value': 27.0}, {'date': '2022-09-06', 'time': '12:00', 'value': 30.0}]


In [2644]:
# get average of temperature for Calgary per date
# group temperature by date
from itertools import groupby

# groupby() only merges CONSECUTIVE items that share a key, so the readings are
# sorted by the grouping key first; sorted() is stable, so readings keep their
# original relative order within each date
by_date = operator.itemgetter('date')
for date, group in groupby(sorted(calgary_temp, key=by_date), key=by_date):
    # materialize the group: it is a one-shot iterator and is needed twice
    list_temp = list(group)
    temp_values = map(lambda item: item['value'], list_temp)
    sum_temp = reduce(operator.add, temp_values)
    avg_temp = sum_temp / len(list_temp)
    print("date = {}, avg = {}".format(date, avg_temp))

date = 2022-09-06, avg = 21.0
date = 2022-09-07, avg = 22.0


# File operations

In [2645]:
# list all csv or text files in the current directory

import os
import re

dir_name = "."
# fullmatch() anchors the pattern at both ends, so only names that END in
# .csv / .txt / .text are kept (match() alone would also accept a name such as
# "data.csv.bak"); os.listdir() returns entries in arbitrary order, so the
# names are sorted to make the result, and files[0], deterministic
files = [os.path.join(dir_name, f) for f in sorted(os.listdir(dir_name)) if re.fullmatch(r'.*\.(csv|txt|text)', f)]
print(files)

['./data.csv']


In [2646]:
# name of the busiest airports in the world
file = files[0]
# read file into text
# NOTE(review): assumes the CSV is UTF-8 encoded - confirm against the data file
with open(file, "r", encoding="utf-8") as f:
    text = f.read()
# split lines; splitlines() also copes with "\r\n" endings and yields no empty
# trailing entry when the file ends with a newline
lines = text.splitlines()
# skip first row
lines = lines[1:]

airports = {}
# iterate over lines
for line in lines:
    # ignore blank lines: they have no number column to extract
    if not line.strip():
        continue
    # get the number column (a double-quoted run of digits and commas)
    count = re.findall(r'"[0-9,]+"', line)[0]
    # remove non-digit chars
    count = re.sub(r'\D', '', count)
    # cast to int
    count = int(count)
    # NOTE(review): a plain split(",") is only safe while none of the first
    # five columns contains a comma - verify against the data file
    sp = line.split(",")
    # get country
    country = sp[4]
    # airport name
    airport = sp[1]
    airports[airport] = {'country': country, 'count': count}

print(airports)

{'Hartsfield–Jackson Atlanta International Airport': {'country': 'United States', 'count': 103902992}, 'Beijing Capital International Airport': {'country': 'China', 'count': 95786442}, 'Dubai International Airport': {'country': 'United Arab Emirates', 'count': 88242099}, 'Los Angeles International Airport': {'country': 'United States', 'count': 84557968}, "O'Hare International Airport": {'country': 'United States', 'count': 79828183}, 'Heathrow Airport': {'country': 'United Kingdom', 'count': 78014598}, 'Haneda Airport': {'country': 'Japan', 'count': 76476251}, 'Hong Kong International Airport': {'country': 'Hong Kong', 'count': 72665078}, 'Shanghai Pudong International Airport': {'country': 'China', 'count': 70001237}, 'Charles de Gaulle International Airport': {'country': 'France', 'count': 69471442}, 'Amsterdam Airport Schiphol': {'country': 'Netherlands', 'count': 68515425}, 'Dallas-Fort Worth International Airport': {'country': 'United States', 'count': 67092194}, 'Guangzhou Baiyu