## iterators(반복자)
iterable 객체 - 반복 가능한 객체(값을 차례대로 꺼낼 수 있는 객체)  
대표적으로 iterable한 타입 - list, dict, set, str, bytes, tuple, range  

iterable의 정의는 연관된 iter method가 있는 객체  
iter method가 iterable에 적용되면 iterator 객체가 생성  
iterator는 연속된 값을 차례로 반환하는 next method를 가진 객체로 정의  

In [1]:
# A str is iterable: iter() returns an iterator over its characters
ab = "ab"
it = iter(ab)
# next() pulls the next value out of the iterator
next(it)

'a'

In [2]:
next(it)

'b'

In [3]:
# Unpack the iterator with * to print every remaining value at once
word = 'Data'
it = iter(word)
print(*it)
# The iterator is now exhausted and cannot be consumed again; build a new one with iter() to repeat

D a t a


In [4]:
# Iterating over file connections
# A file object is an iterator over its lines. The context manager closes
# the handle as soon as the first line has been printed.
with open('text.txt.txt') as file:
    it = iter(file)
    print(next(it))

data


In [5]:
# enumerate() yields (index, value) pairs while iterating over its argument
a = ["A","B","C","D"]
e = enumerate(a)
# Printing the enumerate object shows only its repr, not the pairs
print(e)

<enumerate object at 0x7f9579d06740>


In [6]:
# Materialize the enumerate object into a list of (index, value) tuples
e_list = list(e)
print(e_list)

[(0, 'A'), (1, 'B'), (2, 'C'), (3, 'D')]


In [7]:
# start=1 makes the index begin at 1 instead of the default 0
for i, v in enumerate(a, start = 1) :
    print(i,v)

1 A
2 B
3 C
4 D


In [8]:
# Unpack each (index, value) pair directly in the loop header
for i, v in enumerate(["A","B","C"]) :
    print(f"index: {i}, value: {v}")

index: 0, value: A
index: 1, value: B
index: 2, value: C


### iterable 내장함수

In [9]:
# zip() takes several iterables and groups the elements at the same position into tuples
num = [1,2,3]
s = ["sun","rain","cloud"]

z  = zip(num ,s)
# Printing the zip object shows only its repr, not the pairs
print(z)

<zip object at 0x7f95683c2d40>


In [10]:
# Consume the zip object into a list of tuples
z_list = list(z)
print(z_list)

[(1, 'sun'), (2, 'rain'), (3, 'cloud')]


In [11]:
# Iterate over a fresh zip object, unpacking each pair in the loop header
for i,v in zip(num,s):
    print(i,v)

1 sun
2 rain
3 cloud


In [12]:
# z was already consumed by the earlier list(z) call, so nothing is left to unpack and only an empty line is printed
print(*z)




In [13]:
# Build a dict from zip(): the first iterable supplies the keys, the second the values
num = [1,2,3]
s = ["sun","rain","cloud"]

dict_m = dict(zip(num, s))
print(dict_m)

{1: 'sun', 2: 'rain', 3: 'cloud'}


In [14]:
# Combining enumerate() with zip()
num = ["->","->","->"]
s = ["sun","rain","cloud"]

# The loop targets get their own names: reusing `s` as a target would rebind
# the list `s` to its last element ("cloud") once the loop has finished.
for i, (arrow, weather) in enumerate(zip(num, s)):
    print(i, arrow, weather)

0 -> sun
1 -> rain
2 -> cloud


In [15]:
# all(iterable) returns True only when every element is truthy; a single falsy element makes it False
print(all([1,2,3]))
print(all([0,1,2]))

True
False


In [16]:
# any(iterable) returns True when at least one element is truthy and False only when all are falsy (the counterpart of all)
print(any([0, False , True]))

True


### itertools
반복 연산을 위한 표준 라이브러리 모듈로, 제공되는 함수들은 단독으로 또는 서로 조합하여 효율적이고 빠르게 동작하며 operator 모듈과 함께 사용하면 효과적이다.

In [17]:
import itertools 

In [18]:
# count(start, [step]) is an iterator that counts up from start by step (default 1) without end
# count is useful when the upper bound of the iteration does not have to be known in advance
# islice(iterable, [start], stop[, step]) slices an iterable to the given range and returns an iterator

from itertools import count ,islice

# Take only the first 5 values of the endless counter
for i in islice(count(10,step = 2), 5) :
    print(i)

10
12
14
16
18


### List comprehensions
[ ( 변수를 활용한 값 ) for ( 사용할 변수 이름 ) in ( 순회할 수 있는 값 )]

In [19]:
# Build a list with an explicit for loop

empty_list = []
for i in range(1,11):
    empty_list.append(i)

print(empty_list)

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]


In [20]:
# Build the list 1..10 with a list comprehension

em_list = [x for x in range(1,11)]
em_list

[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [21]:
# Upper bound (inclusive) of the numbers to filter
size = 10
# Keep only the even numbers from 1 through size
arr = [n for n in range(1, size + 1) if n % 2 == 0]
print(arr)

[2, 4, 6, 8, 10]


In [22]:
# Multiple conditions: consecutive `if` clauses are combined as a logical AND
# without writing `and`; writing `if ... and if ...` is a syntax error.
# Divisibility is tested with % (modulo); & is bitwise AND and would test bit masks instead.
arr = [n for n in range(1, 20) if n % 2 == 0 if n % 5 == 0]
print(arr)

[10]


In [23]:
# Combine conditions with `or` inside a single `if` clause; chained `if`
# clauses can only express AND, never OR.
# Divisibility is tested with % (modulo); & is bitwise AND and would test bit masks instead.
arr = [n for n in range(1, 20) if n % 2 == 0 or n % 5 == 0]
print(arr)

[2, 4, 5, 6, 8, 10, 12, 14, 15, 16, 18]


In [24]:
# Two for clauses nest left to right: the first is the outer loop, the second the inner loop
a = [(n,n1) for n in range(0,2) for n1 in range(2,4)]
print(a)

[(0, 2), (0, 3), (1, 2), (1, 3)]


### Advanced comprehensions
[output expression + conditional on output for iterator variable in iterable + conditional on iterable]


In [25]:
fellowship = ['frodo', 'samwise', 'merry', 'aragorn', 'legolas', 'boromir', 'gimli']

# A conditional expression in the output position keeps one entry per member:
# names with 7 or more characters are kept, all others become ''
new_fellowship = [member if len(member) >= 7 else '' for member in fellowship]
print(new_fellowship)

['', 'samwise', '', 'aragorn', 'legolas', 'boromir', '']


In [26]:
fellowship = ['frodo', 'samwise', 'merry', 'aragorn', 'legolas', 'boromir', 'gimli']

# Dict comprehension: map each member name to its length
new_fellowship = { member:len(member) for member in fellowship }
print(new_fellowship)

{'frodo': 5, 'samwise': 7, 'merry': 5, 'aragorn': 7, 'legolas': 7, 'boromir': 7, 'gimli': 5}


In [27]:
# Dict comprehension: map each n in 0..8 to its negation
d  = {n : -n for n in range(9)}
d

{0: 0, 1: -1, 2: -2, 3: -3, 4: -4, 5: -5, 6: -6, 7: -7, 8: -8}

In [28]:
# Set comprehension: the squares of 0 through 4
n = {i * i for i in range(0,5)}
print(n)

{0, 1, 4, 9, 16}


### Introduction to generator expressions
제너레이터는 모든 값을 메모리에 담고 있지 않고 그때그때 값을 생성(generate)해서 반환하기 때문에

제너레이터를 사용할 때에는 한 번에 한 개의 값만 순환(iterate)할 수 있다.

In [29]:
[n**2 for n in range(1,3)]

[1, 4]

In [30]:
# A generator expression does not build the whole result in memory; evaluating it gives a generator object
(n**2 for n in range(1,3))

<generator object <genexpr> at 0x7f95683f9f90>

In [31]:
# Generator expression over the even numbers below 10. The output expression
# must be the loop variable `num`: yielding `n` would hand back the generator
# object itself on every step. Evenness is tested with % (modulo), not bitwise &.
n = (num for num in range(10) if num % 2 == 0)

# Materialize the generator once so the values can be inspected
evens = list(n)
print(evens)

[0, 2, 4, 6, 8]


In [32]:
lannister = ['cersei', 'jaime', 'tywin', 'tyrion', 'joffrey']

# Generator expression: each length is computed only when the loop asks for the next value
lengths = (len(person) for person in lannister)

for value in lengths:
    print(value)

6
5
5
6
7


In [33]:
# Generator function: hands values back with `yield` instead of `return`
def number(n):
    """Yield the integers 0, 1, ..., n - 1 one at a time."""
    yield from range(n)
        
number(5)

<generator object number at 0x7f956840e3c0>

In [34]:
a = ["ABC", "ABCD","ABCDE"]


def number(l):
    """Yield the length of each item of ``l``, in order."""
    yield from map(len, l)


# Print the lengths one by one as the generator produces them
for v in number(a):
    print(v)

3
4
5


In [35]:
import numpy as np
import pandas as pd

# Load the CSV into a DataFrame and preview its first rows
df = pd.read_csv('world_ind_pop_data.csv')
df.head()

Unnamed: 0,CountryName,CountryCode,Year,Total Population,Urban population (% of total)
0,Arab World,ARB,1960,92495900.0,31.285384
1,Caribbean small states,CSS,1960,4190810.0,31.59749
2,Central Europe and the Baltics,CEB,1960,91401580.0,44.507921
3,East Asia & Pacific (all income levels),EAS,1960,1042475000.0,22.471132
4,East Asia & Pacific (developing only),EAP,1960,896493000.0,16.917679


In [36]:
# Column labels of the DataFrame and the values of its first row
feature_names  = df.columns
row_vals = df.iloc[0].values

In [37]:
# Pair each column label with the matching value of the first row, then turn the pairs into a dict
zipped_lists = zip(feature_names , row_vals)
rs_dict = dict(zipped_lists)

print(rs_dict)

{'CountryName': 'Arab World', 'CountryCode': 'ARB', 'Year': 1960, 'Total Population': 92495902.0, 'Urban population (% of total)': 31.2853842116054}


In [38]:
def lists2dict(list1, list2):
    """Build a dictionary by pairing the two inputs position by position.

    Items of ``list1`` become the keys and the items of ``list2`` at the same
    positions become the values. Pairing stops at the shorter input, and a
    repeated key keeps the value paired with its last occurrence.
    """
    return dict(zip(list1, list2))

# Convert the column labels and the first row's values with lists2dict()
rs_fxn = lists2dict(feature_names , row_vals)

print(rs_fxn)

{'CountryName': 'Arab World', 'CountryCode': 'ARB', 'Year': 1960, 'Total Population': 92495902.0, 'Urban population (% of total)': 31.2853842116054}


In [39]:
# One dict per DataFrame row, each keyed by the column labels
list_of_dict = [lists2dict(feature_names , sublist) for  sublist in df.values]

print(list_of_dict[0])
print(list_of_dict[1])

{'CountryName': 'Arab World', 'CountryCode': 'ARB', 'Year': 1960, 'Total Population': 92495902.0, 'Urban population (% of total)': 31.2853842116054}
{'CountryName': 'Caribbean small states', 'CountryCode': 'CSS', 'Year': 1960, 'Total Population': 4190810.0, 'Urban population (% of total)': 31.5974898513652}


In [40]:
# Rebuild a DataFrame from the list of row dicts

df_list_of_dict = pd.DataFrame(list_of_dict)
df_list_of_dict.head()

Unnamed: 0,CountryName,CountryCode,Year,Total Population,Urban population (% of total)
0,Arab World,ARB,1960,92495900.0,31.285384
1,Caribbean small states,CSS,1960,4190810.0,31.59749
2,Central Europe and the Baltics,CEB,1960,91401580.0,44.507921
3,East Asia & Pacific (all income levels),EAS,1960,1042475000.0,22.471132
4,East Asia & Pacific (developing only),EAP,1960,896493000.0,16.917679


### Processing data in chunks

In [41]:
# Count how often each first-column value occurs in the first 300 data rows.
# Open a connection to the file
with open('world_ind_pop_data.csv') as file:

    # Skip the column names
    file.readline()

    # Initialize an empty dictionary: counts_dict
    counts_dict = {}

    # Process at most the first 300 data rows
    for j in range(0, 300):

        raw_line = file.readline()

        # readline() returns '' at end of file; stop there instead of
        # tallying a bogus '' key for every row the file does not have
        if not raw_line:
            break

        # Split the current line into a list: line
        line = raw_line.split(',')

        # Get the value for the first column: first_col
        first_col = line[0]

        # Increment the tally, starting from 0 for a value not seen before
        counts_dict[first_col] = counts_dict.get(first_col, 0) + 1

print(counts_dict)

{'Arab World': 2, 'Caribbean small states': 2, 'Central Europe and the Baltics': 2, 'East Asia & Pacific (all income levels)': 2, 'East Asia & Pacific (developing only)': 2, 'Euro area': 2, 'Europe & Central Asia (all income levels)': 2, 'Europe & Central Asia (developing only)': 2, 'European Union': 2, 'Fragile and conflict affected situations': 2, 'Heavily indebted poor countries (HIPC)': 2, 'High income': 2, 'High income: nonOECD': 2, 'High income: OECD': 2, 'Latin America & Caribbean (all income levels)': 2, 'Latin America & Caribbean (developing only)': 2, 'Least developed countries: UN classification': 2, 'Low & middle income': 2, 'Low income': 2, 'Lower middle income': 2, 'Middle East & North Africa (all income levels)': 2, 'Middle East & North Africa (developing only)': 2, 'Middle income': 2, 'North America': 2, 'OECD members': 2, 'Other small states': 2, 'Pacific island small states': 2, 'Small states': 2, 'South Asia': 2, 'Sub-Saharan Africa (all income levels)': 2, 'Sub-Saha

In [42]:
### lazy evaluation

def read_large_file(file_object):
    """Lazily yield the lines of an open file, one line per iteration.

    ``file_object`` only needs a ``readline()`` method. Each line is yielded
    exactly as ``readline()`` returns it, and the generator finishes at the
    first empty result, which is what ``readline()`` gives at end of file.
    """
    # readline() fetches a single line per call, so the file is never read in full
    current = file_object.readline()

    # An empty result is falsy and marks the end of the file
    while current:
        yield current
        current = file_object.readline()
        
# Open a connection to the file
with open('world_ind_pop_data.csv') as file:

    # Create a generator object for the file: gen_file
    gen_file = read_large_file(file)

    # Each next() call pulls and prints exactly one more line from the generator
    print(next(gen_file))
    print(next(gen_file))
    print(next(gen_file))

CountryName,CountryCode,Year,Total Population,Urban population (% of total)

Arab World,ARB,1960,92495902.0,31.285384211605397

Caribbean small states,CSS,1960,4190810.0,31.5974898513652



In [43]:
# Tally how often each first-column value occurs across the whole file
counts_dict = {}

with open('world_ind_pop_data.csv') as file:

    # Pull lines lazily from the read_large_file() generator
    for line in read_large_file(file):

        # The first comma-separated field is the counting key; the header
        # line is not skipped here, so its first field is tallied as well
        row = line.split(',')
        first_col = row[0]

        # Start from 0 for an unseen key, then add one
        counts_dict[first_col] = counts_dict.get(first_col, 0) + 1

print(counts_dict)

{'CountryName': 1, 'Arab World': 55, 'Caribbean small states': 55, 'Central Europe and the Baltics': 55, 'East Asia & Pacific (all income levels)': 55, 'East Asia & Pacific (developing only)': 55, 'Euro area': 55, 'Europe & Central Asia (all income levels)': 55, 'Europe & Central Asia (developing only)': 55, 'European Union': 55, 'Fragile and conflict affected situations': 55, 'Heavily indebted poor countries (HIPC)': 55, 'High income': 55, 'High income: nonOECD': 55, 'High income: OECD': 55, 'Latin America & Caribbean (all income levels)': 55, 'Latin America & Caribbean (developing only)': 55, 'Least developed countries: UN classification': 55, 'Low & middle income': 55, 'Low income': 55, 'Lower middle income': 55, 'Middle East & North Africa (all income levels)': 55, 'Middle East & North Africa (developing only)': 55, 'Middle income': 55, 'North America': 55, 'OECD members': 55, 'Other small states': 55, 'Pacific island small states': 55, 'Small states': 55, 'South Asia': 55, 'Sub-Sa