List Comprehensions
===

* *90:00 min* | Última modificación: Agosto 26, 2021 | [YouTube]

In [1]:
#
# Filling a list with append
#
squares = []
for x in range(10):
    # Add the square of each integer 0..9, one element per iteration.
    squares.append(x**2)

squares

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [2]:
#
# Equivalent list comprehension
#
# Builds the list of squares of 0..9 in a single expression.
squares = [x**2 for x in range(10)]
squares

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [3]:
#
# Equivalent code using map()
#
# map() returns an iterator, so list() is used to materialize the values.
squares = list(map(lambda x: x**2, range(10)))
squares

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [4]:
#
# Building pairs with nested for loops
#
combs = []
for x in [1,2,3]:
    for y in [3,1,4]:
        # Keep only the pairs whose two members differ.
        if x != y:
            combs.append((x, y))

combs

[(1, 3), (1, 4), (2, 3), (2, 1), (2, 4), (3, 1), (3, 4)]

In [5]:
#
# Equivalent code using a comprehension
#
# The for clauses read left to right as outer loop, then inner loop,
# followed by the filtering condition.
[(x, y) for x in [1,2,3] for y in [3,1,4] if x != y]

[(1, 3), (1, 4), (2, 3), (2, 1), (2, 4), (3, 1), (3, 4)]

In [6]:
#
# Creating a list of lists
#
# list(range(5)) is evaluated once per iteration, so every row is a separate list.
[list(range(5))  for _ in range(5)]

[[0, 1, 2, 3, 4],
 [0, 1, 2, 3, 4],
 [0, 1, 2, 3, 4],
 [0, 1, 2, 3, 4],
 [0, 1, 2, 3, 4]]

In [7]:
#
# Building a string inside a loop -- BAD
#
# Strings are immutable: each += produces a new string rather than
# extending the old one.
nums = ""
for n in range(20):
    nums += str(n)
print(nums)

012345678910111213141516171819


In [8]:
#
# Building a string inside a loop -- GOOD
#
# Collect the pieces in a list and concatenate them once with join().
nums = []
for n in range(20):
    nums.append(str(n))
print("".join(nums))

012345678910111213141516171819


In [9]:
#
# Building a string inside a loop -- BETTER
#
# The list of pieces is produced by a comprehension and joined in one call.
nums = [str(n) for n in range(20)]
"".join(nums)

'012345678910111213141516171819'

In [10]:
#
# Conditionals: trailing if (filter)
#
# Elements that fail the condition are left out of the result.
[x for x in range(10) if x < 5]

[0, 1, 2, 3, 4]

In [11]:
#
# Conditionals: if-else
#
# The conditional expression chooses the value for every element;
# nothing is filtered out, so the result keeps all 10 positions.
[x if x < 5 else 0 for x in range(10) ]

[0, 1, 2, 3, 4, 0, 0, 0, 0, 0]

In [12]:
#
# Dict comprehension
#
# Maps each letter to the position enumerate() assigns to it.
{letter: i_letter for i_letter, letter in enumerate(['A', 'B', 'C', 'D'])}

{'A': 0, 'B': 1, 'C': 2, 'D': 3}

## Construcción de un generador usando una comprehension

In [13]:
#
# Numbers from 1 to 20 that contain a '1'
#
import re  # unused by this cell; left in place in case other cells rely on the name

# Each number is converted to text once and checked with a plain substring
# test; a regular expression is not needed to look for a fixed character.
[text for text in map(str, range(1, 21)) if '1' in text]

['1', '10', '11', '12', '13', '14', '15', '16', '17', '18', '19']

In [14]:
# Parentheses instead of brackets create a generator expression: the squares
# are produced one at a time as the for loop consumes them.
generador = (i**2 for i in range(10))
for element in generador:
    print(element)

0
1
4
9
16
25
36
49
64
81


In [15]:
# A fresh generator: every next() call advances it by one value,
# so the two calls below return the first two squares.
generador = (i**2 for i in range(10))
display(
    next(generador),
    next(generador)
)

0

1

## Uso de generadores en funciones

In [16]:
def num_sequence(n):
    """Yield 0, 1, 2, ... for as long as the value stays below ``n``.

    Nothing is yielded when ``n`` is zero or negative.
    """
    value = 0
    while value < n:
        yield value
        value = value + 1
        
        
# Consume the generator function with a for loop, printing each value it yields.
for value in num_sequence(5):
    print(value)

0
1
2
3
4


## Uso de generadores para leer datos

In [17]:
# Location of the CSV file used in this example.
data_url = "https://raw.githubusercontent.com/jdvelasq/datalabs/master/datasets/world_bank_development_Indicators.csv" 
# Shell escape: download the file into /tmp/ with wget ({data_url} is interpolated by IPython).
!wget --quiet {data_url} -P /tmp/ 


def read_large_file(file_object):
    """Yield the lines of ``file_object`` one at a time.

    Each line is fetched with ``readline()`` only when the consumer asks for
    the next value; iteration stops at the first empty result (end of file).
    """
    line = file_object.readline()
    while line:
        yield line
        line = file_object.readline()
        
        
# Open the downloaded file and print only its first three lines,
# pulling them one by one from the generator.
with open('/tmp/world_bank_development_Indicators.csv', 'r') as file:
    
    gen_file = read_large_file(file)
    
    # end='' because every line read from the file already ends with a newline.
    print(next(gen_file), end='')
    print(next(gen_file), end='')
    print(next(gen_file), end='')

CountryName,CountryCode,Year,Total Population,Urban population (% of total)
Arab World,ARB,1960,92495902.0,31.285384211605397
Caribbean small states,CSS,1960,4190810.0,31.5974898513652


## Procesamiento de un archivo usando list comprehensions

In [18]:
%%writefile out.1
Date, Year, CustomerID, Value
2013-01-12, 2013, 1, 100
2014-05-12, 2014, 1, 100
2013-02-25, 2013, 2, 200
2013-04-04, 2013, 1, 100
2013-06-21, 2013, 2, 200
2014-05-18, 2014, 1, 100
2014-06-23, 2014, 2, 200
2013-02-28, 2013, 1, 100
2013-08-02, 2013, 1, 100

Writing out.1


In [19]:
# Read every line of out.1 into a list. The with block closes the file as
# soon as the lines have been read instead of leaving the handle open.
with open('out.1', 'r') as lines_file:
    x = lines_file.readlines()
x

['Date, Year, CustomerID, Value\n',
 '2013-01-12, 2013, 1, 100\n',
 '2014-05-12, 2014, 1, 100\n',
 '2013-02-25, 2013, 2, 200\n',
 '2013-04-04, 2013, 1, 100\n',
 '2013-06-21, 2013, 2, 200\n',
 '2014-05-18, 2014, 1, 100\n',
 '2014-06-23, 2014, 2, 200\n',
 '2013-02-28, 2013, 1, 100\n',
 '2013-08-02, 2013, 1, 100\n']

In [20]:
# Remove the newline character from every line.
x = [z.replace('\n', '') for z in x]
x

['Date, Year, CustomerID, Value',
 '2013-01-12, 2013, 1, 100',
 '2014-05-12, 2014, 1, 100',
 '2013-02-25, 2013, 2, 200',
 '2013-04-04, 2013, 1, 100',
 '2013-06-21, 2013, 2, 200',
 '2014-05-18, 2014, 1, 100',
 '2014-06-23, 2014, 2, 200',
 '2013-02-28, 2013, 1, 100',
 '2013-08-02, 2013, 1, 100']

In [21]:
# Split each line into its fields. The separator is ',' only, so the space
# that follows each comma in the file stays at the start of the next field.
x = [z.split(',') for z in x]
x

[['Date', ' Year', ' CustomerID', ' Value'],
 ['2013-01-12', ' 2013', ' 1', ' 100'],
 ['2014-05-12', ' 2014', ' 1', ' 100'],
 ['2013-02-25', ' 2013', ' 2', ' 200'],
 ['2013-04-04', ' 2013', ' 1', ' 100'],
 ['2013-06-21', ' 2013', ' 2', ' 200'],
 ['2014-05-18', ' 2014', ' 1', ' 100'],
 ['2014-06-23', ' 2014', ' 2', ' 200'],
 ['2013-02-28', ' 2013', ' 1', ' 100'],
 ['2013-08-02', ' 2013', ' 1', ' 100']]

In [22]:
# Extract the Date field (x[0] is the header row, so it is skipped).
[z[0] for z in x[1:]]

['2013-01-12',
 '2014-05-12',
 '2013-02-25',
 '2013-04-04',
 '2013-06-21',
 '2014-05-18',
 '2014-06-23',
 '2013-02-28',
 '2013-08-02']

In [23]:
# Split Date into its parts.
[z[0].split('-') for z in x[1:]]

[['2013', '01', '12'],
 ['2014', '05', '12'],
 ['2013', '02', '25'],
 ['2013', '04', '04'],
 ['2013', '06', '21'],
 ['2014', '05', '18'],
 ['2014', '06', '23'],
 ['2013', '02', '28'],
 ['2013', '08', '02']]

In [24]:
# The month is at position 1 of the split date.
[z[0].split('-')[1] for z in x[1:]] # the month

['01', '05', '02', '04', '06', '05', '06', '02', '08']

In [25]:
# Append the month as a new last field of every data row (header excluded).
# Note: running this cell again appends the month one more time to each row.
x[1:] = [z+[z[0].split('-')[1]] for z in x[1:]]
x

[['Date', ' Year', ' CustomerID', ' Value'],
 ['2013-01-12', ' 2013', ' 1', ' 100', '01'],
 ['2014-05-12', ' 2014', ' 1', ' 100', '05'],
 ['2013-02-25', ' 2013', ' 2', ' 200', '02'],
 ['2013-04-04', ' 2013', ' 1', ' 100', '04'],
 ['2013-06-21', ' 2013', ' 2', ' 200', '06'],
 ['2014-05-18', ' 2014', ' 1', ' 100', '05'],
 ['2014-06-23', ' 2014', ' 2', ' 200', '06'],
 ['2013-02-28', ' 2013', ' 1', ' 100', '02'],
 ['2013-08-02', ' 2013', ' 1', ' 100', '08']]

In [26]:
# Add the name of the new column to the header row.
# Note: append mutates the header in place, so re-running this cell adds
# 'Month' again.
x[0].append('Month')
x

[['Date', ' Year', ' CustomerID', ' Value', 'Month'],
 ['2013-01-12', ' 2013', ' 1', ' 100', '01'],
 ['2014-05-12', ' 2014', ' 1', ' 100', '05'],
 ['2013-02-25', ' 2013', ' 2', ' 200', '02'],
 ['2013-04-04', ' 2013', ' 1', ' 100', '04'],
 ['2013-06-21', ' 2013', ' 2', ' 200', '06'],
 ['2014-05-18', ' 2014', ' 1', ' 100', '05'],
 ['2014-06-23', ' 2014', ' 2', ' 200', '06'],
 ['2013-02-28', ' 2013', ' 1', ' 100', '02'],
 ['2013-08-02', ' 2013', ' 1', ' 100', '08']]

In [27]:
# Keep the rows whose Year field is 2013. The comparison value carries a
# leading space because the fields were split on ',' without being stripped.
[z for z in x if z[1] == ' 2013']

[['2013-01-12', ' 2013', ' 1', ' 100', '01'],
 ['2013-02-25', ' 2013', ' 2', ' 200', '02'],
 ['2013-04-04', ' 2013', ' 1', ' 100', '04'],
 ['2013-06-21', ' 2013', ' 2', ' 200', '06'],
 ['2013-02-28', ' 2013', ' 1', ' 100', '02'],
 ['2013-08-02', ' 2013', ' 1', ' 100', '08']]