### iterating over iterables (1)

In [2]:
# create a list of strings: list is an iterable
flash = ['jay garrick', 'barry allen', 'wally west', 'bart allen']

# print each list item in flash using a for loop
# you can iterate over iterable object
for person in flash:
    print(person)

# create an iterator object form a list
superspeed = iter(flash)

# calling next will iterate manually over a iterable
print(next(superspeed))
print(next(superspeed))
print(next(superspeed))
print(next(superspeed))

jay garrick
barry allen
wally west
bart allen
jay garrick
barry allen
wally west
bart allen


### iterating over iterables (2): range() functions

In [6]:
# not all iterables are actual lists. range() function creates a range object with an iterator
# that will keep producing values until it reaches the limit, it does not creates an actual lists
# to save up the memory

# create an iterator for range(3)
small_value = iter(range(3))

# print out each individual value
print(next(small_value))
print(next(small_value))
print(next(small_value))

# iterate over the range 
for num in range(3):
    print(num)
    
# we can create a large array which is not possible with array if we want to iterate over
googol = iter(range(10 ** 100))

print(next(googol))
print(next(googol))
print(next(googol))
print(next(googol))
print(next(googol))

0
1
2
0
1
2
0
1
2
3
4


### iterators as function arguments

In [8]:
# there are also functions that take interators as arguments.
# for example list() and num() func returns a list and sum of elements.
# you can use these functions by passing an iterator from range() and then printing the results 

# create a range object
values = range(10, 21)

# print the range object
print(values)

# create a list of integers based off the iterator
values_list = list(values)
print(values_list)

# get the sum of values
values_sum = sum(values)
print(values_sum)

range(10, 21)
[10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
165


### using enumerate

In [2]:
# enumerate return a 'special' object called enumerate that produces 
# a sequence of tuples and each of the tuples is an index-value pair

# create a list
mutant = ['charles xavier', 'bobby drake', 'kurt wagner', 'max eisenhardt', 'kitty pryde']

# create a list of tuple using enumerate
mutant_list = enumerate(mutant)

# print the list of tuples
print(list(mutant_list))

# unpack and print the tuple pairs
for index1, value1 in mutant_list:
    print(index1, value1)

for index2, value2 in enumerate(mutant, start=1):
    print(index2, value2)

[(0, 'charles xavier'), (1, 'bobby drake'), (2, 'kurt wagner'), (3, 'max eisenhardt'), (4, 'kitty pryde')]
1 charles xavier
2 bobby drake
3 kurt wagner
4 max eisenhardt
5 kitty pryde


### using zip

In [12]:
# zip return a 'special' object called zip. zip() takes any number of iterables
# and returns an iterator of tuples. You can convert it to a list and then print it.
mutants = ('charles xavier', 'bobby drake', 'kurt wagner', 'max eisenhardt', 'kitty pryde')
aliases = ('prof x', 'iceman', 'nightcrawler', 'magneto', 'shadowcat')
powers = ('telepathy', 'thermokinesis', 'teleportation', 'magnetokinesis', 'intangibility')

# Create a list of tuples: mutant_data
mutant_data = list(zip(mutants, aliases, powers))

# Print the list of tuples
print(mutant_data)

# Create a zip object using the three lists: mutant_zip
mutant_zip = zip(mutants, aliases, powers)

# Print the zip object
print(mutant_zip)

# Unpack the zip object and print the tuple values
for value1, value2, value3 in mutant_zip:
    print(value1, value2, value3)

[('charles xavier', 'prof x', 'telepathy'), ('bobby drake', 'iceman', 'thermokinesis'), ('kurt wagner', 'nightcrawler', 'teleportation'), ('max eisenhardt', 'magneto', 'magnetokinesis'), ('kitty pryde', 'shadowcat', 'intangibility')]
<zip object at 0x111a9c448>
charles xavier prof x telepathy
bobby drake iceman thermokinesis
kurt wagner nightcrawler teleportation
max eisenhardt magneto magnetokinesis
kitty pryde shadowcat intangibility


### using * and zip to 'unzip'

In [14]:
# using * will unpacks an iterable into positional arguments in a function call
# Create a zip object from mutants and powers: z1
z1 = zip(mutants, powers)

# Print the tuples in z1 by unpacking with *
print(*z1)

# Re-create a zip object from mutants and powers: z1
z1 = zip(mutants, powers)

# 'Unzip' the tuples in z1 by unpacking with * and zip(): result1, result2
result1, result2 = zip(*z1)

# Check if unpacked tuples are equivalent to original tuples
print(result1 == mutants)
print(result2 == powers)

('charles xavier', 'telepathy') ('bobby drake', 'thermokinesis') ('kurt wagner', 'teleportation') ('max eisenhardt', 'magnetokinesis') ('kitty pryde', 'intangibility')
True
True


## Bringing it all together

### Processing large amounts of Twitter data

In [22]:
# sometimes the data we have to process reaches a size that is
# too much for a computer's memory to handle. 
# the solution is to process an entire data source chunk by chunk
# in pandas' read_csv you can specify this chuck for the entries
import pandas as pd

tweets = pd.read_csv('datasets/tweets.csv', chunksize=10)

counts_dict = {}

for chunk in tweets:
    for entry in chunk['lang']:
        if entry in counts_dict.keys():
            counts_dict[entry] += 1
        else:
            counts_dict[entry] = 1

print(counts_dict)

{'en': 97, 'et': 1, 'und': 2}


### extracting information for lage amounts of Twitter data: make a function for it!

In [23]:
# Define count_entries()
def count_entries(csv_file, c_size, colname):
    """Return a dictionary with counts of
    occurrences as value for each key."""
    
    # Initialize an empty dictionary: counts_dict
    counts_dict = {}

    # Iterate over the file chunk by chunk
    for chunk in pd.read_csv(csv_file, chunksize=c_size):

        # Iterate over the column in DataFrame
        for entry in chunk[colname]:
            if entry in counts_dict.keys():
                counts_dict[entry] += 1
            else:
                counts_dict[entry] = 1

    # Return counts_dict
    return counts_dict

# Call count_entries(): result_counts
result_counts = count_entries('tweets.csv', 10, 'lang')

# Print result_counts
print(result_counts)

FileNotFoundError: File b'tweets.csv' does not exist