In [None]:
# 4. Python Data Science Toolbox (Part 2)

# Intro to iterators, iterables, list generators

# Superpowers; paired element-wise with `mutants` by zip() further down
powers = (
    'telepathy',
    'thermokinesis',
    'teleportation',
    'magnetokinesis',
    'intangibility',
)

# Mutant names, stored as a tuple
mutants = (
    'charles xavier',
    'bobby drake',
    'kurt wagner',
    'max eisenhardt',
    'kitty pryde',
)

In [None]:
# Intro to iterators

# A for loop iterates over the elements of a sequence or other iterable.
# Lists, strings, range objects, dicts, etc. are all iterables.
# An iterable is an object that can return an iterator when passed to iter(),
# while an iterator is an object that keeps state and produces the next value
# each time you call next() on it.


In [None]:
# Build the list of names to iterate over: flash
flash = [
    'jay garrick',
    'barry allen',
    'wally west',
    'bart allen',
]

# Walk the list directly with a for loop, printing every item
for f in flash:
    print(f)

# Ask the list for an iterator: superhero
superhero = iter(flash)

# Pull the items out one at a time with next(). The list holds exactly four
# entries, so four calls drain the iterator; a fifth would raise StopIteration.
print(
    next(superhero),
    next(superhero),
    next(superhero),
    next(superhero),
    sep='\n',
)


In [None]:
# Range object covering 10 through 20 inclusive: values
values = range(10, 21)

# A range prints as its own repr, not as the numbers it produces
print(values)

# Materialise the range into a list of integers: values_list
values_list = [*values]
print(values_list)

# A range can be summed directly, no conversion needed: values_sum
values_sum = sum(values)
print(values_sum)


In [None]:
# enumerate and zip

# enumerate() adds an index to an iterable, yielding (index, value) tuples
# zip() creates a zip object, which is an iterator of tuples
# that can be turned into a list and printed
# not a join; more like a tuple grouping of objects in the correct order

In [None]:
# List of mutant names: mutants
mutants = [
    'charles xavier',
    'bobby drake',
    'kurt wagner',
    'max eisenhardt',
    'kitty pryde',
]

# Pair every name with its position, as a list of (index, name) tuples: mutant_list
mutant_list = [*enumerate(mutants)]
print(mutant_list)

# Unpack each (index, name) pair while looping; mutant_list already holds
# exactly the pairs that enumerate(mutants) yields
for index1, value1 in mutant_list:
    print(index1, value1)

# Same loop again, but counting from 1 instead of 0
for index2, value2 in enumerate(mutants, 1):
    print(index2, value2)

In [None]:

# There is no unzip function for doing the reverse of what zip() does.
# We can, however, reverse what has been zipped together by using zip()
# with a little help from *. The * operator unpacks an iterable such as
# a list or a tuple into positional arguments in a function call.

# Unpacking with * exhausts the zip object, so it has to be re-created
# before it can be used again.

# Create a zip object from mutants and powers: z1
z1 = zip(mutants, powers)

# Print the tuples in z1 by unpacking with *
# (this consumes z1, which is why it is rebuilt just below)
print(*z1)

# Re-create a zip object from mutants and powers: z1
z1 = zip(mutants, powers)

# 'Unzip' the tuples in z1 by unpacking with * and zip(): result1, result2
result1, result2 = zip(*z1)

# Check that the unzipped values match the originals.
# zip() always produces tuples, and a tuple never compares equal to a list,
# so the originals are normalised with tuple() first: `mutants` is rebound
# to a list in an earlier cell, which would otherwise make this print False.
print(result1 == tuple(mutants))
print(result2 == tuple(powers))


In [None]:
# Using iteration to import data in chunks, analyse each chunk, and save the results

import pandas as pd

# Define count_entries()
def count_entries(csv_file, c_size, colname):
    """Count how often each value occurs in one column of a CSV file.

    The file is read in chunks of ``c_size`` rows, so it never has to be
    loaded in a single piece.

    Args:
        csv_file: Path or file-like object, passed straight to ``pd.read_csv``.
        c_size: Number of rows per chunk (the ``chunksize`` of ``pd.read_csv``).
        colname: Name of the column whose values are counted.

    Returns:
        A dict mapping each value seen in ``colname`` to the number of
        times it occurs.

    Raises:
        KeyError: If ``colname`` is not a column of the file.
    """
    # Initialize an empty dictionary: counts_dict
    counts_dict = {}

    # Keep a handle on the chunked reader so it can be closed explicitly
    reader = pd.read_csv(csv_file, chunksize=c_size)
    try:
        # Iterate over the file chunk by chunk
        for chunk in reader:
            # Iterate over the column in the chunk's DataFrame
            for entry in chunk[colname]:
                # First sighting starts from 0, later ones add to the tally
                counts_dict[entry] = counts_dict.get(entry, 0) + 1
    finally:
        # Release the underlying file handle even when iteration stops
        # early (for example, a KeyError on a missing column)
        reader.close()

    # Return counts_dict
    return counts_dict

# Tally the values in the 'lang' column of tweets.csv,
# reading the file 10 rows at a time: result_counts
result_counts = count_entries(csv_file='tweets.csv', c_size=10, colname='lang')

# Show the resulting counts
print(result_counts)


In [None]:
#comprehensions

In [None]:
# List comprehension over a list.
# Square brackets [] are used both for list literals and for list comprehensions.

nums = [12, 8, 21, 3]
new_nums = [value + 1 for value in nums]
print(new_nums)

In [None]:
# Comprehensions also work on other iterables,
# and can stand in for nested for loops, e.g.

pairs_2 = [
    (first, second)
    for first in range(2)
    for second in range(6, 8)
]
print(pairs_2)

In [None]:
# Build a 5 x 5 matrix as a list of five separate row lists: matrix
matrix = [list(range(5)) for _ in range(5)]

# Print the matrix, one row per line
for row in matrix:
    print(row)

In [None]:
# conditional list comprehensions
# dict comprehensions

In [None]:
# List of names to filter and measure: fellowship
fellowship = [
    'frodo',
    'samwise',
    'merry',
    'aragorn',
    'legolas',
    'boromir',
    'gimli',
]

# Conditional list comprehension: names shorter than 7 characters are
# replaced by an empty string, the rest are kept as-is: new_fellowship
new_fellowship = ['' if len(name) < 7 else name for name in fellowship]
print(new_fellowship)

# Dict comprehension mapping each name to its length: new_fellowship_d
new_fellowship_d = {name: len(name) for name in fellowship}
print(new_fellowship_d)



In [None]:
# .. and generators

# A generator expression is like a list comprehension, except that it does not
# build and store the whole list; instead it produces a generator object that
# holds the recipe for the values, which can be iterated over to access them.

# Generator expressions use parentheses (), list comprehensions use brackets [].

# 'Lazy evaluation': values are not computed until they are called on / needed.

# Generator functions use 'yield' (not return) to produce their values one at a
# time; calling one returns a generator object whose values can then be
# accessed when needed.


In [None]:
# Generator expression producing 0 through 30: result
result = (num for num in range(31))

# Consume and print the first 5 values, one per line
print(
    next(result),
    next(result),
    next(result),
    next(result),
    next(result),
    sep='\n',
)

# The generator remembers its position, so this loop picks up at 5
# and prints the remaining values through 30
for value in result:
    print(value)

In [None]:
import pandas as pd
df = pd.read_csv('tweets.csv')

# Pull out the created_at column from df: tweet_time
tweet_time = df['created_at']

# Keep the slice [11:19] of each entry, but only for entries whose
# slice [17:19] equals '19': tweet_clock_time
# (presumably an HH:MM:SS clock time whose seconds are 19 - confirm
# against the actual created_at format)
tweet_clock_time = [
    stamp[11:19]
    for stamp in tweet_time
    if stamp[17:19] == '19'
]

# Print the extracted times
print(tweet_clock_time)