In [1]:
#a conditional inside a list comprehension filters the iterable:
#only the even numbers from range(10) are squared and kept
[num**2 for num in range(10) if not num % 2]

[0, 4, 16, 36, 64]

In [2]:
#dict comprehension
#map every number in range(9) to its negation, i.e. {number: -number}
pos_neg = {number: -number for number in range(9)}
print(pos_neg)

{0: 0, 1: -1, 2: -2, 3: -3, 4: -4, 5: -5, 6: -6, 7: -7, 8: -8}


In [4]:
#Filter fellowship with a list comprehension whose iterator variable is member.
#The conditional applies len() to the iterator variable,
#so only strings with 7 characters or more are kept.

# Create a list of strings: fellowship
fellowship = [
    'frodo', 'samwise', 'merry', 'aragorn',
    'legolas', 'boromir', 'gimli',
]

# Create list comprehension: new_fellowship
new_fellowship = [member for member in fellowship if 7 <= len(member)]

# Print the new list
print(new_fellowship)

['samwise', 'aragorn', 'legolas', 'boromir']


In [5]:
#The output expression decides what is stored for every member:
#strings shorter than 7 characters become an empty string ('' or ""),
#all other strings are kept as-is.
# Create a list of strings: fellowship
fellowship = [
    'frodo', 'samwise', 'merry', 'aragorn',
    'legolas', 'boromir', 'gimli',
]

# Create list comprehension: new_fellowship
new_fellowship = ['' if len(member) < 7 else member for member in fellowship]

# Print the new list
print(new_fellowship)

['', 'samwise', '', 'aragorn', 'legolas', 'boromir', '']


In [6]:
#Build a dict comprehension over fellowship, with member as the iterator variable:
#each key is a string from the list and its value is the length of that string.
#The output expression of a dict comprehension has the form <key> : <value>.
# Create a list of strings: fellowship
fellowship = [
    'frodo', 'samwise', 'merry', 'aragorn',
    'legolas', 'boromir', 'gimli',
]

# Create dict comprehension: new_fellowship
new_fellowship = {member: len(member) for member in fellowship}

# Print the new dictionary
print(new_fellowship)

{'frodo': 5, 'samwise': 7, 'merry': 5, 'aragorn': 7, 'legolas': 7, 'boromir': 7, 'gimli': 5}


In [7]:
#Generator expression example:
#round brackets create a generator object instead of building a list
(num * 2 for num in range(10))

<generator object <genexpr> at 0x105c624d0>

In [8]:
#list comprehensions vs generator expressions
#a list comprehension returns a list
#a generator expression returns a generator object
#both can be iterated over.

In [9]:
#looping over a generator object produces its elements one at a time
result = (value for value in range(6))
for num in result:
    print(num)

0
1
2
3
4
5


In [10]:
#a generator can also be unpacked into a list in a single step
result = (value for value in range(6))
print([*result])

[0, 1, 2, 3, 4, 5]


In [11]:
#next() pulls exactly one value out of the generator
result = (value for value in range(6))
print(next(result))

0


In [12]:
#uses the generator left in result by the previous cell:
#each next() call resumes where the last one stopped, so this prints the following value
print(next(result))

1


In [13]:
#generators are especially useful for LARGE datasets:
#no value is computed until it is requested, as opposed to the list comprehension
#[num for num in range(10**1000000)], which would have to build every number up front
(num for num in range(10**1000000))

<generator object <genexpr> at 0x105c62dc0>

In [14]:
#a generator expression accepts the same if-filter as a list comprehension
even_nums = (num for num in range(10) if not num % 2)
print(list(even_nums))

[0, 2, 4, 6, 8]


In [15]:
#generator functions
#produces generator objects when called
#defined like a regular function: def 
#yields a sequence of values instead of returning a single value
#generates a value with yield keyword

In [16]:
 #example of generator function: 
def num_sequence(n):
    """Yield the numbers 0, 1, 2, ... that are smaller than n, one at a time.

    Counting starts at 0 and stops before n is reached, so n itself is
    never produced and nothing is yielded when n <= 0.
    """
    value = 0
    while value < n:
        yield value
        value = value + 1

In [17]:
#another example: calling the generator function num_sequence() does not run its body,
#it returns a generator object (see the printed type)
result = num_sequence(5)
print(type(result))

<class 'generator'>


In [18]:
#iterate over the generator object stored in result by the previous cell;
#the values 0 through 4 are produced one per loop iteration
for item in result:
    print(item)

0
1
2
3
4


In [19]:
#Complete the function header for the function get_lengths() that has a single parameter, input_list.
#In the for loop in the function definition, yield the length of the strings in input_list.
#Complete the iterable part of the for loop for printing the values generated by the get_lengths() generator function. 
#Supply the call to get_lengths(), passing in the list lannister.

# Create a list of strings
lannister = [
    'cersei',
    'jaime',
    'tywin',
    'tyrion',
    'joffrey',
]

# Define generator function get_lengths
def get_lengths(input_list):
    """Lazily produce the length of every item in input_list.

    The lengths are yielded one at a time, in the same order as the
    items appear in input_list.
    """

    # Delegate to map(), which calls len() on an item only when it is requested
    yield from map(len, input_list)

# Print the values generated by get_lengths():
# the for loop iterates directly over the generator object returned by the call,
# printing one length per line
for value in get_lengths(lannister):
    print(value)

6
5
5
6
7


In [None]:
#Extract the column 'created_at' from df and assign the result to tweet_time. 
#Fun fact: the extracted column in tweet_time here is a Series data structure!
#Create a list comprehension that extracts the time from each row in tweet_time. 
#Each row is a string that represents a timestamp, and you will access the 12th to 19th characters in the string 
#to extract the time. 
#Use entry as the iterator variable and assign the result to tweet_clock_time. 
#Remember that Python uses 0-based indexing!

# Extract the created_at column from df: tweet_time
# NOTE(review): df is not defined anywhere in this notebook - it must already exist in the session
tweet_time = df['created_at']

# Extract the clock time (characters 12 through 19 of each entry): tweet_clock_time
tweet_clock_time = [entry[11:19] for entry in tweet_time]

# Print the extracted times
print(tweet_clock_time)

#<script.py> output:
    #['23:40:17', '23:40:17', '23:40:17', '23:40:17', '23:40:17', '23:40:17', '23:40:18', '23:40:17', '23:40:18', '23:40:18', '23:40:18', '23:40:17', '23:40:18', '23:40:18', '23:40:17', '23:40:18', '23:40:18', '23:40:17', '23:40:18', '23:40:17', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:17', '23:40:18', '23:40:18', '23:40:17', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:18', '23:40:19', '23:40:18', '23:40:18', '23:40:18', '23:40:19', '23:40:19', '23:40:19', '23:40:18', '23:40:19', '23:40:19', '23:40:19', '23:40:18', '23:40:19', '23:40:19', '23:40:19', '23:40:18', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19']

In [None]:
#Extract the column 'created_at' from df and assign the result to tweet_time.
#Create a list comprehension that extracts the time from each row in tweet_time. 
#Each row is a string that represents a timestamp, and you will access the 12th to 19th characters 
#in the string to extract the time. Use entry as the iterator variable and assign the result to tweet_clock_time. 
#Additionally, add a conditional expression that checks whether entry[17:19] is equal to '19'.

# Extract the created_at column from df: tweet_time
tweet_time = df['created_at']

# Extract the clock time: tweet_clock_time
tweet_clock_time = [time[11:19] for time in tweet_time if time[17:19] == '19']

# Print the extracted times
print(tweet_clock_time)

<script.py> output:
    ['23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19', '23:40:19']

In [20]:
#dict comprehension (pitfall)
#two for clauses form a nested loop over every (name, hero) combination, so each
#name key is overwritten again and again and ends up with the last hero, 'deadpool'
names = ['bruce', 'clark', 'peter', 'logan', 'wade']
heros = ['batman', 'superman', 'spiderman', 'wolverine', 'deadpool']
new_dict = {
    name: hero
    for name in names
    for hero in heros
}
print(new_dict)

{'bruce': 'deadpool', 'clark': 'deadpool', 'peter': 'deadpool', 'logan': 'deadpool', 'wade': 'deadpool'}


In [21]:
#dict comprehension another way
#zip() pairs each name with the hero at the same position, so every key gets its own value
names = ['bruce', 'clark', 'peter', 'logan', 'wade']
heros = ['batman', 'superman', 'spiderman', 'wolverine', 'deadpool']
new_dict = {name: hero for name, hero in zip(names, heros)}
print(new_dict)

{'bruce': 'batman', 'clark': 'superman', 'peter': 'spiderman', 'logan': 'wolverine', 'wade': 'deadpool'}


In [22]:
#conditional dict comprehension
#return the dictionary without peter: the if clause skips that (name, hero) pair
names = ['bruce', 'clark', 'peter', 'logan', 'wade']
heros = ['batman', 'superman', 'spiderman', 'wolverine', 'deadpool']
new_dict = {name: hero for name, hero in zip(names, heros) if name != 'peter'}
print(new_dict)

{'bruce': 'batman', 'clark': 'superman', 'logan': 'wolverine', 'wade': 'deadpool'}


In [24]:
#create a set by adding the items of a list one at a time
#remember: a set never stores duplicate elements
nums = [1, 1, 2, 1, 3, 4, 3, 4, 5, 5, 6, 7, 8, 7, 9, 9]
my_set = set()
for n in nums:
    my_set.add(n)
print(my_set)

{1, 2, 3, 4, 5, 6, 7, 8, 9}


In [30]:
#set comprehension: curly braces with a single output expression build a set
nums = [1, 1, 2, 1, 3, 4, 3, 4, 5, 5, 6, 7, 8, 7, 9, 9]
set_num = {item for item in nums}
print(set_num)

{1, 2, 3, 4, 5, 6, 7, 8, 9}


In [None]:
#In the function read_large_file(), read a line from file_object by using the method readline(). Assign the result to data.
#In the function read_large_file(), yield the line read from the file data.
#In the context manager, create a generator object gen_file by calling your generator function read_large_file() 
#and passing file to it.
#Print the first three lines produced by the generator object gen_file using next().

# Define read_large_file()
def read_large_file(file_object):
    """Lazily yield the lines of file_object, one line per iteration.

    A line is fetched with readline() only when the next value is requested.
    The generator finishes as soon as readline() returns an empty (falsy)
    result, which is treated as the end of the file.
    """

    # Fetch the first line from the file: data
    data = file_object.readline()

    # Keep going until readline() has nothing left to return
    while data:

        # Hand the current line to the caller
        yield data

        # Read the following line from the file: data
        data = file_object.readline()

        
# Open a connection to the file
with open('world_dev_ind.csv') as file:

    # Create a generator object for the file: gen_file
    gen_file = read_large_file(file)

    # Print the first three lines of the file.
    # Every line keeps its trailing newline, so print() leaves a blank line after it;
    # next() raises StopIteration once the file has no more lines.
    print(next(gen_file))
    print(next(gen_file))
    print(next(gen_file))

# The expected output below is pasted into the cell source, so every line of it
# has to stay commented out - otherwise the cell fails with a SyntaxError.
#correct output:
#<script.py> output:
    #CountryName,CountryCode,IndicatorName,IndicatorCode,Year,Value

    #Arab World,ARB,"Adolescent fertility rate (births per 1,000 women ages 15-19)",SP.ADO.TFRT,1960,133.56090740552298

    #Arab World,ARB,Age dependency ratio (% of working-age population),SP.POP.DPND,1960,87.7976011532547
    