In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

# List comprehensions

## Writing list comprehensions

In [4]:
# Build the squares of 0 through 9 with a list comprehension: squares
squares = [n * n for n in range(10)]

# Show the resulting list
print(squares)

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]


## Nested list comprehensions

In [5]:
# Build a 5 x 5 matrix as a list of lists: matrix
# The inner comprehension produces one row (0..4); the outer one repeats it 5 times,
# creating a fresh row list on every pass.
matrix = [[c for c in range(5)] for _ in range(5)]

# Print the matrix one row per line
for row in matrix:
    print(row)

[0, 1, 2, 3, 4]
[0, 1, 2, 3, 4]
[0, 1, 2, 3, 4]
[0, 1, 2, 3, 4]
[0, 1, 2, 3, 4]


# Advanced comprehensions

## Using conditionals in comprehensions (1)

In [6]:
# The names to filter: fellowship
fellowship = ['frodo', 'samwise', 'merry', 'aragorn', 'legolas', 'boromir', 'gimli']

# Keep only the names with 7 or more characters: new_fellowship
# (a trailing `if` in a comprehension drops the items that fail the test)
new_fellowship = [
    name
    for name in fellowship
    if len(name) >= 7
]

# Show the filtered list
print(new_fellowship)

['samwise', 'aragorn', 'legolas', 'boromir']


## Using conditionals in comprehensions (2)

In [7]:
# The names to transform: fellowship
fellowship = ['frodo', 'samwise', 'merry', 'aragorn', 'legolas', 'boromir', 'gimli']

# Replace every name shorter than 7 characters with an empty string: new_fellowship
# (a conditional expression in the output position keeps one item per input item)
new_fellowship = ['' if len(name) < 7 else name for name in fellowship]

# Show the transformed list
print(new_fellowship)

['', 'samwise', '', 'aragorn', 'legolas', 'boromir', '']


## Dict comprehensions

In [8]:
# The names to measure: fellowship
fellowship = ['frodo', 'samwise', 'merry', 'aragorn', 'legolas', 'boromir', 'gimli']

# Map each name to its length with a dict comprehension: new_fellowship
new_fellowship = {name: len(name) for name in fellowship}

# Show the resulting dictionary
print(new_fellowship)

{'frodo': 5, 'samwise': 7, 'merry': 5, 'aragorn': 7, 'legolas': 7, 'boromir': 7, 'gimli': 5}


# Introduction to generator expressions

## Write your own generator expressions

In [10]:
# Create a generator object that lazily produces 0 through 30: result
result = (n for n in range(31))

# Print the first 5 values, pulling each one on demand with next()
print(*[next(result) for _ in range(5)], sep='\n')

# Print the rest of the values: iteration resumes where next() left off
for value in result:
    print(value)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30


## Changing the output in generator expressions

In [11]:
# The names to measure: lannister
lannister = ['cersei', 'jaime', 'tywin', 'tyrion', 'joffrey']

# Generator expression yielding the length of each name on demand: lengths
lengths = (len(name) for name in lannister)

# Consume the generator, printing each length as it is produced
for value in lengths:
    print(value)

6
5
5
6
7


## Build a generator


In [12]:
# The names to measure: lannister
lannister = ['cersei', 'jaime', 'tywin', 'tyrion', 'joffrey']

# Define generator function get_lengths
def get_lengths(input_list):
    """Yield the length of each item in input_list, one at a time.

    Because this is a generator function, calling it returns a generator
    object; each length is computed only when the next value is requested.
    """
    for item in input_list:
        # Hand back one length, then pause until the caller asks for more
        yield len(item)

# Print every value produced by get_lengths()
name_lengths = get_lengths(lannister)
for value in name_lengths:
    print(value)

6
5
5
6
7


# Wrapping up comprehensions and generators

## List comprehensions for time-stamped data

In [15]:
# Allow up to 1000 columns to be shown so wide frames are not elided on display
pd.options.display.max_columns = 1000

# Load the tweets data set: df
# NOTE(review): the head() output of this frame shows an 'Unnamed: 0' column, which
# suggests the CSV carries a saved index -- consider index_col=0 if that is confirmed.
df = pd.read_csv("data/tweets.csv")

In [16]:
# Preview the first two rows of the tweets frame
df.head(n=2)

Unnamed: 0,contributors,coordinates,created_at,entities,extended_entities,favorite_count,favorited,filter_level,geo,id,id_str,in_reply_to_screen_name,in_reply_to_status_id,in_reply_to_status_id_str,in_reply_to_user_id,in_reply_to_user_id_str,is_quote_status,lang,place,possibly_sensitive,quoted_status,quoted_status_id,quoted_status_id_str,retweet_count,retweeted,retweeted_status,source,text,timestamp_ms,truncated,user
0,,,Tue Mar 29 23:40:17 +0000 2016,"{'hashtags': [], 'user_mentions': [{'screen_na...","{'media': [{'sizes': {'large': {'w': 1024, 'h'...",0,False,low,,714960401759387648,714960401759387648,,,,,,False,en,,False,,,,0,False,"{'retweeted': False, 'text': "".@krollbondratin...","<a href=""http://twitter.com"" rel=""nofollow"">Tw...",RT @bpolitics: .@krollbondrating's Christopher...,1459294817758,False,"{'utc_offset': 3600, 'profile_image_url_https'..."
1,,,Tue Mar 29 23:40:17 +0000 2016,"{'hashtags': [{'text': 'cruzsexscandal', 'indi...","{'media': [{'sizes': {'large': {'w': 500, 'h':...",0,False,low,,714960401977319424,714960401977319424,,,,,,False,en,,False,,,,0,False,"{'retweeted': False, 'text': '@dmartosko Cruz ...","<a href=""http://twitter.com"" rel=""nofollow"">Tw...",RT @HeidiAlpine: @dmartosko Cruz video found.....,1459294817810,False,"{'utc_offset': None, 'profile_image_url_https'..."


In [18]:
# Pull the created_at column out of df: tweet_time
tweet_time = df['created_at']

# Slice characters 11-18 out of each timestamp: tweet_clock_time
# (in a value like 'Tue Mar 29 23:40:17 +0000 2016' that span is the HH:MM:SS part)
tweet_clock_time = [stamp[11:19] for stamp in tweet_time]

# Display the extracted times
tweet_clock_time

['23:40:17',
 '23:40:17',
 '23:40:17',
 '23:40:17',
 '23:40:17',
 '23:40:17',
 '23:40:18',
 '23:40:17',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:17',
 '23:40:18',
 '23:40:18',
 '23:40:17',
 '23:40:18',
 '23:40:18',
 '23:40:17',
 '23:40:18',
 '23:40:17',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:17',
 '23:40:18',
 '23:40:18',
 '23:40:17',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:19',
 '23:40:18',
 '23:40:18',
 '23:40:18',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:18',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:18',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:18',
 '23:40:19',

## Conditional list comprehensions for time-stamped data

In [20]:
# Pull the created_at column out of df: tweet_time
tweet_time = df['created_at']

# Keep the HH:MM:SS part (characters 11-18) of only those timestamps whose
# seconds field (characters 17-18) equals '19': tweet_clock_time
tweet_clock_time = [
    stamp[11:19]
    for stamp in tweet_time
    if stamp[17:19] == '19'
]

# Display the extracted times
tweet_clock_time

['23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19',
 '23:40:19']