In [None]:
# LIST COMPREHENSION

# Reading Multiple Files
# One use case where list comprehensions come in handy is when data is split across multiple files. For example, suppose we had a data directory that contained several CSV files (among other files), each with the same information (columns) for separate groups or divisions. We could use a list comprehension with an endswith('.csv') condition to get a list of just the CSV files in that directory. We could then use another list comprehension to have pandas read each of those files, and the pd.concat function to combine them all into a single data set that we can analyze, as follows.

import os
import pandas as pd

# Keep the directory in one place so the listing and the read paths cannot drift apart.
DATA_DIR = './data'

# os.listdir returns names in arbitrary order; sorting first makes the row order
# of the combined data set reproducible from run to run.
file_list = [f for f in sorted(os.listdir(DATA_DIR)) if f.endswith('.csv')]
data_sets = [pd.read_csv(os.path.join(DATA_DIR, f)) for f in file_list]
# ignore_index=True renumbers the rows 0..n-1; without it every file keeps its
# own index and the combined frame ends up with duplicate index labels.
data = pd.concat(data_sets, axis=0, ignore_index=True)

# Selecting Data Frame Columns Based on Conditions

data = pd.read_csv('vehicles.csv')

# Average every numeric column once, using the public select_dtypes API rather
# than the private _get_numeric_data helper, then keep the names of the columns
# whose mean is greater than 15.
numeric_means = data.select_dtypes(include='number').mean()
selected_columns = [col for col, avg in numeric_means.items() if avg > 15]
print(selected_columns)

['Year', 'Fuel Barrels/Year', 'City MPG', 'Highway MPG', 'Combined MPG', 'CO2 Emission Grams/Mile', 'Fuel Cost/Year']

In [None]:
# TUPLES

# Tuples are sequences, just like lists. However, the main difference between tuples and lists is that tuples are immutable. This means that the values inside a tuple cannot be overwritten (or mutated) once the tuple is defined.

# We define tuples using parentheses and specify the sequence in our tuple as follows:

# A tuple literal: comma-separated items wrapped in parentheses.
chocolates = (
    'dark',
    'milk',
    'semi sweet',
)

In [1]:
# DICTS

# Sometimes we don't just want to store data in a sequence. There are cases where we want to easily retrieve our data rather than iterate through an entire list. There are also cases where we need to label our data. For example, the phone numbers stored in our phone are labeled using the names of our contacts. In these cases, it is better to use a dict. Dicts are collections of key-value pairs. Python stores the data behind the scenes in a hash table. This means that the key is used to compute an index (called a hash), and the value is stored in the location marked by that index. This makes retrieval very fast.

# We can manually create a dict by specifying all keys and values separated by a colon within curly braces.

# Phone numbers keyed by contact name.
contacts = {
    'John': '312-555-1234',
    'Paul': '312-555-3123',
    'George': '312-555-3333',
    'Ringo': '312-555-2222',
}

# Iterating Through a Dict

# We can use keys() to iterate through the keys, values() to iterate through the values, and items() to iterate through both simultaneously.

# Keys only: the contact names.
for name in contacts.keys():
    print(name)

# Values only: the phone numbers.
for number in contacts.values():
    print(number)

# Keys and values together, one "name: number" line per contact.
for name, number in contacts.items():
    print(f"{name}: {number}")


John
Paul
George
Ringo
312-555-1234
312-555-3123
312-555-3333
312-555-2222
John: 312-555-1234
Paul: 312-555-3123
George: 312-555-3333
Ringo: 312-555-2222
