In [2]:
# one dictionary per record; every record has a "name" and an "age"
data = [
    {"name": "greg", "age": 103},
    {"name": "santa", "age": 800},
    {"name": "dorothy", "age": 8},
]

In [4]:
# "projection": pull one field out of every record and transform it
[person["name"].upper() for person in data]

['GREG', 'SANTA', 'DOROTHY']

In [5]:
def double(x):
    """Return `x` added to itself."""
    doubled = x + x
    return doubled

[double(person["age"]) for person in data]  # mapping: double() applied to every age


[206, 1600, 16]

In [6]:
# "selection": keep only _some_ of the records, each one unchanged

[person for person in data if person["name"] == "santa"]

[{'name': 'santa', 'age': 800}]

In [7]:
# selection and mapping combined: filter the records, then transform a field

[double(person["age"]) for person in data if person["name"] == "santa"]

[1600]

In [8]:
# the selection condition can be more complicated, e.g. a membership test

[double(person["age"]) for person in data if person["name"] in ("greg", "santa")]

[206, 1600]

In [None]:
# the same selection and mapping, rewritten with named helper functions

def is_interesting(name):
    """Return True when `name` is "greg" or "santa", False otherwise."""
    interesting_names = ("greg", "santa")
    return name in interesting_names

def get_age(item):
    """Return the value stored under the "age" key of `item`."""
    age = item["age"]
    return age

[double(get_age(person)) for person in data if is_interesting(person["name"])]

[206, 1600]

In [None]:
data = [
    {"name": "greg", "age": 103},
    {"name": "santa", "age": 800},
    {"name": "dorothy", "food": "treats", "age": 8},
]

# Only dorothy has a "food" entry, so pick a record that lacks one.
# `item` has to be bound explicitly here: the loop variable of a list
# comprehension is not visible outside the comprehension.
item = data[0]

try:
    food = item["food"]   # <<== error if no "food" entry
except KeyError as missing_key:
    # The error is the demonstration: show it, but let the cell keep running.
    print("KeyError:", missing_key)

food = item.get("food", "cookies")  # <<== default value if missing

# The same safe lookup applied to every record.
[record.get("food", "cookies") for record in data]



In [17]:
# A flat list of numbers such as v = [1,3,5,1,7,2,3,7,9] is the kind of
# thing numpy is meant to make very efficient.

# Rather than a list of dictionaries (one dictionary per record),
# the same records can be laid out column by column:

data = {
    "name": ["greg", "santa", "dorothy"],  # <- imagine a million of these
    "age": [103, 800, 8],                  # <- imagine a million of these
}

# This way the structure (the column names) only has to be stored _once_.

# A structure that holds data like this using efficient routines is called
# a "DataFrame".

# Python has a dataframe library called "Pandas".


In [18]:
import pandas as pd
import numpy as np 

In [19]:
# row-oriented records: one dictionary per person
data = [
    {"name": "greg", "age": 103},
    {"name": "santa", "age": 800},
    {"name": "dorothy", "age": 8},
]

# build the table from the list of dictionaries
df = pd.DataFrame(data)

# a bare expression on the last line of the cell displays the frame
df

Unnamed: 0,name,age
0,greg,103
1,santa,800
2,dorothy,8


In [20]:
# print() writes the frame's plain-text form rather than the notebook's table display
print(df)

      name  age
0     greg  103
1    santa  800
2  dorothy    8


In [21]:
# summary statistics (count, mean, std, min, quartiles, max); here only "age" is summarised
df.describe()

Unnamed: 0,age
count,3.0
mean,303.666667
std,432.453851
min,8.0
25%,55.5
50%,103.0
75%,451.5
max,800.0


In [23]:
# head(1) shows just the first row of the frame
df.head(1)

Unnamed: 0,name,age
0,greg,103
