In [2]:
import pandas as pd
import numpy as np
from pydataset import data
from scipy import stats

In [2]:
# Print the documentation bundled with the dataset, then load the frame itself.
data("Mammals", show_doc=True)
Mammals = data("Mammals")

Mammals

PyDataset Documentation (adopted from R Documentation. The displayed examples are in R)

## Garland(1983) Data on Running Speed of Mammals

### Description

Observations on the maximal running speed of mammal species and their body
mass.

### Usage

    data(Mammals)

### Format

A data frame with 107 observations on the following 4 variables.

weight

Body mass in Kg for "typical adult sizes"

speed

Maximal running speed (fastest sprint velocity on record)

hoppers

logical variable indicating animals that ambulate by hopping, e.g. kangaroos

specials

logical variable indicating special animals with "lifestyles in which speed
does not figure as an important factor": Hippopotamus, raccoon (Procyon),
badger (Meles), coati (Nasua), skunk (Mephitis), man (Homo), porcupine
(Erithizon), oppossum (didelphis), and sloth (Bradypus)

### Details

Used by Chappell (1989) and Koenker, Ng and Portnoy (1994) to illustrate the
fitting of piecewise linear curves.

### Source

Garland, T. (

In [3]:
# Inspect the column labels of the Mammals frame.
Mammals.columns

Index(['weight', 'speed', 'hoppers', 'specials'], dtype='object')

In [7]:
# Inspect the row labels of the Mammals frame.
Mammals.index

Int64Index([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,
            ...
             98,  99, 100, 101, 102, 103, 104, 105, 106, 107],
           dtype='int64', length=107)

In [14]:
# Boolean mask: mammals flagged as hoppers whose top speed is strictly above
# the median speed of the whole dataset.
median_speed = Mammals["speed"].median()
faster_than_median = Mammals["speed"].gt(median_speed)
is_hopper = Mammals["hoppers"].eq(True)
hoppers_above_med_sp = faster_than_median & is_hopper

# Keep only the rows where the mask is True.
Mammals.loc[hoppers_above_med_sp]

Unnamed: 0,weight,speed,hoppers,specials
96,4.6,64.0,True,False
97,4.4,72.0,True,False
98,4.0,72.0,True,False
99,3.5,56.0,True,False
100,2.0,64.0,True,False
101,1.9,56.0,True,False
102,1.5,50.0,True,False


In [9]:
# Median of the speed column, used as the cut-off for "fast" mammals.
Mammals.speed.median()

48.0

In [15]:
# employees[employees.hire_date.str.contains("12-25")]

# mpg[mpg.hwy > 35][["hwy", "model"]]
# SELECT hwy, model
# FROM mpg
# WHERE hwy > 35

# mpg[["hwy", "model"]]     double brackets give you back a dataframe containing only the columns specified

# mpg.hwy
# mpg["hwy"]

# Imagine the above in SQL
# SELECT * FROM mpg
# WHERE hwy > 35

In [16]:
# Roster of student first names, kept as a plain list of strings.
students = [
    'Sally', 'Jane', 'Suzie', 'Billy',
    'Ada', 'John', 'Thomas', 'Marie',
    'Albert', 'Richard', 'Isaac', 'Alan',
]

In [17]:
# One-row, one-column frame built from a single record.
example = pd.DataFrame([{"fruits": "kiwi"}])

In [18]:
# Display the frame as constructed (a single "fruits" column).
example

Unnamed: 0,fruits
0,kiwi


In [19]:
# Assigning a scalar to a new column label creates that column and
# broadcasts the value to every row. This mutates `example` in place.
example["banana"] = 2

In [20]:
# Display the frame again to confirm the new "banana" column.
example

Unnamed: 0,fruits,banana
0,kiwi,2


In [12]:
# Load the "titanic" dataset from pydataset into a DataFrame.
titanic = data("titanic")
# (rows, columns) of the loaded frame.
titanic.shape

(1316, 4)

In [13]:
# Preview the first five rows rather than rendering the whole frame.
titanic.head(n=5)

Unnamed: 0,class,age,sex,survived
1,1st class,adults,man,yes
2,1st class,adults,man,yes
3,1st class,adults,man,yes
4,1st class,adults,man,yes
5,1st class,adults,man,yes


In [33]:
# Number of passengers per travel class who survived.
survived_rows = titanic.loc[titanic["survived"].eq("yes")]
survivors = survived_rows.groupby("class")["survived"].count()

In [34]:
# Number of passengers per travel class who did not survive.
lost_rows = titanic.loc[titanic["survived"].eq("no")]
lost = lost_rows.groupby("class")["survived"].count()

In [36]:
# Per class: True where more passengers survived than were lost.
lost < survivors

class
1st class     True
2nd class    False
3rd class    False
Name: survived, dtype: bool

In [49]:
# Overall survival rate: the mean of a boolean mask is the fraction of True
# values, i.e. the share of passengers whose "survived" value is "yes".
survived_mask = titanic["survived"].eq("yes")
total_survival_avg = survived_mask.mean()
total_survival_avg

0.3791793313069909

In [59]:
# Share of ALL passengers who were 1st-class survivors: the mask is True only
# for rows that are both "1st class" and survived, and the mean of a boolean
# Series is the fraction of True values.
# NOTE(review): this is not the survival rate *within* 1st class (that would
# divide by the number of 1st-class passengers only) - confirm which is intended.
first_class_survivor = (titanic["survived"] == "yes") & (titanic["class"] == "1st class")

# Bind the scalar to a name so the final display line works on a fresh kernel;
# assigning only the DataFrame column leaves the bare name undefined.
survival_1st_class_avg = first_class_survivor.mean()

# Keep the column as well: the scalar is broadcast to every row.
titanic["survival_1st_class_avg"] = survival_1st_class_avg
survival_1st_class_avg

0.15425531914893617

In [61]:
# Share of ALL passengers who were 2nd-class survivors: the mask is True only
# for rows that are both "2nd class" and survived, and the mean of a boolean
# Series is the fraction of True values.
# NOTE(review): this is not the survival rate *within* 2nd class (that would
# divide by the number of 2nd-class passengers only) - confirm which is intended.
second_class_survivor = (titanic["survived"] == "yes") & (titanic["class"] == "2nd class")

# Bind the scalar to a name so the final display line works on a fresh kernel;
# assigning only the DataFrame column leaves the bare name undefined.
survival_2nd_class_avg = second_class_survivor.mean()

# Keep the column as well: the scalar is broadcast to every row.
titanic["survival_2nd_class_avg"] = survival_2nd_class_avg
survival_2nd_class_avg

0.08966565349544073

In [60]:
# Share of ALL passengers who were 3rd-class survivors: the mask is True only
# for rows that are both "3rd class" and survived, and the mean of a boolean
# Series is the fraction of True values.
# NOTE(review): this is not the survival rate *within* 3rd class (that would
# divide by the number of 3rd-class passengers only) - confirm which is intended.
third_class_survivor = (titanic["survived"] == "yes") & (titanic["class"] == "3rd class")

# Bind the scalar to a name so the final display line works on a fresh kernel;
# assigning only the DataFrame column leaves the bare name undefined.
survival_3rd_class_avg = third_class_survivor.mean()

# Keep the column as well: the scalar is broadcast to every row.
titanic["survival_3rd_class_avg"] = survival_3rd_class_avg
survival_3rd_class_avg

0.135258358662614

In [62]:
# Display the frame, which now also carries the three survival_*_class_avg
# columns; each of those holds a single value repeated on every row.
titanic

Unnamed: 0,class,age,sex,survived,survival_1st_class_avg,survival_3rd_class_avg,survival_2nd_class_avg
1,1st class,adults,man,yes,0.154255,0.135258,0.089666
2,1st class,adults,man,yes,0.154255,0.135258,0.089666
3,1st class,adults,man,yes,0.154255,0.135258,0.089666
4,1st class,adults,man,yes,0.154255,0.135258,0.089666
5,1st class,adults,man,yes,0.154255,0.135258,0.089666
6,1st class,adults,man,yes,0.154255,0.135258,0.089666
7,1st class,adults,man,yes,0.154255,0.135258,0.089666
8,1st class,adults,man,yes,0.154255,0.135258,0.089666
9,1st class,adults,man,yes,0.154255,0.135258,0.089666
10,1st class,adults,man,yes,0.154255,0.135258,0.089666
