# IPython Sessions - Part VI - Chapter 07

## DataFrames

In [2]:
import pandas as pd 

In [3]:
# Source data for a DataFrame: one dictionary entry per student,
# each mapping the student's name to a list of three test grades.
grades_dict = {
    'Wally': [87, 96, 70],
    'Eva': [100, 87, 90],
    'Sam': [94, 77, 90],
    'Katie': [100, 81, 82],
    'Bob': [83, 65, 85],
}

In [4]:
# Each dictionary key becomes a column; the rows receive the default
# integer index (0, 1, 2).
grades = pd.DataFrame(grades_dict)

In [6]:
grades

Unnamed: 0,Wally,Eva,Sam,Katie,Bob
0,87,100,94,100,83
1,96,87,77,81,65
2,70,90,90,82,85


In [7]:
# Customising a DataFrame's index: pass one row label per row via the
# `index` keyword when constructing the DataFrame.
pd.DataFrame(grades_dict, index=['Test1', 'Test2', 'Test3'])

Unnamed: 0,Wally,Eva,Sam,Katie,Bob
Test1,87,100,94,100,83
Test2,96,87,77,81,65
Test3,70,90,90,82,85


In [9]:
# Alternatively, relabel the rows of the existing DataFrame by assigning
# to its `index` attribute (this modifies `grades` itself).
grades.index = ['Test1', 'Test2', 'Test3']

In [10]:
grades

Unnamed: 0,Wally,Eva,Sam,Katie,Bob
Test1,87,100,94,100,83
Test2,96,87,77,81,65
Test3,70,90,90,82,85


In [11]:
# Accessing a DataFrame's columns: indexing with a column name returns
# that column as a Series labelled by the row index.
grades['Eva']

Test1    100
Test2     87
Test3     90
Name: Eva, dtype: int64

In [12]:
# Attribute-style access returns the same kind of Series as grades['Sam'].
grades.Sam

Test1    94
Test2    77
Test3    90
Name: Sam, dtype: int64

In [13]:
grades

Unnamed: 0,Wally,Eva,Sam,Katie,Bob
Test1,87,100,94,100,83
Test2,96,87,77,81,65
Test3,70,90,90,82,85


In [17]:
# Accessing rows by label and integer index:
# `loc` selects a row by its index label and returns it as a Series.
grades.loc['Test1']

Wally     87
Eva      100
Sam       94
Katie    100
Bob       83
Name: Test1, dtype: int64

In [18]:
# `iloc` selects by zero-based integer position; position 0 is the same
# row as label 'Test1'.
grades.iloc[0]

Wally     87
Eva      100
Sam       94
Katie    100
Bob       83
Name: Test1, dtype: int64

### Selecting rows via Slices and Lists using loc and iloc

In [38]:
# A label slice with `loc` includes the end label ('Test3' is returned).
grades.loc['Test1':'Test3']

Unnamed: 0,Wally,Eva,Sam,Katie,Bob
Test1,87,100,94,100,83
Test2,96,87,77,81,65
Test3,70,90,90,82,85


In [23]:
# An integer slice with `iloc` excludes the upper bound, so 0:2 returns
# only the rows at positions 0 and 1.
grades.iloc[0:2]

Unnamed: 0,Wally,Eva,Sam,Katie,Bob
Test1,87,100,94,100,83
Test2,96,87,77,81,65


In [24]:
# Select specific (not necessarily adjacent) rows by passing a list of labels.
grades.loc[['Test1', 'Test3']]

Unnamed: 0,Wally,Eva,Sam,Katie,Bob
Test1,87,100,94,100,83
Test3,70,90,90,82,85


In [26]:
# The same selection by integer position: rows 0 and 2.
grades.iloc[[0, 2]]

Unnamed: 0,Wally,Eva,Sam,Katie,Bob
Test1,87,100,94,100,83
Test3,70,90,90,82,85


#### Selecting subsets of the rows and columns

In [34]:
grades

Unnamed: 0,Wally,Eva,Sam,Katie,Bob
Test1,87,100,94,100,83
Test2,96,87,77,81,65
Test3,70,90,90,82,85


In [35]:
# First indexer selects rows (label slice, end label included);
# second indexer selects columns (list of column names).
grades.loc['Test1': 'Test2', ['Eva', 'Katie']]

Unnamed: 0,Eva,Katie
Test1,100,100
Test2,87,81


In [36]:
# Rows at positions 0 and 2, and the first three columns
# (integer slice 0:3 excludes position 3).
grades.iloc[[0, 2], 0:3]

Unnamed: 0,Wally,Eva,Sam
Test1,87,100,94
Test3,70,90,90


### Boolean Indexing

In [40]:
# Keep only the grades that are at least 90; cells failing the condition
# become missing values (NaN), and the remaining grades display as floats.
grades[grades >= 90]

Unnamed: 0,Wally,Eva,Sam,Katie,Bob
Test1,,100.0,94.0,100.0,
Test2,96.0,,,,
Test3,,90.0,90.0,,


In [41]:
# Combine conditions element-wise with `&`. Each comparison must be
# parenthesised because `&` binds more tightly than `>=` and `<`.
grades[(grades >= 80) & (grades < 90)]

Unnamed: 0,Wally,Eva,Sam,Katie,Bob
Test1,87.0,,,,83.0
Test2,,87.0,,81.0,
Test3,,,,82.0,85.0


In [42]:
### Accessing a Specific DataFrame cell by row and column

In [43]:
# `at` reads a single cell by row label and column label.
grades.at['Test2', 'Eva']

87

In [44]:
# `iat` reads a single cell by integer position: row 0 ('Test1'),
# column 2 ('Sam').
grades.iat[0, 2]

94

In [45]:
# Assign a new value to Eva's Test2 grade by label, then read it back
# to confirm the change. This modifies `grades` in place.
grades.at['Test2', 'Eva'] = 100
grades.at['Test2', 'Eva']

100

In [46]:
# Assign by integer position: row 1 ('Test2'), column 2 ('Sam'),
# then read the cell back. This also modifies `grades` in place.
grades.iat[1, 2] = 87
grades.iat[1, 2]

87

In [47]:
# Per-column (per-student) summary statistics: count, mean, standard
# deviation, minimum, quartiles and maximum.
grades.describe()

Unnamed: 0,Wally,Eva,Sam,Katie,Bob
count,3.0,3.0,3.0,3.0,3.0
mean,84.333333,96.666667,90.333333,87.666667,77.666667
std,13.203535,5.773503,3.511885,10.692677,11.015141
min,70.0,90.0,87.0,81.0,65.0
25%,78.5,95.0,88.5,81.5,74.0
50%,87.0,100.0,90.0,82.0,83.0
75%,91.5,100.0,92.0,91.0,84.0
max,96.0,100.0,94.0,100.0,85.0


In [48]:
# Display floating-point values with two digits after the decimal point.
# The fully qualified option name is used because the bare pattern
# 'precision' matches more than one option in recent pandas releases
# (e.g. 'display.precision' and 'styler.format.precision') and then
# raises an OptionError.
pd.set_option('display.precision', 2)

In [49]:
grades.describe()

Unnamed: 0,Wally,Eva,Sam,Katie,Bob
count,3.0,3.0,3.0,3.0,3.0
mean,84.33,96.67,90.33,87.67,77.67
std,13.2,5.77,3.51,10.69,11.02
min,70.0,90.0,87.0,81.0,65.0
25%,78.5,95.0,88.5,81.5,74.0
50%,87.0,100.0,90.0,82.0,83.0
75%,91.5,100.0,92.0,91.0,84.0
max,96.0,100.0,94.0,100.0,85.0


In [50]:
grades.mean()

Wally    84.33
Eva      96.67
Sam      90.33
Katie    87.67
Bob      77.67
dtype: float64

### Transposing DataFrame with T attribute

In [52]:
# `T` returns a transposed view of the data: students become the rows
# and tests become the columns.
grades.T

Unnamed: 0,Test1,Test2,Test3
Wally,87,96,70
Eva,100,100,90
Sam,94,87,90
Katie,100,81,82
Bob,83,65,85


In [56]:
# Describing the transpose gives summary statistics per test
# (across all students) instead of per student.
grades.T.describe()

Unnamed: 0,Test1,Test2,Test3
count,5.0,5.0,5.0
mean,92.8,85.8,83.4
std,7.66,13.81,8.23
min,83.0,65.0,70.0
25%,87.0,81.0,82.0
50%,94.0,87.0,85.0
75%,100.0,96.0,90.0
max,100.0,100.0,90.0


In [57]:
# Average of all students' grades on each test.
grades.T.mean()

Test1    92.8
Test2    85.8
Test3    83.4
dtype: float64

### Sorting rows by their indices

In [60]:
grades

Unnamed: 0,Wally,Eva,Sam,Katie,Bob
Test1,87,100,94,100,83
Test2,96,100,87,81,65
Test3,70,90,90,82,85


In [59]:
# Sort the rows by index label in descending order (Test3 down to Test1).
# The result is a new DataFrame; `grades` keeps its original row order.
grades.sort_index(ascending=False)

Unnamed: 0,Wally,Eva,Sam,Katie,Bob
Test3,70,90,90,82,85
Test2,96,100,87,81,65
Test1,87,100,94,100,83


#### Sorting by column indices

In [63]:
# axis=1 sorts the column labels (student names) instead of the row
# labels, here in ascending alphabetical order.
grades.sort_index(axis=1)

Unnamed: 0,Bob,Eva,Katie,Sam,Wally
Test1,83,100,100,94,87
Test2,65,100,81,87,96
Test3,85,90,82,90,70


### Sorting by column values

In [65]:
# Reorder the columns by the values in row 'Test1', highest grade first.
# With axis=1, `by` names a row label rather than a column.
grades.sort_values(by='Test1', axis=1, ascending=False)

Unnamed: 0,Eva,Katie,Sam,Wally,Bob
Test1,100,100,94,87,83
Test2,100,81,87,96,65
Test3,90,82,90,70,85


In [70]:
grades

Unnamed: 0,Wally,Eva,Sam,Katie,Bob
Test1,87,100,94,100,83
Test2,96,100,87,81,65
Test3,70,90,90,82,85


In [67]:
# Sort just the Test1 row (a Series) in descending order, which avoids
# displaying the other tests.
# NOTE(review): the tied 100s (Katie, Eva) appear in a different order
# than in the DataFrame sort above — presumably because the default sort
# algorithm is not stable; confirm if a fixed tie order is required.
grades.loc['Test1'].sort_values(ascending=False)

Katie    100
Eva      100
Sam       94
Wally     87
Bob       83
Name: Test1, dtype: int64