In [33]:
# Import Pandas package
import pandas as pd

# Construct dataframe example
example = pd.DataFrame(
    data=[
        ['China', 'Asia', 9572900],
        ['Bangladesh', 'Asia', 143998],
        ['Brazil', 'South America', 8547403],
        ['Norway', 'Europe', 358207],
    ],
    columns=['Country', 'Continent', 'Population'],
)

# Display example
example

Unnamed: 0,Country,Continent,Population
0,China,Asia,9572900
1,Bangladesh,Asia,143998
2,Brazil,South America,8547403
3,Norway,Europe,358207


In [15]:
# Show information about example
example.info(verbose=False)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Columns: 3 entries, Country to Population
dtypes: int64(1), object(2)
memory usage: 228.0+ bytes


In [34]:
# Display rows 1 through 3 and columns Country through Continent
example.loc[1:3, 'Country':'Continent']

Unnamed: 0,Country,Continent
1,Bangladesh,Asia
2,Brazil,South America
3,Norway,Europe


In [37]:
# Sort example on Continent
example_new = example.sort_values('Continent', ascending=True, inplace=False)

# Display sorted data
example_new #since inplace =False --> not going to change the orignal dataframe, example

Unnamed: 0,Country,Continent,Population
0,China,Asia,9572900
1,Bangladesh,Asia,143998
3,Norway,Europe,358207
2,Brazil,South America,8547403


In [40]:
example # there is no change since inplace = False

Unnamed: 0,Country,Continent,Population
0,China,Asia,9572900
1,Bangladesh,Asia,143998
2,Brazil,South America,8547403
3,Norway,Europe,358207


In [38]:
from google.colab import sheets
sheet = sheets.InteractiveSheet(df=example_new)

MessageError: Error: credential propagation was unsuccessful

```

Quick cheatsheet

Reductions: axis=0 → column-wise result; axis=1 → row-wise result.

Drop/Rename/Sort index: axis=0 → affect rows; axis=1 → affect columns
Note: for sorting, axis = 0 is ommitted.

```


In [21]:
df = pd.DataFrame(
    data=[
        ['James', 30, 'New York'],
        ['Emily',40, 'LA'],
        ['Jessica', 25, 'Chicago'],
        ['Bob', 35, 'New York'],
        ['Kate', 25, 'LA'],
        ['Tom', 30, 'New York']
    ],
    columns=['Name', 'Age', 'City'],
    index=['a', 'b', 'c', 'd', 'e', 'f']
)
df

Unnamed: 0,Name,Age,City
a,James,30,New York
b,Emily,40,LA
c,Jessica,25,Chicago
d,Bob,35,New York
e,Kate,25,LA
f,Tom,30,New York


In [22]:
df.loc['c':'e']

Unnamed: 0,Name,Age,City
c,Jessica,25,Chicago
d,Bob,35,New York
e,Kate,25,LA


In [23]:
#iloc --> position/integer based --> 0th = 1st row, 1st postion = 2nd row, 2nd postion = 3rd row
df.iloc [2:5]

Unnamed: 0,Name,Age,City
c,Jessica,25,Chicago
d,Bob,35,New York
e,Kate,25,LA


In [25]:
df.Age #col subsetting

Unnamed: 0,Age
a,30
b,40
c,25
d,35
e,25
f,30


In [26]:
df['Age'] #col subsetting method 2  --> returns a series

Unnamed: 0,Age
a,30
b,40
c,25
d,35
e,25
f,30


In [27]:
df[['Age']]  #col subsetting method 3  --> returns a dataframe

Unnamed: 0,Age
a,30
b,40
c,25
d,35
e,25
f,30


In [28]:
df.Age >= 30

Unnamed: 0,Age
a,True
b,True
c,False
d,True
e,False
f,True


In [30]:
df[df.Age >= 30]

Unnamed: 0,Name,Age,City
a,James,30,New York
b,Emily,40,LA
d,Bob,35,New York
f,Tom,30,New York


In [31]:
df.loc[df['Age'] >= 30]

Unnamed: 0,Name,Age,City
a,James,30,New York
b,Emily,40,LA
d,Bob,35,New York
f,Tom,30,New York
