# Pandas.

In [1]:
import pandas as pd

### Series.
A Series is a one-dimensional labeled array: a single column of values with an index.

In [2]:
# A Series built from a plain list gets the default integer index 0..n-1.
s = pd.Series(data=["peach", "lilly", "dazy"])
print(s)

0    peach
1    lilly
2     dazy
dtype: object


### DataFrame, basics.
A DataFrame is a two-dimensional table: multiple columns that share one index.

#### 1. Initialization.

In [3]:
# RangeIndex excludes `stop`, so start=1, stop=4 labels the rows 1, 2, 3.
idx = pd.RangeIndex(1, 4)
# Column-oriented construction: each key is a column name and each list holds
# that column's values in row order.
df = pd.DataFrame(
    {
        "Name": ["peach", "lilly", "dazy"],
        "Height": [182, 145, 165],
        "Weight": [65, 64, 53],
    },
    index=idx,
)
print(df)

    Name  Height  Weight
1  peach     182      65
2  lilly     145      64
3   dazy     165      53


#### 2. Indexing.
- Indexing rows.

In [4]:
# Label-based row selection: rows labeled 1 and 3, all columns.
df.loc[[1, 3], :]

Unnamed: 0,Name,Height,Weight
1,peach,182,65
3,dazy,165,53


- Indexing columns.

In [5]:
# Select the listed columns for every row; a list of names yields a DataFrame.
df.loc[:, ["Name", "Height"]]

Unnamed: 0,Name,Height
1,peach,182
2,lilly,145
3,dazy,165


- Indexing rows & columns.

In [6]:
# .loc takes row labels first, then column labels.
df.loc[[1, 3], ["Name"]]

Unnamed: 0,Name
1,peach
3,dazy


- Indexing by integer position (0-based).

In [7]:
df.iloc[
    [1],        #row at integer position 1, i.e. the second row (positions are 0-based).
    [1]         #column at integer position 1, i.e. the second column ('Height').
]

Unnamed: 0,Height
2,145


- Indexing by conditions.

In [8]:
# Combine two element-wise comparisons into one boolean mask and keep the matching rows.
df.loc[df["Height"].lt(180) & df["Name"].eq("dazy")]

Unnamed: 0,Name,Height,Weight
3,dazy,165,53


#### 3. Update.
Copy the DataFrame first, then update the copy, so the original stays unchanged.

In [9]:
# Work on an independent (deep) copy so that later edits leave `df` untouched.
df2 = df.copy(deep=True)
print(df2)

    Name  Height  Weight
1  peach     182      65
2  lilly     145      64
3   dazy     165      53


In [10]:
# Overwrite a single cell of the copy: row label 1, column 'Height'.
df2.loc[1, "Height"] = 175
print(df2)

    Name  Height  Weight
1  peach     175      65
2  lilly     145      64
3   dazy     165      53


#### 4. Delete.
`drop()` does not modify the original DataFrame; it returns a new DataFrame with the given rows or columns removed.
- Drop rows.

In [11]:
# Remove the rows labeled 1 and 3; the result is a new frame and df2 is not modified.
df2.drop(labels=[1, 3], axis="index")

Unnamed: 0,Name,Height,Weight
2,lilly,145,64


- Drop columns.

In [12]:
# Remove the 'Height' column; the result is a new frame and df2 is not modified.
df2.drop(labels=["Height"], axis="columns")

Unnamed: 0,Name,Weight
1,peach,65
2,lilly,64
3,dazy,53


- Drop rows by condition.

In [13]:
# Build the condition from df2 itself: the frame being filtered must also be the
# frame that is tested, otherwise edits made to df2 (e.g. the Height update) are ignored.
df2.drop(index=df2.index[df2["Height"] < 170])

Unnamed: 0,Name,Height,Weight
1,peach,175,65


### DataFrame, advanced functions.

In [14]:
# Show the original frame again; the update and drop examples operated on the copy df2.
df

Unnamed: 0,Name,Height,Weight
1,peach,182,65
2,lilly,145,64
3,dazy,165,53


#### 1. Print.

In [15]:
# First two rows of the frame.
df.head(n=2)

Unnamed: 0,Name,Height,Weight
1,peach,182,65
2,lilly,145,64


In [16]:
# Last two rows of the frame.
df.tail(n=2)

Unnamed: 0,Name,Height,Weight
2,lilly,145,64
3,dazy,165,53


#### 2. Sort.
- Sort by index.

In [17]:
# Order rows by their index labels, largest label first; returns a new frame.
df.sort_index(axis=0, ascending=False)

Unnamed: 0,Name,Height,Weight
3,dazy,165,53
2,lilly,145,64
1,peach,182,65


- Sort by column values.

In [18]:
# Sort ascending (the default) by Height; Weight only breaks ties between equal Heights.
df.sort_values(["Height", "Weight"])

Unnamed: 0,Name,Height,Weight
2,lilly,145,64
3,dazy,165,53
1,peach,182,65


#### 3. Merge & Split.

#### 4. Domain-specific classes.