In [3]:
import pandas as pd

# Creating DataFrames

### pd.DataFrame(data=None, index=None, columns=None, dtype=None, copy=None)
**data:** ndarray (structured or homogeneous), Iterable, dict, or DataFrame \
**index:** Index or array-like \
**columns:** Index or array-like \
**dtype:** dtype, default None \
**copy:** bool or None, default None

In [118]:
# Build a frame from a dict: every key becomes a column label and every list
# supplies that column's values. No index is passed, so rows are labelled 0..2.
df = pd.DataFrame(
    data={
        "age": [18, 25, 24],
        "sex": ["M", "M", "F"],
        "height": [178, 169, 166],
        "weight": [82, 102, 66],
    },
)
df

Unnamed: 0,age,sex,height,weight
0,18,M,178,82
1,25,M,169,102
2,24,F,166,66


In [32]:
# Build the frame row by row: each inner list is one observation, while
# `index` and `columns` supply the row and column labels explicitly.
df = pd.DataFrame(
    data=[
        [18, "M", 178, 82],
        [25, "M", 169, 102],
        [24, "F", 166, 66],
    ],
    index=[1, 2, 3],
    columns=["age", "sex", "height", "weight"],
)
df

Unnamed: 0,age,sex,height,weight
1,18,M,178,82
2,25,M,169,102
3,24,F,166,66


# Subset Observations - rows

In [33]:
# Sample frame for the row-subsetting examples, with row labels 1..3.
df = pd.DataFrame(
    data={
        "age": [18, 25, 24],
        "sex": ["M", "M", "F"],
        "height": [178, 169, 166],
        "weight": [82, 102, 66],
    },
    index=[1, 2, 3],
)
df

Unnamed: 0,age,sex,height,weight
1,18,M,178,82
2,25,M,169,102
3,24,F,166,66


In [34]:
# Boolean-mask row filter: keep rows where age > 20 OR weight > 100.
# Each comparison is parenthesised because `|` binds tighter than `>`.
df[(df.age > 20) | (df.weight > 100)]

Unnamed: 0,age,sex,height,weight
2,25,M,169,102
3,24,F,166,66


In [35]:
# Boolean-mask row filter: keep rows where age > 20 AND weight > 100.
# Each comparison is parenthesised because `&` binds tighter than `>`.
df[(df.age > 20) & (df.weight > 100)]

Unnamed: 0,age,sex,height,weight
2,25,M,169,102


In [36]:
# First two rows of the frame.
df.head(2)

Unnamed: 0,age,sex,height,weight
1,18,M,178,82
2,25,M,169,102


In [37]:
# Last two rows of the frame.
df.tail(2)

Unnamed: 0,age,sex,height,weight
2,25,M,169,102
3,24,F,166,66


# Subset Variables - columns

In [38]:
# Sample frame for the column-subsetting examples, with row labels 1..3.
df = pd.DataFrame(
    data={
        "age": [18, 25, 24],
        "sex": ["M", "M", "F"],
        "height": [178, 169, 166],
        "weight": [82, 102, 66],
    },
    index=[1, 2, 3],
)
df

Unnamed: 0,age,sex,height,weight
1,18,M,178,82
2,25,M,169,102
3,24,F,166,66


In [39]:
# A single column label returns that column as a Series.
df["height"]

1    178
2    169
3    166
Name: height, dtype: int64

In [40]:
# A list of column labels returns a DataFrame holding just those columns.
df[["age", "height"]]

Unnamed: 0,age,height
1,18,178
2,25,169
3,24,166


### df.filter(items=None, like=None, regex=None, axis=None)
**items:** list-like \
**like:** str \
**regex:** str (regular expression) \
**axis:** {0 or ‘index’, 1 or ‘columns’, None}, default None

In [41]:
# Select columns by exact label.
df.filter(items=["sex", "height"])

Unnamed: 0,sex,height
1,M,178
2,M,169
3,F,166


In [61]:
# Keep columns whose label ends in "ght" (`$` anchors the match at the end).
df.filter(regex="ght$")

Unnamed: 0,height,weight
1,178,82
2,169,102
3,166,66


# Subsets - rows and columns

In [42]:
# Sample frame for the combined row/column selection examples, row labels 1..3.
df = pd.DataFrame(
    data={
        "age": [18, 25, 24],
        "sex": ["M", "M", "F"],
        "height": [178, 169, 166],
        "weight": [82, 102, 66],
    },
    index=[1, 2, 3],
)
df

Unnamed: 0,age,sex,height,weight
1,18,M,178,82
2,25,M,169,102
3,24,F,166,66


### df.loc[]
Access a group of rows and columns by label(s) or a boolean array. \
**Label slices include both the start and the stop label.**

In [47]:
# All rows (`:`), columns picked by label.
df.loc[:, ["age", "weight"]]

Unnamed: 0,age,weight
1,18,82
2,25,102
3,24,66


In [55]:
# Label slices keep both endpoints: rows 2 through 3, columns age through height.
df.loc[2:3, "age":"height"]

Unnamed: 0,age,sex,height
2,25,M,169
3,24,F,166


In [48]:
# Boolean mask selects the rows, a label list selects the columns.
df.loc[df.age > 20, ["height", "weight"]]

Unnamed: 0,height,weight
2,169,102
3,166,66


### df.iloc[]
Purely integer-location based indexing for selection by position. \
**Integer slices exclude the end position, as in regular Python slicing.**

In [51]:
# All rows; columns at positions 0 and 3 (age and weight).
df.iloc[:, [0, 3]]

Unnamed: 0,age,weight
1,18,82
2,25,102
3,24,66


In [53]:
# Rows from position 1 onward, i.e. everything except the first row.
df.iloc[1:]

Unnamed: 0,age,sex,height,weight
2,25,M,169,102
3,24,F,166,66


### df.at[]
Access a single value for a row/column label pair.

In [56]:
# Scalar lookup by label: row label 3, column label "height".
df.at[3, "height"]

166

### df.iat[]
Access a single value for a row/column pair by integer position.

In [58]:
# Scalar lookup by position: third row, third column (zero-based 2, 2).
df.iat[2, 2]

166

# Using query

### df.query(expr, *, inplace=False, **kwargs)
Query the columns of a DataFrame with a boolean expression.

**expr:** str \
**inplace:** bool \
**\*\*kwargs:** keyword arguments forwarded to `DataFrame.eval()`

In [135]:
# Sample frame for the query examples. "number of days" deliberately contains
# spaces so the backtick-quoting syntax can be shown.
df = pd.DataFrame(
    data={
        "age": [18, 25, 24],
        "sex": ["Male", "Male", "Female"],
        "height": [178, 169, 166],
        "weight": [82, 102, 66],
        "number of days": [30, 103, 83],
    },
    index=[1, 2, 3],
)
df

Unnamed: 0,age,sex,height,weight,number of days
1,18,Male,178,82,30
2,25,Male,169,102,103
3,24,Female,166,66,83


In [136]:
# Column names are used directly inside the expression; `and` combines conditions.
df.query("age > 20 and weight > 100")

Unnamed: 0,age,sex,height,weight,number of days
2,25,Male,169,102,103


In [137]:
# Backticks quote a column name that contains spaces.
df.query("`number of days` <= 100")

Unnamed: 0,age,sex,height,weight,number of days
1,18,Male,178,82,30
3,24,Female,166,66,83


In [138]:
# String methods are reachable through `.str` inside the expression.
# NOTE(review): some pandas/numexpr combinations may need engine="python"
# for method calls like this one; confirm on the target version.
df.query("sex.str.startswith('M')")

Unnamed: 0,age,sex,height,weight,number of days
1,18,Male,178,82,30
2,25,Male,169,102,103


# Reshaping Data

### pd.concat(objs, *, axis=0, join='outer', ignore_index=False, keys=None, levels=None, names=None, verify_integrity=False, sort=False, copy=True)

**objs:** a sequence or mapping of Series or DataFrame objects \
**axis:** {0/’index’, 1/’columns’}, default 0 \
**join:** {‘inner’, ‘outer’}, default ‘outer’ \
**ignore_index:** bool, default False \
**keys:** sequence, default None \
**levels:** list of sequences, default None \
**names:** list, default None \
**verify_integrity:** bool, default False \
**sort:** bool, default False \
**copy:** bool, default True

### Concat by row

In [67]:
# First frame for the concat examples, with row labels 1..3.
df1 = pd.DataFrame(
    data={
        "age": [18, 25, 24],
        "sex": ["M", "M", "F"],
        "height": [178, 169, 166],
        "weight": [82, 102, 66],
    },
    index=[1, 2, 3],
)
df1

Unnamed: 0,age,sex,height,weight
1,18,M,178,82
2,25,M,169,102
3,24,F,166,66


In [82]:
# Second frame with the same columns. Its row labels (1 and 3) overlap df1's.
df2 = pd.DataFrame(
    data={
        "age": [34, 27],
        "sex": ["F", "M"],
        "height": [153, 172],
        "weight": [77, 83],
    },
    index=[1, 3],
)
df2

Unnamed: 0,age,sex,height,weight
1,34,F,153,77
3,27,M,172,83


In [83]:
# Stack df2's rows below df1's. ignore_index=True discards the original row
# labels and numbers the result 0..n-1, much like calling reset_index(drop=True).
pd.concat([df1, df2], ignore_index=True)

Unnamed: 0,age,sex,height,weight
0,18,M,178,82
1,25,M,169,102
2,24,F,166,66
3,34,F,153,77
4,27,M,172,83


### Concat by column

In [94]:
# Extra column to attach to df1; it shares df1's row labels 1..3.
df3 = pd.DataFrame(
    data={"salary": [25000, 19000, 32000]},
    index=[1, 2, 3],
)
df3

Unnamed: 0,salary
1,25000
2,19000
3,32000


In [95]:
# axis=1 places the frames side by side, matching rows on their index labels.
pd.concat([df1, df3], axis=1)

Unnamed: 0,age,sex,height,weight,salary
1,18,M,178,82,25000
2,25,M,169,102,19000
3,24,F,166,66,32000


### df.sort_values(by, *, axis=0, ascending=True, inplace=False, kind='quicksort', na_position='last', ignore_index=False, key=None)

**by:** str or list of str \
**axis:** {0 or ‘index’, 1 or ‘columns’}, default 0 \
**ascending:** bool or list of bool, default True \
**inplace:** bool, default False \
**kind:** {‘quicksort’, ‘mergesort’, ‘heapsort’, ‘stable’}, default ‘quicksort’ \
**na_position:** {‘first’, ‘last’}, default ‘last’ \
**ignore_index:** bool, default False \
**key:** callable, optional

In [96]:
# Sample frame for the sort_values examples, with row labels 1..3.
df = pd.DataFrame(
    data={
        "age": [18, 25, 24],
        "sex": ["M", "M", "F"],
        "height": [178, 169, 166],
        "weight": [82, 102, 66],
    },
    index=[1, 2, 3],
)
df

Unnamed: 0,age,sex,height,weight
1,18,M,178,82
2,25,M,169,102
3,24,F,166,66


In [100]:
# Order rows by the weight column, smallest first (ascending is the default).
df.sort_values(by="weight")

Unnamed: 0,age,sex,height,weight
3,24,F,166,66
1,18,M,178,82
2,25,M,169,102


In [102]:
# Order rows by the weight column, largest first.
df.sort_values(by="weight", ascending=False)

Unnamed: 0,age,sex,height,weight
2,25,M,169,102
1,18,M,178,82
3,24,F,166,66


### df.sort_index(*, axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True, ignore_index=False, key=None)

**axis:** {0 or ‘index’, 1 or ‘columns’}, default 0 \
**ascending:** bool or list-like of bools, default True \
**inplace:** bool, default False \
**kind:** {‘quicksort’, ‘mergesort’, ‘heapsort’, ‘stable’}, default ‘quicksort’ \
**na_position:** {‘first’, ‘last’}, default ‘last’ \
**sort_remaining:** bool, default True \
**ignore_index:** bool, default False \
**key:** callable, optional

In [105]:
# Sample frame for the sort_index examples; the row labels are deliberately
# out of order (2, 1, 3).
df = pd.DataFrame(
    data={
        "age": [18, 25, 24],
        "sex": ["M", "M", "F"],
        "height": [178, 169, 166],
        "weight": [82, 102, 66],
    },
    index=[2, 1, 3],
)
df

Unnamed: 0,age,sex,height,weight
2,18,M,178,82
1,25,M,169,102
3,24,F,166,66


In [106]:
# Reorder rows by index label in ascending order (1, 2, 3).
df.sort_index()

Unnamed: 0,age,sex,height,weight
1,25,M,169,102
2,18,M,178,82
3,24,F,166,66


In [108]:
# Reorder rows by index label in descending order (3, 2, 1).
df.sort_index(ascending=False)

Unnamed: 0,age,sex,height,weight
3,24,F,166,66
2,18,M,178,82
1,25,M,169,102


### df.rename(mapper=None, *, index=None, columns=None, axis=None, copy=None, inplace=False, level=None, errors='ignore')

**mapper:** dict-like or function \
**index:** dict-like or function \
**columns:** dict-like or function \
**axis:** {0 or ‘index’, 1 or ‘columns’}, default 0 \
**copy:** bool, default True \
**inplace:** bool, default False \
**level:** int or level name, default None \
**errors:** {‘ignore’, ‘raise’}, default ‘ignore’

In [110]:
# Sample frame for the rename examples; "h" and "w" are abbreviated column
# labels that are expanded in the next cell.
df = pd.DataFrame(
    data={
        "age": [18, 25, 24],
        "sex": ["M", "M", "F"],
        "h": [178, 169, 166],
        "w": [82, 102, 66],
    },
    index=[2, 1, 3],
)
df

Unnamed: 0,age,sex,h,w
2,18,M,178,82
1,25,M,169,102
3,24,F,166,66


In [115]:
# Expand the abbreviated column labels to their full names. rename() returns a
# new DataFrame, so rebind `df` instead of mutating it with inplace=True: the
# displayed result is the same, the call stays chainable, and re-running the
# cell is harmless because labels that are no longer present are ignored
# (errors="ignore" is the default).
df = df.rename(columns={"h": "height", "w": "weight"})
df

Unnamed: 0,age,sex,height,weight
2,18,M,178,82
1,25,M,169,102
3,24,F,166,66


In [116]:
# A callable mapper is applied to every column label (axis=1). The result is a
# new DataFrame; `df` keeps its original labels since inplace defaults to False.
df.rename(str.upper, axis=1)

Unnamed: 0,AGE,SEX,HEIGHT,WEIGHT
2,18,M,178,82
1,25,M,169,102
3,24,F,166,66


### df.reset_index(level=None, *, drop=False, inplace=False, col_level=0, col_fill='', allow_duplicates=_NoDefault.no_default, names=None)

**level:** int, str, tuple, or list, default None \
**drop:** bool, default False \
**inplace:** bool, default False \
**col_level:** int or str, default 0 \
**col_fill:** object, default ‘’ \
**allow_duplicates:** bool, optional, default lib.no_default \
**names:** int, str or 1-dimensional list, default None

In [123]:
# Sample frame for the reset_index examples, with names as the row labels.
df = pd.DataFrame(
    data={
        "age": [18, 25, 24],
        "sex": ["M", "M", "F"],
        "height": [178, 169, 166],
        "weight": [82, 102, 66],
    },
    index=["Eric", "Sam", "Mary"],
)
df

Unnamed: 0,age,sex,height,weight
Eric,18,M,178,82
Sam,25,M,169,102
Mary,24,F,166,66


In [125]:
# The old index becomes a regular column named "index"; rows are renumbered from 0.
df.reset_index()

Unnamed: 0,index,age,sex,height,weight
0,Eric,18,M,178,82
1,Sam,25,M,169,102
2,Mary,24,F,166,66


In [126]:
# `names` sets the label of the column created from the old index.
# NOTE(review): `names` appears to require pandas >= 1.5; confirm the minimum version.
df.reset_index(names="name")

Unnamed: 0,name,age,sex,height,weight
0,Eric,18,M,178,82
1,Sam,25,M,169,102
2,Mary,24,F,166,66


### df.drop(labels=None, *, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise')

Drop specified labels from rows or columns.

**labels:** single label or list-like \
**axis:** {0 or ‘index’, 1 or ‘columns’}, default 0 \
**index:** single label or list-like \
**columns:** single label or list-like \
**level:** int or level name, optional \
**inplace:** bool, default False \
**errors:** {‘ignore’, ‘raise’}, default ‘raise’

In [130]:
# Sample frame for the drop examples, with names as the row labels.
df = pd.DataFrame(
    data={
        "age": [18, 25, 24],
        "sex": ["M", "M", "F"],
        "height": [178, 169, 166],
        "weight": [82, 102, 66],
    },
    index=["Eric", "Sam", "Mary"],
)
df

Unnamed: 0,age,sex,height,weight
Eric,18,M,178,82
Sam,25,M,169,102
Mary,24,F,166,66


In [133]:
# Remove rows by index label; the result is a new frame without "Sam".
df.drop(index=["Sam"])

Unnamed: 0,age,sex,height,weight
Eric,18,M,178,82
Mary,24,F,166,66


In [134]:
# Remove columns by label; the result is a new frame with only age and sex left.
df.drop(columns=["height", "weight"])

Unnamed: 0,age,sex
Eric,18,M
Sam,25,M
Mary,24,F
