In [None]:
import numpy as np
import pandas as pd

#### pandas.Series(data=None, index=None, dtype=None, name=None, copy=False, fastpath=False)
One-dimensional ndarray with axis labels

In [None]:
# Build a Series from a subject -> score mapping; the dict keys become
# the index labels and the Series is named at construction time.
scores = {
    'Chinese': 75,
    'English': 86,
    'Math': 91,
}
obj1 = pd.Series(scores, name='Student_1')
obj1

In [None]:
# Build the Series first, then attach its name as a separate step.
obj2 = pd.Series({'Chinese': 82.5, 'English': 67, 'Science': 78})
obj2.name = 'Student_2'
obj2

In [None]:
# Addition aligns the operands on their index labels; a label present in
# only one operand ('Math', 'Science') yields NaN in the result.
obj3 = obj1 + obj2
obj3

In [None]:
# Boolean mask: True where obj3 holds a missing value (NaN).
obj3.isnull()

In [None]:
# Boolean mask: True where obj3 holds an actual (non-missing) value.
obj3.notnull()

#### pandas.DataFrame(data=None, index=None, columns=None, dtype=None, copy=False)
Two-dimensional size-mutable, potentially heterogeneous tabular data structure with labeled axes (rows and columns). Arithmetic operations align on both row and column labels. Can be thought of as a dict-like container for Series objects. 

In [None]:
# Each Series in the list becomes one row: the Series names are used as the
# row labels and the combined index labels of the Series form the columns.
df = pd.DataFrame([obj1, obj2])
print(f'columns = {df.columns}')
print(f'index = {df.index}')
df

In [None]:
# Dict of columns: every key is a student (one column) and every list holds
# that student's scores in the order of `index`; NaN marks a missing score.
df = pd.DataFrame(
    data={
        'Student_1': [75.0, 86.0, 91.0, np.nan],
        'Student_2': [82.5, 67.0, np.nan, 78.0],
        'Student_3': [69.0, 75.0, 83.0, 64.0],
    },
    index=['Chinese', 'English', 'Math', 'Science'],
)
print(f'columns = {df.columns}')
print(f'index = {df.index}')
df

In [None]:
# Dict of columns, transposed layout: every key is a subject (one column)
# and the rows are the students listed in `index`; NaN marks a missing score.
df = pd.DataFrame(
    data={
        'Chinese': [75.0, 82.5, 69.0],
        'English': [86.0, 67.0, 75.0],
        'Math': [91.0, np.nan, 83.0],
        'Science': [np.nan, 78.0, 64.0],
    },
    index=['Student_1', 'Student_2', 'Student_3'],
)
print(f'columns = {df.columns}')
print(f'index = {df.index}')
df

In [None]:
# List of rows: each inner list is one student's scores, matched by position
# to the names in `columns`; NaN marks a missing score.
df = pd.DataFrame(
    data=[
        [75.0, 86.0, 91.0, np.nan],
        [82.5, 67.0, np.nan, 78.0],
        [69.0, 75.0, 83.0, 64.0],
    ],
    index=['Student_1', 'Student_2', 'Student_3'],
    columns=['Chinese', 'English', 'Math', 'Science'],
)
print(f'columns = {df.columns}')
print(f'index = {df.index}')
df

In [None]:
# A single column is a Series. Attribute access (df.Chinese) and item
# access (df['Chinese']) select the same column.
print(f'type(df.Chinese) = {type(df.Chinese)}\n')
print(df.Chinese, '\n')
print(df['Chinese'])

#### DataFrame.T
Transpose index and columns.

In [None]:
# Swap the row and column axes of df (returns a new object).
df.T

#### pandas.DataFrame.loc
Access a group of rows and columns by label(s) or a boolean array.

.loc[ ] is primarily label based, but may also be used with a boolean array.

In [None]:
# Column selection by attribute, for comparison with the row selections below.
print(df.Chinese, '\n')

# .loc with one label selects a row; with (row, column) it selects a scalar.
print(df.loc['Student_1'], '\n')
print(df.loc['Student_1', 'Chinese'])
# Same scalar, reached in two steps: select the row, then index that Series.
print(df.loc['Student_1']['Chinese'], '\n')

# Attention: label-based slices include both endpoints, so 'Math' is included
print(df.loc['Student_1']['Chinese':'Math'])

#### DataFrame.iloc
Purely integer-location based indexing for selection by position.

In [None]:
# .iloc with one position selects a row; with (row, column) it selects a scalar.
print(df.iloc[0], '\n')
print(df.iloc[0, 0])
# Index the row Series by position with .iloc as well: plain [0] on a
# label-indexed Series relies on a deprecated positional fallback.
print(df.iloc[0].iloc[0], '\n')

# Attention: index-2 not included
print(df.iloc[0].iloc[0:2])

In [None]:
# Work on an independent copy so that df itself stays untouched.
df2 = df.copy()
# Assigning to a column label that does not exist yet appends that column.
df2['Music'] = [100, 90, 95]
df2

In [None]:
# del removes the column from df2 in place.
del df2['Chinese']
df2

#### DataFrame.reindex(labels=None, index=None, columns=None, axis=None, method=None, copy=True, level=None, fill_value=nan, limit=None, tolerance=None)
Conform DataFrame to new index with optional filling logic, placing NA/NaN in locations having no value in the previous index. A new object is produced unless the new index is equivalent to the current one and copy=False.

In [None]:
# Reorder the rows to the given labels. 'Student_0' is not an existing
# label, so its row is created and filled with NaN.
df2 = df2.reindex(['Student_0', 'Student_2', 'Student_3', 'Student_1'])
df2

In [None]:
# Replace the row labels in place (one new label per existing row, by position).
df2.index = [0, 2, 3, 1]
df2

In [None]:
# Rename all columns at once (one new name per existing column, by position).
df2.columns = ['EN', 'MA', 'SC', 'MU']
df2

In [None]:
# .values exposes the frame's data as a NumPy array (labels are dropped).
values = df2.values
print(f'type(values) = {type(values)}')
values

In [None]:
# df2 was created with df.copy(), so it is an independent object:
# the columns and labels changed on df2 above did not change df.
df

In [None]:
# Insert a new column: take the part after the underscore of every row
# label (e.g. 'Student_1' -> '1') and store it as 'ID'.
df['ID'] = [label.split('_')[1] for label in df.index]
df

In [None]:
# Rearrange the columns by selecting them in the desired order.
df = df.loc[:, ['ID', 'Chinese', 'English', 'Math', 'Science']]
df

#### DataFrame.set_index(keys, drop=True, append=False, inplace=False, verify_integrity=False)
Set the DataFrame index (row labels) using one or more existing columns. By default yields a new object.

In [None]:
# Promote the 'ID' column to the row index. set_index returns a new
# DataFrame by default, hence the reassignment.
df = df.set_index('ID')
df

#### DataFrame.drop(labels=None, axis=0, index=None, columns=None, level=None, inplace=False, errors='raise')
Drop specified labels from rows or columns.

In [None]:
# drop() returns a new DataFrame without the listed columns;
# df itself is left unchanged because this is not an in-place operation.
df.drop(columns=['Chinese', 'English'])

#### DataFrame.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False)
Remove missing values.

In [None]:
# With the defaults (axis=0, how='any') every row containing at least one
# missing value is dropped; a new DataFrame is returned.
df.dropna()

In [None]:
# Append an 'Avg' row holding each column's mean, rounded to one decimal.
df2 = df.copy()
column_means = df.mean(axis=0).round(1)
df2.loc['Avg'] = column_means
df2

In [None]:
# Row totals over all current columns. This must run before 'Avg' is added,
# otherwise the average column would be counted in the sum.
df2['Sum'] = df2.sum(axis=1)
# Row means over the label slice 'Chinese'..'Science' only, which leaves out
# the 'Sum' column just appended after it.
df2['Avg'] = df2.loc[:, 'Chinese': 'Science'].mean(axis=1)
df2

In [None]:
# Remove the 'Avg' summary row, then order the remaining rows by their
# 'Avg' column from lowest to highest.
df3 = df2.drop(index='Avg')
df4 = df3.sort_values('Avg')
df4

#### DataFrame.sort_index(axis=0, level=None, ascending=True, inplace=False, kind='quicksort', na_position='last', sort_remaining=True, by=None)
Sort object by labels (along an axis)

In [None]:
# axis=0: order the rows by their index labels.
df4.sort_index(axis=0)

In [None]:
# axis=1: order the columns by their labels.
df4.sort_index(axis=1)

#### DataFrame.rank(axis=0, method='average', numeric_only=None, na_option='keep', ascending=True, pct=False)
Compute numerical data ranks (1 through n) along axis. Equal values are assigned a rank that is the average of the ranks of those values

In [None]:
# Rank the values within each column; ascending=False gives rank 1 to the
# largest value.
df3.rank(axis=0, ascending=False)

#### DataFrame.idxmax(axis=0, skipna=True)
Return index of first occurrence of maximum over requested axis. NA/null values are excluded.

In [None]:
# Replace the row labels with names (one per existing row, by position).
df3.index = ['Ann', 'Bob', 'Claire']
df3

In [None]:
# For each column, the row label at which the largest value occurs.
df3.idxmax()

In [None]:
# For each column, the row label at which the smallest value occurs.
df3.idxmin()

In [None]:
# Running total down each column.
df3.cumsum()

#### DataFrame.apply(func, axis=0, broadcast=False, raw=False, reduce=None, args=(), **kwds)
Applies function along input axis of DataFrame.

In [None]:
# Score table with students as columns and subjects as rows;
# NaN marks a missing score.
df = pd.DataFrame(
    data={
        'Student_1': [75.0, 86.0, 91.0, np.nan],
        'Student_2': [82.5, 67.0, np.nan, 78.0],
        'Student_3': [69.0, 75.0, 83.0, 64.0],
    },
    index=['Chinese', 'English', 'Math', 'Science'],
)
df

In [None]:
def f(x):
    """Return the minimum and maximum of *x* as a Series labelled 'min' and 'max'."""
    extremes = [x.min(), x.max()]
    return pd.Series(extremes, index=['min', 'max'])

# axis=0: f is called once per column, giving a min/max pair for each column.
df.apply(f, axis=0)

In [None]:
# axis=1: f is called once per row, giving a min/max pair for each row.
df.apply(f, axis=1)

#### DataFrame.applymap(func)
Apply a function to a DataFrame <font color="red">elementwise</font>. Since pandas 2.1 this method is named `DataFrame.map`; `applymap` is the older, deprecated name.

This method applies a function that accepts and returns a scalar to every element of a DataFrame.

In [None]:
def f(x):
    """Return 0 for a missing value, otherwise *x* truncated to an int."""
    return 0 if pd.isnull(x) else int(x)


# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the old
# name is deprecated. Look the method up on the class (not on df, where a
# column called 'map' could shadow it) and use whichever name exists.
_elementwise = 'map' if hasattr(pd.DataFrame, 'map') else 'applymap'
df = getattr(df, _elementwise)(f)
df

#### DataFrame.describe(percentiles=None, include=None, exclude=None)
Generates descriptive statistics that summarize the central tendency, dispersion and shape of a dataset’s distribution, excluding NaN values.

Analyzes both numeric and object series, as well as DataFrame column sets of mixed data types. The output will vary depending on what is provided. Refer to the notes in the pandas documentation of `DataFrame.describe` for more detail.

In [None]:
# Load the iris data set shipped with scikit-learn and wrap its feature
# matrix in a DataFrame, using the feature names as column labels.
from sklearn.datasets import load_iris
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
# Preview the first rows.
df.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
df.describe()

#### DataFrame.cov(min_periods=None)
Compute pairwise covariance of columns, excluding NA/null values.


$\operatorname{cov}(X, Y) = E[(X - E[X])(Y - E[Y])]$

In [None]:
# Pairwise covariance matrix of the columns.
df.cov()

#### DataFrame.corr(method='pearson', min_periods=1)
Compute pairwise correlation of columns, excluding NA/null values

$\operatorname{corr}(X, Y) = \frac{\operatorname{cov}(X, Y)}{\sigma_{X} \sigma_{Y}}$

In [None]:
# Pairwise correlation matrix of the columns (Pearson by default).
df.corr()

In [None]:
# Three columns of five random integers each, drawn from 1..4
# (the upper bound 5 passed to randint is exclusive).
df = pd.DataFrame({name: np.random.randint(1, 5, 5) for name in ('a', 'b', 'c')})

df

#### Series.unique()
Return unique values of Series object.

In [None]:
# Distinct values of column 'a', in order of first appearance.
df['a'].unique()

#### Series.value_counts(normalize=False, sort=True, ascending=False, bins=None, dropna=True)
Returns object containing counts of unique values.

In [None]:
# Number of occurrences of each distinct value in column 'a';
# sort=False skips ordering the result by count.
df['a'].value_counts(sort=False)

#### DataFrame.isin(values)
Return boolean DataFrame showing whether each element in the DataFrame is contained in values.

In [None]:
# Element-wise membership test: True where the cell's value is 1 or 2.
df.isin([1, 2])

In [None]:
# Boolean row filter: keep only the rows whose 'a' value is 1 or 2.
mask = df['a'].isin([1, 2])
df.loc[mask]

#### Index.get_indexer(target, method=None, limit=None, tolerance=None)
Compute indexer and mask for new index given the current index.

In [None]:
# Row labels do not have to be unique: 'I' and 'III' each appear twice.
df.index = ['III', 'I', 'II', 'I', 'III']
df

In [None]:
# Translate every current row label into its position within new_index
# ('I' -> 0, 'II' -> 1, 'III' -> 2) and use those positions as the new labels.
old_index = df.index
new_index = pd.Series(['I', 'II', 'III'])
positions = pd.Index(new_index).get_indexer(old_index)
df.index = positions
df

In [None]:
# Three columns of five random integers each, drawn from 1..4
# (the upper bound 5 passed to randint is exclusive).
df = pd.DataFrame({
    'a': np.random.randint(1, 5, 5),
    'b': np.random.randint(1, 5, 5),
    'c': np.random.randint(1, 5, 5),
})


def f(x):
    """Return None for the value 4 (treated as missing), otherwise *x* unchanged."""
    return x if x != 4 else None


# DataFrame.applymap was renamed to DataFrame.map in pandas 2.1 and the old
# name is deprecated. Look the method up on the class (not on df, where a
# column called 'map' could shadow it) and use whichever name exists.
_elementwise = 'map' if hasattr(pd.DataFrame, 'map') else 'applymap'
df = getattr(df, _elementwise)(f)
df

In [None]:
# Count the occurrences of each value separately for every column.
# The top-level pd.value_counts function is deprecated, so the Series
# method is passed instead; apply() calls it once per column.
df.apply(pd.Series.value_counts)

In [None]:
# Per-column value counts (via the Series method, since the top-level
# pd.value_counts function is deprecated); a value that never occurs in a
# column shows up as NaN and is replaced by 0.
df.apply(pd.Series.value_counts).fillna(0)