In [None]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the quarterly financial-statement table; the first CSV column is used as the row index.
df = pd.read_csv(
    'FS_qtr.csv',
    encoding='utf-8',
    index_col=0,
)
print(df.shape)  # (number of rows, number of columns)
df.head()

#### DataFrame.dropna(axis=0, how='any', thresh=None, subset=None, inplace=False)
Remove missing values.

In [None]:
# Drop every column that holds at least one missing value.
# dropna returns a new frame, so `df` itself stays as loaded.
df1 = df.dropna(axis='columns', how='any')
print(df1.shape)
df1.head()

#### DataFrame.fillna(value=None, method=None, axis=None, inplace=False, limit=None, downcast=None, **kwargs)
Fill NA/NaN values using the specified method

In [None]:
# Replace every missing value with 0.
# fillna returns a new frame, so `df` itself stays as loaded.
df1 = df.fillna(value=0)
print(df1.shape)
df1.head()

In [None]:
# Forward-fill along each row (axis=1): a missing cell takes the last valid
# value found to its left in the same row.
# DataFrame.ffill is used instead of fillna(method='ffill'), which is
# deprecated since pandas 2.1. It returns a new frame, so `df` is untouched.
df1 = df.ffill(axis=1)
print(df1.shape)
df1.head()

#### DataFrame.duplicated(subset=None, keep='first')
Return boolean Series denoting <font color="red">duplicate rows</font>, optionally only considering certain columns

#### DataFrame.drop_duplicates(subset=None, keep='first', inplace=False)
Return DataFrame with duplicate rows removed, optionally only considering certain columns

In [None]:
# Create a duplicated row: overwrite the second row with the contents of the first.
first_row = df1.iloc[0]
df1.iloc[1] = first_row
df1.head()

In [None]:
# Boolean flag per row: True when the row repeats an earlier one (the first occurrence stays False).
df1.duplicated(keep='first')[:5]

#### DataFrame.drop_duplicates(subset=None, keep='first', inplace=False)
Return DataFrame with duplicate rows removed, optionally only considering certain columns

In [None]:
# Remove repeated rows, keeping the first occurrence of each.
df1 = df1.drop_duplicates(keep='first')
df1.head()

In [None]:
# Build a per-company table of profitability ratios, expressed in percent and
# rounded to two decimals, indexed by company code.
revenue = df['營業收入']  # operating revenue, the denominator of the margin ratios
df2 = pd.DataFrame({
    '公司代號': df['公司代號'],
    '公司名稱': df['公司名稱'],
    # gross margin = gross profit / revenue
    '毛利率(%)': (df['營業毛利（毛損）'] / revenue * 100).round(2),
    # operating margin = operating income / revenue
    '營業利益率(%)': (df['營業利益（損失）'] / revenue * 100).round(2),
    # non-operating items as a share of pre-tax income
    # NOTE(review): this label is spelled with '税' while the source columns use '稅' — confirm which is intended.
    '業外佔税前淨利比(%)': (df['營業外收入及支出'] / df['稅前淨利（淨損）'] * 100).round(2),
    # net margin = net income / revenue
    '稅後淨利率(%)': (df['本期淨利（淨損）'] / revenue * 100).round(2),
    'EPS': df['基本每股盈餘（元）'],
})
df2 = df2.set_index('公司代號')
df2.head()

In [None]:
# .loc slices by index label and includes both endpoints.
df2.loc[2451:2454, :]

In [None]:
# Switch datasets: from here on `df` refers to the table read from tips.csv.
df = pd.read_csv('tips.csv')
df.head(5)

In [None]:
# Lower-case the 'time' labels using the vectorised string accessor.
time_lower = df['time'].str.lower()
df['time'] = time_lower
df.head()

#### Series.map(arg, na_action=None)
Map values of <font color='red'>Series</font> using input correspondence (a dict, Series, or function).

In [None]:
def _to_lower(text):
    """Return ``text`` converted to lower case."""
    return text.lower()


# Series.map calls the function once per element of the 'smoker' column.
df['smoker'] = df['smoker'].map(_to_lower)
df.head()

#### DataFrame.replace(to_replace=None, value=None, inplace=False, limit=None, regex=False, method='pad')
Replace values given in to_replace with value.

In [None]:
# Three ways of calling DataFrame.replace, chained without inplace=True so the
# cell produces its result by re-binding `df` rather than mutating the frame in place.
# No column is specified, so each call applies to every column of the frame:
# any cell equal to a listed value is rewritten, whichever column it is in.
df = (
    df
    .replace('Sun', 'Sunday')                      # scalar -> scalar
    .replace([2, 3, 4], ['Two', 'Three', 'Four'])  # list -> list, paired by position
    .replace({'yes': 'YES', 'no': 'NO'})           # dict of {old: new}
)
df.head()

#### pandas.cut(x, bins, right=True, labels=None, retbins=False, precision=3, include_lowest=False, duplicates='raise')
Bin values into discrete intervals.

#### pandas.qcut(x, q, labels=None, retbins=False, precision=3)
Quantile-based discretization function. 

q : integer or array of quantiles

Number of quantiles. 10 for deciles, 4 for quartiles, etc. Alternately array of quantiles, e.g. [0, .25, .5, .75, 1.] for quartiles

#### Series.value_counts(normalize=False, sort=True, ascending=False, bins=None, dropna=True)
Returns object containing counts of unique values.

In [None]:
# Re-read tips.csv, discarding the edits made to `df` in earlier cells.
df = pd.read_csv('tips.csv')
print(df.shape)
# Smallest and largest tip, shown as a (min, max) tuple.
(df['tip'].min(), df['tip'].max())

In [None]:
# Bin tips into fixed-width intervals (0, 2], (2, 4], ..., (8, 10];
# pd.cut closes each interval on the right by default.
bins = [0, 2, 4, 6, 8, 10]
tip_bins = pd.cut(df['tip'], bins)
# Count rows per interval. Series.value_counts is used because the top-level
# pd.value_counts function is deprecated since pandas 2.1.
tip_bins.value_counts()

In [None]:
# Split tips into 4 quantile-based bins (quartiles); edges are derived from the data.
tip_bins = pd.qcut(df['tip'], 4)
# Series.value_counts replaces the top-level pd.value_counts, deprecated since pandas 2.1.
tip_bins.value_counts()

In [None]:
# Split tips at explicit quantiles: 0-33%, 33-67% and 67-100% of the distribution.
tip_bins = pd.qcut(df['tip'], [0, .33, .67, 1.])
# Series.value_counts replaces the top-level pd.value_counts, deprecated since pandas 2.1.
tip_bins.value_counts()

#### DataFrame.plot(x=None, y=None, kind='line', ax=None, subplots=False, sharex=None, sharey=False, layout=None, figsize=None, use_index=True, title=None, grid=None, legend=True, style=None, logx=False, logy=False, loglog=False, xticks=None, yticks=None, xlim=None, ylim=None, rot=None, fontsize=None, colormap=None, table=False, yerr=None, xerr=None, secondary_y=False, sort_columns=False, **kwds)
Make plots of DataFrame using matplotlib / pylab.

In [None]:
# Histogram of tip amounts in 20 bins, with black bar outlines.
df['tip'].plot.hist(bins=20, edgecolor='black')

#### DataFrame.all(axis=0, bool_only=None, skipna=True, level=None, **kwargs)
Return whether all elements are True, potentially over an axis.

In [None]:
# Keep the rows in which every numeric column is >= 5.
# The comparison is restricted to numeric columns: applying `>= 5` to the whole
# frame raises TypeError, because the text columns (e.g. 'smoker', 'time')
# cannot be ordered against an integer.
numeric_cols = df.select_dtypes(include='number')
df[(numeric_cols >= 5).all(axis=1)]

#### DataFrame.any(axis=0, bool_only=None, skipna=True, level=None, **kwargs)
Return whether any element is True over requested axis.

In [None]:
# Keep the rows where total_bill or tip (at least one of the two) is 45 or more.
at_least_45 = df[['total_bill', 'tip']].ge(45)
df[at_least_45.any(axis=1)]

#### pandas.get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False, columns=None, sparse=False, drop_first=False, dtype=None)
Convert <font color="red">categorical</font> variable into dummy/indicator variables (one-hot encoder)

In [None]:
# one-hot-encoded table
df1 = df.copy()
dummies = pd.get_dummies(df1['smoker'], prefix='smoker')
df1 = df1.join(dummies)
df1.head()