* [cufflinks · PyPI](https://pypi.org/project/cufflinks/)
* [Pandas Plotting Backend | Python | Plotly](https://plotly.com/python/pandas-backend/)

In [None]:
# !pip install cufflinks

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt


import plotly.express as px
import cufflinks as cf
# Put cufflinks into offline mode before any of the .iplot() calls further down.
# NOTE(review): connected=True presumably loads plotly.js from a CDN, which
# would require network access when the notebook is viewed — confirm.
cf.go_offline(connected=True)

# Optional switch (left disabled): make DataFrame.plot() use plotly as its backend.
# pd.options.plotting.backend = "plotly"

# Display the installed seaborn version as the cell output.
sns.__version__

In [None]:
import warnings

# Suppress every warning for the rest of the kernel session. This also hides
# deprecation notices raised by the plotting libraries, so disable this line
# when checking the notebook against newer library versions.
warnings.filterwarnings("ignore")

In [None]:
# !pip install seaborn

In [None]:
df = sns.load_dataset("mpg")
df.shape

In [None]:
df.head()

In [None]:
df.tail()

In [None]:
df.info()

In [None]:
df.describe()

In [None]:
df.describe(include="object")

## 수치형 변수의 시각화

In [None]:
df["mpg"].unique()

In [None]:
h = df.hist(figsize=(15, 10), bins=10)

In [None]:
df["acceleration"].iplot(kind="box")

## histogram

In [None]:
df["acceleration"].iplot(kind="hist")

### Marginal Distribution Plots
* [Marginal Distribution Plots | Python | Plotly](https://plotly.com/python/marginal-plots/)

In [None]:
px.histogram(df, x="mpg", color="origin", marginal="box")

## violinplot

In [None]:
px.violin(df["mpg"], points="all")

In [None]:
fig = px.box(df, x="origin", y="mpg", points="all", color="origin")
fig.update_layout(title_text='origin mpg')
fig.show()

In [None]:
px.violin(df, x="mpg", color="origin")

In [None]:
px.violin(df, x="origin", y="mpg", color="origin")

In [None]:
px.bar_polar(df, r="weight")

In [None]:
px.density_contour(df, y="acceleration")

In [None]:
df["acceleration"].plot(kind="density")

In [None]:
# Filled kernel-density estimate of acceleration, with a rug of the raw
# observations drawn on the same axes.
# `fill=True` is the current keyword for shading the area under the curve;
# the previous `shade=True` spelling is deprecated in seaborn.
sns.kdeplot(df["acceleration"], fill=True)
sns.rugplot(df["acceleration"])

### 두 개의 수치형 변수

In [None]:
df.columns

In [None]:
df.iplot(kind="scatter", x="horsepower", y="weight", mode='markers', title="horsepower & weight")

In [None]:
df.iplot(kind="reg", x="horsepower", y="weight", mode='markers', categories='origin')

In [None]:
df.iplot(kind="scatter", x="horsepower", y="weight", mode='markers', categories='origin', subplots=True)

In [None]:
px.scatter(df, x="horsepower", y="weight", 
           color="cylinders", size="cylinders", title="horsepower & weight")

In [None]:
px.scatter(df, x="horsepower", y="weight", 
           color="cylinders", size="cylinders", 
           facet_col="cylinders", title="horsepower & weight")

In [None]:
px.scatter(df, x="horsepower", y="weight", 
           color="cylinders", size="cylinders", 
           facet_col="origin", title="horsepower & weight")

In [None]:
px.scatter(df, x="horsepower", y="weight", 
           color="cylinders", size="cylinders", 
           marginal_x="histogram", marginal_y="rug")

In [None]:
px.scatter(df, x="horsepower", y="weight", 
           size="cylinders", color="origin", trendline="ols", 
           marginal_x="violin", marginal_y="box")

In [None]:
px.scatter(df, x="horsepower", y="weight", trendline="ols", title="horsepower & weight")

In [None]:
sns.residplot(data=df, x="horsepower", y="weight")

In [None]:
px.scatter(df, x="horsepower", y="weight", color="origin",
           trendline="ols",
           facet_col="origin",
           title="horsepower & weight")

In [None]:
px.scatter(df, x="horsepower", y="weight", 
           size="cylinders", color="origin", trendline="ols", 
           marginal_x="histogram", marginal_y="histogram")

In [None]:
sns.jointplot(data=df, x="horsepower", y="weight",  kind='hex')

In [None]:
# Reproducible 100-row random sample of the data (fixed random_state).
# NOTE(review): df_sample is not referenced by any later cell visible in this
# notebook; the scatter matrix that follows plots the full df — confirm whether
# the sample was meant to be used there.
df_sample = df.sample(100, random_state=42)
df_sample

In [None]:
px.scatter_matrix(df, color="origin", height=1000)

In [None]:
# Pairwise correlation matrix of the numeric columns.
# The numeric columns are selected explicitly because df also holds string
# columns (e.g. "origin"): on pandas >= 2.0 DataFrame.corr() no longer drops
# them silently and raises instead. On older pandas the result is unchanged.
corr = df.select_dtypes(include="number").corr()
corr

In [None]:
import numpy as np

# Boolean mask that is True on and above the main diagonal, so the heatmap
# only draws the lower triangle of the correlation matrix.
mask = np.ones_like(corr, dtype=bool)
mask = np.triu(mask)

# Colour scale pinned to the full correlation range [-1, 1].
sns.heatmap(
    corr,
    mask=mask,
    annot=True,
    cmap="coolwarm",
    vmin=-1,
    vmax=1,
)

In [None]:
px.imshow(corr)

In [None]:
np.around(corr.values, 2)

In [None]:
import plotly.figure_factory as ff

# Annotated plotly heatmap of the correlation matrix: cell values rounded to
# two decimals, axes labelled with the matrix's column and index names.
ff.create_annotated_heatmap(
    z=np.around(corr.values, 2),
    x=corr.columns.tolist(),
    y=corr.index.tolist(),
    colorscale="rdbu_r",
)

In [None]:
px.line(df, x="model_year", y="mpg")

In [None]:
px.line(df, x="model_year", y="mpg", color="origin")

In [None]:
px.line(df, x="model_year", y="mpg", color="origin", facet_col="origin")

In [None]:
# Line plot of mpg aggregated with 'sum' per model_year, drawn without an
# error band (ci=None).
# NOTE(review): newer seaborn releases document `errorbar=None` as the
# replacement for `ci=None`; switch once the minimum seaborn version used for
# this notebook is confirmed to support it.
sns.lineplot(data=df, x="model_year", y="mpg", ci=None, estimator='sum')

In [None]:
# One line-plot panel per origin (col="origin") of mpg over model_year,
# drawn without error bands (ci=None).
# NOTE(review): newer seaborn releases document `errorbar=None` as the
# replacement for `ci=None`; switch once the minimum seaborn version used for
# this notebook is confirmed to support it.
sns.relplot(data=df, x="model_year", y="mpg", kind="line", ci=None, col="origin")

### 범주형 데이터

In [None]:
df.nunique()

In [None]:
df["origin"].value_counts()

In [None]:
df["origin"].value_counts().iplot(kind="bar", title="origin")

In [None]:
px.histogram(df, x="origin", histfunc="count")

In [None]:
origin_cylinders = pd.crosstab(df["cylinders"], df["origin"])
origin_cylinders

In [None]:
origin_cylinders.iplot(kind="bar", title="cylinders")

In [None]:
origin_cylinders.T.iplot(kind="bar", title="origin")

In [None]:
px.histogram(df, x="origin", histfunc="count", color="cylinders")

In [None]:
px.histogram(df, x="origin", histfunc="count", color="cylinders", barmode="group")

In [None]:
px.bar(origin_cylinders, title="origin vs cylinders", text="origin")

In [None]:
px.bar(origin_cylinders.T, title="origin vs cylinders", text="cylinders")

In [None]:
px.bar(df, x="cylinders", y="mpg", color="origin", barmode="group")

In [None]:
px.bar(origin_cylinders.T, title="origin vs cylinders", text="cylinders", barmode='group')

In [None]:
px.bar(origin_cylinders, title="origin vs cylinders", facet_col="origin")

In [None]:
px.bar(origin_cylinders.T, title="origin vs cylinders", facet_col="cylinders")

### 범주형 변수와 수치형 변수

In [None]:
df.groupby(["origin"])["horsepower"].mean()

In [None]:
px.bar(df, x="origin", y="horsepower", title="sum horsepower", text="horsepower")

In [None]:
origin_horsepower_mean = df.groupby("origin")["horsepower"].mean()
origin_horsepower_mean

In [None]:
px.bar(origin_horsepower_mean, 
       x=origin_horsepower_mean.index, 
       y=origin_horsepower_mean, text=origin_horsepower_mean)

In [None]:
origin_horsepower_sum = df.groupby("origin")["horsepower"].sum()
origin_horsepower_sum

In [None]:
px.bar(origin_horsepower_sum, 
       x=origin_horsepower_sum.index, 
       y=origin_horsepower_sum, text=origin_horsepower_sum)

In [None]:
df.groupby(["origin"])["horsepower"].agg(["mean", "sum"])

In [None]:
sns.barplot(data=df, x="origin", y="horsepower", estimator=np.sum)

In [None]:
sns.barplot(data=df, x="origin", y="horsepower", hue="cylinders")

In [None]:
sns.barplot(data=df, x="cylinders", y="horsepower", hue="origin")

In [None]:
df.groupby(["origin", "cylinders"])["horsepower"].sum().unstack()

In [None]:
px.box(df, x="origin", y="horsepower", color="origin", points="all")

In [None]:
g = df.groupby("origin")["horsepower"].describe()
g

In [None]:
# Outlier fences for europe's horsepower, taken from the describe() table g:
# anything beyond 1.5 * IQR outside the quartiles counts as an outlier.
Q1, Q3 = g.loc["europe", ["25%", "75%"]]
IQR = Q3 - Q1

OUT_MIN, OUT_MAX = Q1 - (IQR * 1.5), Q3 + (IQR * 1.5)
OUT_MAX, OUT_MIN

In [None]:
# Box plot of the actual horsepower observations for europe.
# g.loc["europe"] is a row of describe() output (count, mean, std, min,
# quartiles, max); plotting those eight summary numbers as if they were data
# gives a box that matches neither the real distribution nor the IQR fences
# computed from it.
sns.boxplot(x=df.loc[df["origin"] == "europe", "horsepower"])

In [None]:
# Plotly box plot of the actual horsepower observations for europe.
# g.loc["europe"] holds describe() summary statistics (count, mean, std, ...),
# not data points, so the box is drawn from the filtered rows of df instead.
px.box(df[df["origin"] == "europe"], x="horsepower")

In [None]:
px.box(df, y="origin", x="mpg", color="origin")

In [None]:
px.density_contour(df, x="cylinders", y="weight")

In [None]:
sns.boxenplot(data=df, x="cylinders", y="horsepower", hue="origin")



In [None]:
px.violin(df, x="cylinders", y="horsepower", color="cylinders", facet_col="origin")

In [None]:
px.scatter(df, x="cylinders", y="horsepower", color="cylinders")

In [None]:
# NOTE(review): this cell is identical to the one directly before it — confirm
# whether a different variant (e.g. another color or facet column) was intended.
px.scatter(df, x="cylinders", y="horsepower", color="cylinders")

In [None]:
px.strip(df, x="cylinders", y="weight")

In [None]:
px.strip(df, x="cylinders", y="weight", color="origin")

In [None]:
sns.stripplot(data=df, x="cylinders", y="horsepower")

In [None]:
sns.swarmplot(data=df, x="cylinders", y="horsepower", size=2)

### subplots

In [None]:
px.strip(df, x="cylinders", y="weight", color="origin", facet_col="origin")

In [None]:
px.box(df, x="cylinders", y="weight", color="origin", facet_col="origin")

In [None]:
px.violin(df, x="cylinders", y="weight", color="origin", facet_col="origin", title="cylinders & weight")

In [None]:
sns.catplot(data=df, x="origin", hue="cylinders", col="cylinders", kind="count", col_wrap=3)

In [None]:
df_sub = df[["cylinders", "displacement"]]
df_sub.head()

In [None]:
df_sub.isnull().sum()

In [None]:
px.scatter(df.select_dtypes(exclude="object"))

In [None]:
sns.relplot(data=df)

In [None]:
px.line(df.select_dtypes(exclude="object"))

In [None]:
sns.relplot(data=df, kind="line")

In [None]:
px.box(df.select_dtypes(exclude="object"))

In [None]:
sns.catplot(data=df, orient="h", kind="box")

In [None]:
px.violin(df.select_dtypes(exclude="object"))

In [None]:
sns.catplot(data=df, orient="h", kind="violin")

In [None]:
px.colors.sequential.swatches()

In [None]:
px.colors.cyclical.swatches()

In [None]:
px.colors.diverging.swatches()

In [None]:
px.colors.qualitative.swatches()