<a href="https://colab.research.google.com/github/beeyan/Analytics/blob/master/Colab%20Notebooks/learning_marketing/ch3_EDA_by_plotly.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Plotlyを用いた可視化とマーケティングデータの扱い方の基本

In [None]:
import numpy as np
import pandas as pd

import plotly
import plotly.graph_objs as go
# To render plots in Google Colab or Jupyter Lab, run the following.
import plotly.io as pio
import plotly.express as px
# Make "colab" the default plotly renderer for every fig.show() below.
# NOTE(review): the renderer name is hard-coded; other front ends may need a different value — confirm.
pio.renderers.default = "colab"
# Last expression of the cell: echoes the installed plotly version as the cell output.
plotly.__version__

'4.4.1'

### データラングリング

In [None]:
# Location of the store sales CSV; it is downloaded over HTTP each time this cell runs.
# NOTE(review): this is a shortened link — consider pinning the resolved URL once confirmed.
STORE_DATA_URL = 'http://goo.gl/QPDdMl'
df_store = pd.read_csv(STORE_DATA_URL)

In [None]:
# Check the simulation data: column dtypes, a blank separator line, then the row count.
n_rows = len(df_store)
print(df_store.dtypes, end="\n\n")
print("データ数: ", n_rows)

storeNum      int64
Year          int64
Week          int64
p1sales       int64
p2sales       int64
p1price     float64
p2price     float64
p1prom        int64
p2prom        int64
country      object
dtype: object

データ数:  2080


In [None]:
# Frequency table: number of rows observed at each p1price level.
# Selecting storeNum before counting yields the same single count column.
df_store.groupby('p1price')[['storeNum']].count().reset_index()

Unnamed: 0,p1price,storeNum
0,2.19,395
1,2.29,444
2,2.49,423
3,2.79,443
4,2.99,375


In [None]:
# Same per-price row count as above, kept in df_p1 for later use.
df_p1 = (
    df_store[['p1price', 'storeNum']]
    .groupby('p1price')
    .count()
    .reset_index()
)
df_p1

Unnamed: 0,p1price,storeNum
0,2.19,395
1,2.29,444
2,2.49,423
3,2.79,443
4,2.99,375


In [None]:
# Aggregate with pivot_table: row counts by p1price (rows) and p1prom flag (columns).
# fill_value=0 reports a price/flag combination with no rows as a count of 0 rather
# than NaN, so the promotion rate below stays defined for every price level.
df_p2 = pd.pivot_table(data=df_store,
                       values='storeNum',
                       index='p1price',
                       columns='p1prom',
                       aggfunc='count',
                       fill_value=0)

# Move p1price out of the index into a regular column. A single reset_index() is
# sufficient; resetting twice and dropping the extra 'index' column was a no-op round trip.
df_p2 = df_p2.reset_index()

# promotion_rate: share of rows at each price level where p1prom == 1.
df_p2['promotion_rate'] = df_p2[1] / (df_p2[0] + df_p2[1])

df_p2

p1prom,p1price,0,1,promotion_rate
0,2.19,354,41,0.103797
1,2.29,398,46,0.103604
2,2.49,381,42,0.099291
3,2.79,396,47,0.106095
4,2.99,343,32,0.085333


In [None]:
# Deciles of p1sales (10%, 20%, ..., 100%).
# np.linspace yields exactly ten evenly spaced points ending at 1.0. np.arange with a
# fractional step has an unreliable endpoint, and quantile() rejects any q above 1.
deciles = np.linspace(0.1, 1.0, 10).tolist()
df_store[['p1sales']]\
    .quantile(deciles)\
    .reset_index(drop=True)

Unnamed: 0,p1sales
0,100.0
1,109.0
2,117.0
3,122.6
4,129.0
5,136.0
6,145.0
7,156.0
8,171.0
9,263.0


In [None]:
# Summary statistics can be obtained with describe()
df_store.describe()

Unnamed: 0,storeNum,Year,Week,p1sales,p2sales,p1price,p2price,p1prom,p2prom
count,2080.0,2080.0,2080.0,2080.0,2080.0,2080.0,2080.0,2080.0,2080.0
mean,110.5,1.5,26.5,133.048558,100.156731,2.544375,2.699519,0.1,0.138462
std,5.767668,0.50012,15.01194,28.372599,24.42419,0.294882,0.329218,0.300072,0.345467
min,101.0,1.0,1.0,73.0,51.0,2.19,2.29,0.0,0.0
25%,105.75,1.0,13.75,113.0,84.0,2.29,2.49,0.0,0.0
50%,110.5,1.5,26.5,129.0,96.0,2.49,2.59,0.0,0.0
75%,115.25,2.0,39.25,150.0,113.0,2.79,2.99,0.0,0.0
max,120.0,2.0,52.0,263.0,225.0,2.99,3.19,1.0,1.0


### plotlyを用いた描画

In [None]:
# Create a histogram of p1sales.

# Layout options collected up front: plot title, axis labels and figure size in pixels.
hist_layout = dict(
    title_text='Sample histogram',
    xaxis_title_text='p1sales',
    yaxis_title_text='Count_values',
    width=600,
    height=400,
)

fig = px.histogram(df_store, x="p1sales")
fig.update_layout(**hist_layout)
fig.show()

In [None]:
# Create a distplot (histogram + KDE curve + rug) of p1sales.
import plotly.figure_factory as ff
fig = ff.create_distplot([df_store['p1sales']], ['distplot'])

# create_distplot normalises the histogram as a probability density by default,
# so the y-axis is labelled as a density rather than as raw counts.
fig.update_layout(
    title_text='Sample distplot', # title of plot
    xaxis_title_text='p1sales', # xaxis label
    yaxis_title_text='Density', # yaxis label
    width=600, height=400)
fig.show()

In [None]:
# Create a box plot of p2sales.
fig = px.box(df_store, y="p2sales")
# The box is drawn along the y-axis, so that axis carries the p2sales values.
# No variable is mapped to the x-axis, so it gets no title.
fig.update_layout(
    title_text='Sample boxplot', # title of plot
    yaxis_title_text='p2sales', # yaxis label
    )
fig.show()

In [None]:
def func(x):
  """Return 'Yes' when x equals 1, and 'No' for any other value."""
  return 'Yes' if x == 1 else 'No'
# Add a 'Yes'/'No' label column derived from the p2prom flag.
df_store['p2prom_category'] = df_store['p2prom'].apply(func)

# Draw the histogram and the box plot together:
# one p2sales histogram per category, with a box plot in the margin.
overlay_hist_options = dict(
    x='p2sales',
    color='p2prom_category',
    width=600,
    height=350,
    opacity=0.4,
    marginal='box',
)
fig = px.histogram(df_store, **overlay_hist_options)
# Overlay the per-category bars instead of the default stacking.
fig.update_layout(barmode='overlay')
fig.show()