In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
import seaborn as sns 
import platform
import re

# Global seaborn theme: use the generic sans-serif font family ...
sns.set(font='sans-serif')
# ... then switch to the "whitegrid" style and make 'Microsoft JhengHei' the
# sans-serif font. Presumably this is so the Chinese chart titles render;
# NOTE(review): that font normally ships with Windows only — confirm it is
# installed on other platforms.
sns.set_style("whitegrid",{"font.sans-serif":['Microsoft JhengHei']})

In [None]:
# Choose the path separator and CSV encoding for the current operating system.
# NOTE(review): neither `theOS` nor `theEncode` is used by any cell visible
# in this notebook — confirm before relying on or removing them.
theOS, theEncode = (
    ('\\', 'utf-8-sig') if platform.system() == 'Windows' else ('/', 'utf-8')
)

# Load the merged transaction file and add a parsed datetime column `Date`
# derived from the raw TRANSACTION_DT column.
df = pd.read_csv('../data/ta_feng_all_months_merged.csv').assign(
    Date=lambda frame: pd.to_datetime(frame['TRANSACTION_DT'])
)

df

## 計算毛利潤 Gross Profit

In [None]:
# Gross profit per row: SALES_PRICE minus ASSET.
# (presumably ASSET holds the cost of the goods — verify against the data dictionary)
df['gross_profit'] = df['SALES_PRICE'].sub(df['ASSET'])
df

## 排名: 計算每個商品種類賺了多少並找到雜貨店內賺最多的商品

In [None]:
# Convert the product subclass code to a string so it is treated as a label.
df['PRODUCT_SUBCLASS'] = df['PRODUCT_SUBCLASS'].astype(str)

# Total gross profit per product subclass (one row per subclass).
pdbar = df.groupby('PRODUCT_SUBCLASS', as_index=False)['gross_profit'].sum()

# Displayed only: the sorted frame is not stored, `pdbar` keeps its group order.
pdbar.sort_values('gross_profit', ascending=False)




In [None]:
# Visualise total gross profit for every product subclass.
# Keep `ax` bound to the Axes returned by barplot; chaining .set_title() onto
# the call would bind `ax` to the title's Text object instead.
ax = sns.barplot(x="PRODUCT_SUBCLASS",
                 y="gross_profit",
                 data=pdbar)
ax.set_title("產品毛利分類");

In [None]:
# Keep the ten product subclasses with the highest total gross profit.
pdbar_top = pdbar.sort_values('gross_profit', ascending=False).head(10)
pdbar_top

In [None]:
# Visualise the top subclasses on an explicitly sized figure.
# The Axes created by plt.subplots is passed to seaborn and kept in `ax`;
# chaining .set_title() onto barplot would overwrite `ax` with a Text object.
f, ax = plt.subplots(figsize=(14, 10))
sns.barplot(x="PRODUCT_SUBCLASS",
            y="gross_profit",
            data=pdbar_top,
            ax=ax)
ax.set_title("產品毛利分類");


## 加入年齡層進行分類 (列出前幾名的商品種類)

In [None]:
# Total gross profit per (product subclass, age group) pair.
# Note: this rebinds both `pdbar` and `pdbar_top`; from here on `pdbar_top`
# holds the ten most profitable (subclass, age group) rows.
pdbar = df.groupby(['PRODUCT_SUBCLASS', 'AGE_GROUP'], as_index=False)['gross_profit'].sum()
pdbar_top = pdbar.sort_values('gross_profit', ascending=False).head(10)
pdbar_top

In [None]:
# Bar chart of the top (subclass, age group) rows, coloured by age group.
# The Axes created by plt.subplots is passed to seaborn and kept in `ax`;
# chaining .set_title() onto barplot would overwrite `ax` with a Text object.
f, ax = plt.subplots(figsize=(14, 10))
sns.barplot(x="PRODUCT_SUBCLASS",
            y="gross_profit",
            hue="AGE_GROUP",
            data=pdbar_top,
            estimator=np.sum,
            ax=ax)
ax.set_title("毛利高地分類分類-年齡層");

## 計算每天的毛利

In [None]:
# Total gross profit per calendar day (one row per distinct `Date`).
pdbar = df.groupby('Date', as_index=False)['gross_profit'].sum()
pdbar

In [None]:
# Replace the datetime `Date` column with "MM-DD" labels for plotting.
# Vectorised with the .dt accessor instead of a row-by-row Python loop.
# Note: the cell is not idempotent — once `Date` holds text, re-running it
# without first re-running the groupby cell raises, because .dt needs
# datetime values.
pdbar['Date'] = pdbar['Date'].dt.strftime('%m-%d')
pdbar

## 觀察前30天的變化 11月1日~11月30日

In [None]:
# Point plot of daily gross profit for the first 30 rows of the daily table.
# sns.factorplot was renamed to sns.catplot and later removed from seaborn;
# kind="point" reproduces factorplot's default plot type (catplot's own
# default is a strip plot). Other available kinds include bar, swarm, violin, box.
# `ax` is a seaborn FacetGrid, not a matplotlib Axes.
# NOTE(review): the positional slice [:30] assumes one row per day starting
# at the first date in the data — confirm there are no missing days.
ax = sns.catplot(x="Date",
                 y="gross_profit",
                 estimator=np.sum,
                 data=pdbar[:30],
                 kind="point")
ax.set_xticklabels(rotation=90)
ax.fig.suptitle('毛利狀況（日）')
plt.gcf().set_size_inches(15, 8)

## 觀察每月的毛利變化

In [None]:
# Month label = first run of digits in the raw TRANSACTION_DT string, kept as text.
# Vectorised with .str.extract (raw-string regex) instead of calling
# re.findall once per row in a Python loop over the whole frame.
# Rows without any digits become NaN here rather than raising IndexError.
# NOTE(review): assumes TRANSACTION_DT is month-first text — confirm against the CSV.
df['month'] = df['TRANSACTION_DT'].str.extract(r'(\d+)', expand=False)

In [None]:
# Total gross profit per month label.
pdbar = df.groupby('month', as_index=False)['gross_profit'].sum()

# Re-order the rows by their index labels 1, 2, 0, 3.
# NOTE(review): presumably this puts the text-sorted month labels into
# chronological order — verify against the months actually present.
pdbar = pdbar.reindex([1, 2, 0, 3])
pdbar

In [None]:
# Point plot of total gross profit per month.
# sns.factorplot was renamed to sns.catplot and later removed from seaborn;
# kind="point" reproduces factorplot's default plot type (catplot's own
# default is a strip plot). Other available kinds include bar, swarm, violin, box.
# `ax` is a seaborn FacetGrid, not a matplotlib Axes.
ax = sns.catplot(x="month",
                 y="gross_profit",
                 estimator=np.sum,
                 data=pdbar,
                 kind="point")
ax.set_xticklabels(rotation=90)
ax.fig.suptitle('毛利狀況（月）')
plt.gcf().set_size_inches(15, 8)

## 觀察最低毛利潤 12月的變化 12月1日~12月31日

In [None]:
# Rebuild the per-day gross-profit table (`pdbar` was reused for the monthly
# totals in between).
pdbar = df.groupby('Date', as_index=False)['gross_profit'].sum()
pdbar

In [None]:
# Replace the datetime `Date` column with "MM-DD" labels for plotting.
# Vectorised with the .dt accessor instead of a row-by-row Python loop.
# Note: the cell is not idempotent — once `Date` holds text, re-running it
# without first re-running the groupby cell raises, because .dt needs
# datetime values.
pdbar['Date'] = pdbar['Date'].dt.strftime('%m-%d')

In [None]:
# Point plot of daily gross profit for rows 30..60 of the daily table.
# sns.factorplot was renamed to sns.catplot and later removed from seaborn;
# kind="point" reproduces factorplot's default plot type (catplot's own
# default is a strip plot). Other available kinds include bar, swarm, violin, box.
# `ax` is a seaborn FacetGrid, not a matplotlib Axes.
# NOTE(review): the positional slice [30:61] assumes one row per day with no
# gaps, so that these rows are exactly December — confirm against the data.
ax = sns.catplot(x="Date",
                 y="gross_profit",
                 estimator=np.sum,
                 data=pdbar[30:61],
                 kind="point")
ax.set_xticklabels(rotation=90)
ax.fig.suptitle('毛利狀況（日）')
plt.gcf().set_size_inches(15, 8)

## 根據賺取利潤最高的前五名產品 分別查看各年齡層的購買狀況

## 過濾出利潤最高的前五名商品種類

In [None]:
# Keep only the transactions belonging to the five product subclasses with the
# highest total gross profit.
# The ranking is computed here directly from `df`: by this point `pdbar_top`
# has been rebound to the top-10 (subclass, age group) rows, so filtering on
# it would not select "the top five products".
top5_subclasses = (
    df.groupby('PRODUCT_SUBCLASS')['gross_profit']
      .sum()
      .nlargest(5)
      .index
)
profite_high_product = df[df['PRODUCT_SUBCLASS'].isin(top5_subclasses)]
profite_high_product

In [None]:
# Total gross profit per (product subclass, age group) among the selected products.
pdbar = profite_high_product.groupby(['PRODUCT_SUBCLASS', 'AGE_GROUP'], as_index=False)['gross_profit'].sum()

# One bar-chart panel per age group. A fixed `order` is passed so every panel
# places each subclass at the same x position; without it each panel derives
# its own category order from its subset of rows, and bars can end up under
# the wrong shared tick label when an age group lacks one of the subclasses.
subclass_order = sorted(pdbar['PRODUCT_SUBCLASS'].unique())
ax = sns.FacetGrid(pdbar, col="AGE_GROUP")  # `ax` is a FacetGrid, not an Axes
ax.map_dataframe(sns.barplot,
                 x='PRODUCT_SUBCLASS',
                 y="gross_profit",
                 order=subclass_order)
ax.set_xticklabels(rotation=90)
plt.gcf().set_size_inches(20, 10)