# Todo

- [x] 把成交量整理成每週加總
- [ ] 加總同縣市市場

In [2]:
import pandas as pd

# Load Data

In [3]:
# Read the raw price table and keep only the four columns this notebook uses.
total_data = pd.read_csv('../price_with_typhoon_utf8.csv')[
  ['DateTime', '交易量', '市場名稱', '作物名稱']
]

In [4]:
# Crops analysed in this notebook; rows for any other crop are dropped below.
USED_CROPS = [
  '鳳梨-金鑽鳳梨',
  '香蕉',
  '青蔥-日蔥',
  '蓮霧-紅蓮霧',
  '芽菜類-黃豆牙',
  '南瓜-木瓜形',
  '大蒜-蒜仁',
  '小白菜-土白菜',
  '木瓜-網室紅肉',
  '洋香瓜-網狀紅肉',
]
total_data = total_data.loc[total_data['作物名稱'].isin(USED_CROPS)]

# Sum up the total volume week by week

## Create weekly sequence

In [18]:
# Build back-to-back 7-day windows as (start, end) Timestamp pairs.
# The first window starts one week before 2012-01-01, and windows keep being
# added for as long as the previous window's end is before 2020-01-01.
date_points = []
end_date = pd.to_datetime('2012-01-01') - pd.DateOffset(days=7)
start_date = end_date

while end_date < pd.to_datetime('2020-01-01'):
  # The new window begins exactly where the previous one ended.
  start_date, end_date = end_date, end_date + pd.DateOffset(days=7)
  date_points.append((start_date, end_date))

## Extract 7 days from the data

In [21]:
# Positions of the window start / end inside each (start, end) tuple.
START = 0
END = 1

def extract_period(df, start, end):
  """Return the rows of *df* whose 'DateTime' falls in the half-open window [start, end).

  Args:
    df: DataFrame with a 'DateTime' column that ``pd.to_datetime`` can parse.
    start: Inclusive lower bound of the window.
    end: Exclusive upper bound of the window.

  Returns:
    The matching rows of *df*, with their original index and columns.
  """
  # Parse the column once and reuse it for both bounds; parsing is the
  # expensive part of this filter.
  timestamps = pd.to_datetime(df['DateTime'])
  return df[(timestamps >= start) & (timestamps < end)]

## Group by crop & market and sum up the volume

In [53]:
def sum_up_week(df, start, end):
  """Sum the traded volume per (crop, market) for rows dated in [start, end).

  Args:
    df: DataFrame with 'DateTime', '交易量', '市場名稱' and '作物名稱' columns.
    start: Inclusive start of the window (a Timestamp; ``.date()`` is used for labels).
    end: Exclusive end of the window (a Timestamp).

  Returns:
    DataFrame with columns '作物名稱', '市場名稱', '交易量', 'DateTime', 'Period',
    one row per (crop, market) pair present in the window. 'DateTime' is the
    window start date and 'Period' is 'start~end'. The frame has zero rows
    when no data falls inside the window.
  """
  week_data = extract_period(df, start, end)

  # Select the volume column explicitly. Summing every remaining column and
  # reading position 0 depends on whether the pandas version keeps the string
  # 'DateTime' column in the result, in which case position 0 is not the volume.
  week_sum = (
    week_data.groupby(by=['作物名稱', '市場名稱'])['交易量']
    .sum()
    .reset_index()
  )

  # Label every row with the window it belongs to.
  week_sum['DateTime'] = str(start.date())
  week_sum['Period'] = str(start.date()) + '~' + str(end.date())

  return week_sum[['作物名稱', '市場名稱', '交易量', 'DateTime', 'Period']]

In [56]:
# One summary frame per weekly window, in the same order as date_points.
total_weeks = [
  sum_up_week(total_data, period[START], period[END])
  for period in date_points
]

In [66]:
# Stack the per-week summary frames into one long table.
week_volume = pd.concat(total_weeks)
# Disabled export of the combined table, kept for reference:
# pd.concat(total_weeks).to_csv('volume_per_week.csv', index=False)

## Save csv files by crop

In [72]:
# Split the weekly table by crop and write one CSV file per crop.
group_volume = week_volume.groupby('作物名稱')
for crop_name, crop_rows in group_volume:
  crop_rows.to_csv(f'週成交量_{crop_name}.csv', index=False)

# Prepare the data for map plot

* 值 = 當日成交量
```
DateTime, "臺北市", "嘉義市", "新竹市", "基隆市" ...
```

In [7]:
# Output column layout for the map data: 'DateTime' followed by one column
# per city/county, in the order listed in CITY.
CITY = ["臺北市", "嘉義市", "新竹市", "基隆市", "新北市", "桃園市", "臺中市", "彰化縣", "高雄市", "臺南市", "金門縣", "澎湖縣", "雲林縣", "連江縣", "新竹縣", "苗栗縣", "屏東縣", "嘉義縣", "宜蘭縣", "南投縣", "花蓮縣", "臺東縣"]

# Wholesale market name -> the city/county (an entry of CITY) it is located in.
market_to_city = {
 # Sanchong District is part of New Taipei City, not Hsinchu City.
 '三重區': '新北市',
 '南投市': '南投縣',
 '台中市': '臺中市',
 '台北一': '臺北市',
 '台北二': '臺北市',
 '台東市': '臺東縣',
 '嘉義市': '嘉義市',
 '宜蘭市': '宜蘭縣',
 '屏東市': '屏東縣',
 '東勢鎮': '臺中市',
 '板橋區': '新北市',
 '桃農'  : '桃園市',
 '永靖鄉': '彰化縣',
 '溪湖鎮': '彰化縣',
 '花蓮市': '花蓮縣',
 '西螺鎮': '雲林縣',
 '豐原區': '臺中市',
 '高雄市': '高雄市',
 '鳳山區': '高雄市'}

In [14]:
def volumn_by_date(df):
  """Pivot per-market volume rows into one row per date with one column per city.

  Each row's '交易量' is added to the city that ``market_to_city`` assigns to
  its '市場名稱'. Cities with no trades on a date stay at 0.

  Args:
    df: DataFrame with 'DateTime', '市場名稱' and '交易量' columns.

  Returns:
    DataFrame with a 'DateTime' column followed by one column per entry of
    CITY. Dates appear in order of first appearance in *df*.

  Raises:
    KeyError: if a market name is missing from ``market_to_city``.
  """
  volume_data = {}

  for date, market, volume in zip(df['DateTime'], df['市場名稱'], df['交易量']):
    per_city = volume_data.get(date)
    if per_city is None:
      per_city = volume_data[date] = dict.fromkeys(CITY, 0)
    # Accumulate on every row, including the one that created the bucket;
    # skipping that row would drop the first trade of each date.
    per_city[market_to_city[market]] += volume

  summary = {'DateTime': list(volume_data)}
  for c in CITY:
    summary[c] = [per_city[c] for per_city in volume_data.values()]

  return pd.DataFrame.from_dict(summary)

In [17]:
# Write one date-by-city volume table per crop for the map plot.
map_group_volume = total_data.groupby('作物名稱')
for name, group in map_group_volume:
  # Iterating the groupby already yields this crop's rows, so no second
  # get_group(name) lookup is needed.
  df = volumn_by_date(group)
  df.to_csv(f'市場成交量_{name}.csv', index=False)