In [None]:
#__author__ = "Admin GGCS"
#__copyright__ = "Copyright 2019"
#__license__ = "MIT"
#__version__ = "1.0.0"
#__maintainer__ = "Admin GGCS"
#__website__ = "ggcs.io

# 0 Setup

In [30]:
%matplotlib inline
import pandas as pd
import numpy as np
import os

# 1 データの準備

## 1.1 CSVから読み込み

In [31]:
#元データ読み込み
raw_df = pd.read_csv(
    'jiro-nutrition-facts.csv', header = 8, 
    #必要な列を読み込む
    usecols = ['食品番号','食品名','エネルギー（kcal）','たんぱく質','脂   質','炭水化物','カリウム','カルシウム','ビタミンB1','ビタミンB2','ビタミンC','飽和脂肪酸','食塩相当量'],
    #以下の２項目は現段階でデータ型を明示しておいたほうが何かと都合が良い。
    dtype = {'食品番号': str, '食品名': str}
)

#雰囲気を確認するとこんな感じ
#お掃除の方針
#→カッコに入った数値、i.e. (0.28)とか(0)とか、は、推定値を表すので、カッコを外して算入することにしたい。
#→Trや - は 0.0 にしておいた方が安心（NaNにするとあとで面倒くさいので）。
raw_df

Unnamed: 0,食品番号,食品名,エネルギー（kcal）,たんぱく質,脂 質,飽和脂肪酸,炭水化物,カリウム,カルシウム,ビタミンB1,ビタミンB2,ビタミンC,食塩相当量
0,1047,こむぎ　［中華めん類］　中華めん　生,281,8.6,1.2,(0.28),55.7,350,21,0.02,0.02,(0),1.0
1,6061,（キャベツ類）　キャベツ　結球葉　生,23,1.3,0.2,0.02,5.2,200,43,0.04,0.03,41,0.0
2,6287,（もやし類）　だいずもやし　生,37,3.7,1.5,0.20,2.3,160,23,0.09,0.07,5,0.0
3,11119,＜畜肉類＞ぶた　［大型種肉］　かたロース　脂身つき　生,253,17.1,19.2,7.26,0.1,300,4,0.63,0.23,2,0.1
4,14006,（植物油脂類）　調合油,921,0.0,100.0,10.97,0,Tr,Tr,0.0,0.0,(0),0.0
5,14016,（動物脂類）　ラード,941,0.0,100.0,39.29,0,0,0,0.0,0.0,0,0.0
6,17007,＜調味料類＞（しょうゆ類）こいくちしょうゆ,71,7.7,0.0,-,10.1,390,29,0.05,0.17,0,14.5
7,17024,＜調味料類＞（だし類）鳥がらだし,7,1.1,0.2,0.06,Tr,65,2,0.02,0.09,(0),0.1
8,17025,＜調味料類＞（だし類）中華だし,3,0.8,0.0,-,Tr,90,3,0.15,0.03,0,0.1
9,17026,＜調味料類＞（だし類）洋風だし,6,1.3,0.0,-,0.3,110,5,0.02,0.05,0,0.5


## 1.2 お掃除

In [32]:
#数値のカッコを外し、文字は 0 に。
clean_df = raw_df.replace(['\((\d+\.*\d*)\)', 'Tr', '-'], [r'\1', 0.0, 0.0], regex=True)
#データタイプを float に揃えておく（これをやっておかないとあとの計算で原因不意の error でハマることが多い。）
clean_df.iloc[:,2:] = clean_df.iloc[:,2:].astype(float)

#確認
clean_df

Unnamed: 0,食品番号,食品名,エネルギー（kcal）,たんぱく質,脂 質,飽和脂肪酸,炭水化物,カリウム,カルシウム,ビタミンB1,ビタミンB2,ビタミンC,食塩相当量
0,1047,こむぎ　［中華めん類］　中華めん　生,281.0,8.6,1.2,0.28,55.7,350.0,21.0,0.02,0.02,0.0,1.0
1,6061,（キャベツ類）　キャベツ　結球葉　生,23.0,1.3,0.2,0.02,5.2,200.0,43.0,0.04,0.03,41.0,0.0
2,6287,（もやし類）　だいずもやし　生,37.0,3.7,1.5,0.2,2.3,160.0,23.0,0.09,0.07,5.0,0.0
3,11119,＜畜肉類＞ぶた　［大型種肉］　かたロース　脂身つき　生,253.0,17.1,19.2,7.26,0.1,300.0,4.0,0.63,0.23,2.0,0.1
4,14006,（植物油脂類）　調合油,921.0,0.0,100.0,10.97,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,14016,（動物脂類）　ラード,941.0,0.0,100.0,39.29,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,17007,＜調味料類＞（しょうゆ類）こいくちしょうゆ,71.0,7.7,0.0,0.0,10.1,390.0,29.0,0.05,0.17,0.0,14.5
7,17024,＜調味料類＞（だし類）鳥がらだし,7.0,1.1,0.2,0.06,0.0,65.0,2.0,0.02,0.09,0.0,0.1
8,17025,＜調味料類＞（だし類）中華だし,3.0,0.8,0.0,0.0,0.0,90.0,3.0,0.15,0.03,0.0,0.1
9,17026,＜調味料類＞（だし類）洋風だし,6.0,1.3,0.0,0.0,0.3,110.0,5.0,0.02,0.05,0.0,0.5


In [33]:
#様子を見る　→ 良さげ
clean_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 13 columns):
食品番号           10 non-null object
食品名            10 non-null object
エネルギー（kcal）    10 non-null float64
たんぱく質          10 non-null float64
脂   質          10 non-null float64
飽和脂肪酸          10 non-null float64
炭水化物           10 non-null float64
カリウム           10 non-null float64
カルシウム          10 non-null float64
ビタミンB1         10 non-null float64
ビタミンB2         10 non-null float64
ビタミンC          10 non-null float64
食塩相当量          10 non-null float64
dtypes: float64(11), object(2)
memory usage: 1.1+ KB


In [34]:
#ダメだった時の犯人探し用
#for i in range(0, clean_df.shape[0]):
#    for j in  range(1,clean_df.shape[1]):
#        print(i,j, clean_df.iloc[i,j], type(clean_df.iloc[i,j]))

# 2 計算

# 2.1 計算用に空のDataframeを作る

In [35]:
#新たに calc_df を作る　← clean_df をcopy()して改造する方針。
calc_df = clean_df.copy()
#2列目の右に「食材重量(100g)」欄を挿入。初期値は 0.0
calc_df.insert(2,'食材重量(100g)', 0.0)
#全体的に数値はすべて 0.0 で初期化しておく。
calc_df.iloc[:,2:] = 0.0

#確認
calc_df

Unnamed: 0,食品番号,食品名,食材重量(100g),エネルギー（kcal）,たんぱく質,脂 質,飽和脂肪酸,炭水化物,カリウム,カルシウム,ビタミンB1,ビタミンB2,ビタミンC,食塩相当量
0,1047,こむぎ　［中華めん類］　中華めん　生,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,6061,（キャベツ類）　キャベツ　結球葉　生,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,6287,（もやし類）　だいずもやし　生,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,11119,＜畜肉類＞ぶた　［大型種肉］　かたロース　脂身つき　生,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,14006,（植物油脂類）　調合油,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,14016,（動物脂類）　ラード,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,17007,＜調味料類＞（しょうゆ類）こいくちしょうゆ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,17024,＜調味料類＞（だし類）鳥がらだし,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,17025,＜調味料類＞（だし類）中華だし,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,17026,＜調味料類＞（だし類）洋風だし,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## 2.2 食材重量を設定

In [36]:
#食材重量設定の例

#めん重量設定（100g 単位）
calc_df.iloc[0,2] = 2.8 #めん
#キャベツ重量設定（100g 単位）
calc_df.iloc[1,2] = 0.4 #キャベツ
#もやし重量設定（100g 単位）
calc_df.iloc[2,2] = 0.3 #もやし
#ぶた重量設定（100g 単位）
calc_df.iloc[3,2] = 1.1 #ぶた
#植物油脂重量設定（100g 単位）
calc_df.iloc[4,2] = 0.0 #植物油脂
#動物脂類重量設定（100g 単位）
calc_df.iloc[5,2] = 0.29 #動物脂類
#しょうゆ重量設定（100g 単位）
calc_df.iloc[6,2] = 0.0 #しょうゆ
#鳥がらだし量設定（100g 単位）
calc_df.iloc[7,2] = 0.0 #鳥がらだし
#中華だし重量設定（100g 単位）
calc_df.iloc[8,2] = 0.0 #中華だし
#洋風だし重量設定（100g 単位）
calc_df.iloc[9,2] = 2.0 #洋風だし

## 2.3 計算

In [37]:
#表計算操作（一行ずつ計算。ちょっと頭悪そう…）
for k in range(calc_df.shape[0]):
    calc_df.iloc[k,3:] = clean_df.iloc[k,2:] * calc_df.iloc[k,2]
    
#確認 → 良さげ :)
calc_df

Unnamed: 0,食品番号,食品名,食材重量(100g),エネルギー（kcal）,たんぱく質,脂 質,飽和脂肪酸,炭水化物,カリウム,カルシウム,ビタミンB1,ビタミンB2,ビタミンC,食塩相当量
0,1047,こむぎ　［中華めん類］　中華めん　生,2.8,786.8,24.08,3.36,0.784,155.96,980.0,58.8,0.056,0.056,0.0,2.8
1,6061,（キャベツ類）　キャベツ　結球葉　生,0.4,9.2,0.52,0.08,0.008,2.08,80.0,17.2,0.016,0.012,16.4,0.0
2,6287,（もやし類）　だいずもやし　生,0.3,11.1,1.11,0.45,0.06,0.69,48.0,6.9,0.027,0.021,1.5,0.0
3,11119,＜畜肉類＞ぶた　［大型種肉］　かたロース　脂身つき　生,1.1,278.3,18.81,21.12,7.986,0.11,330.0,4.4,0.693,0.253,2.2,0.11
4,14006,（植物油脂類）　調合油,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,14016,（動物脂類）　ラード,0.29,272.89,0.0,29.0,11.3941,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,17007,＜調味料類＞（しょうゆ類）こいくちしょうゆ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,17024,＜調味料類＞（だし類）鳥がらだし,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,17025,＜調味料類＞（だし類）中華だし,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,17026,＜調味料類＞（だし類）洋風だし,2.0,12.0,2.6,0.0,0.0,0.6,220.0,10.0,0.04,0.1,0.0,1.0


In [38]:
#項目ごとの合計値を確認
calc_df.iloc[:,3:].sum()

エネルギー（kcal）    1370.2900
たんぱく質            47.1200
脂   質            54.0100
飽和脂肪酸            20.2321
炭水化物            159.4400
カリウム           1658.0000
カルシウム            97.3000
ビタミンB1            0.8320
ビタミンB2            0.4420
ビタミンC            20.1000
食塩相当量             3.9100
dtype: float64

In [39]:
#合計の行を calc_df の最後に無理やり（calc_df.iloc[:,3:].sum()）加えて summary_df を作る。
summary_df = calc_df.append(calc_df.sum(), ignore_index=True)
#それだけだとカッコ悪いのでちょっと整える。
summary_df.iloc[10,[0,1]] = (np.nan, '合計')
#確認
summary_df

Unnamed: 0,食品番号,食品名,食材重量(100g),エネルギー（kcal）,たんぱく質,脂 質,飽和脂肪酸,炭水化物,カリウム,カルシウム,ビタミンB1,ビタミンB2,ビタミンC,食塩相当量
0,1047.0,こむぎ　［中華めん類］　中華めん　生,2.8,786.8,24.08,3.36,0.784,155.96,980.0,58.8,0.056,0.056,0.0,2.8
1,6061.0,（キャベツ類）　キャベツ　結球葉　生,0.4,9.2,0.52,0.08,0.008,2.08,80.0,17.2,0.016,0.012,16.4,0.0
2,6287.0,（もやし類）　だいずもやし　生,0.3,11.1,1.11,0.45,0.06,0.69,48.0,6.9,0.027,0.021,1.5,0.0
3,11119.0,＜畜肉類＞ぶた　［大型種肉］　かたロース　脂身つき　生,1.1,278.3,18.81,21.12,7.986,0.11,330.0,4.4,0.693,0.253,2.2,0.11
4,14006.0,（植物油脂類）　調合油,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,14016.0,（動物脂類）　ラード,0.29,272.89,0.0,29.0,11.3941,0.0,0.0,0.0,0.0,0.0,0.0,0.0
6,17007.0,＜調味料類＞（しょうゆ類）こいくちしょうゆ,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,17024.0,＜調味料類＞（だし類）鳥がらだし,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,17025.0,＜調味料類＞（だし類）中華だし,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,17026.0,＜調味料類＞（だし類）洋風だし,2.0,12.0,2.6,0.0,0.0,0.6,220.0,10.0,0.04,0.1,0.0,1.0


# CSVに保存

In [None]:
summary_df.to_csv('jiroCalc-sumary.csv')
os.system('open .')