In [77]:
import os

import pandas as pd

In [78]:
# Load food price data
food_price = pd.read_excel(
    'food_price.xlsx',
    skiprows=[1]
)

# Select and rename columns
food_price = food_price[[
    "NAMA VARIABEL", "NAMA WILAYAH VARIABEL",
    "TAHUN VARIABEL", "TURUNAN TAHUN VARIABEL",
    "NILAI"
]]
food_price.columns = [
    'Nama', 'Wilayah', 'Tahun',
    'Tanggal', 'Harga'
]

# Fix typo
food_price['Nama'].replace('Besar', 'Beras', inplace=True)

# Extract month from date
food_price['Bulan'] = pd.DatetimeIndex(food_price['Tanggal']).month

# Translate coded values in column 'Tahun'
food_price['Tahun'] = food_price['Tahun'].apply(
    lambda x : 2019 - (x - 17)
)

# Extract date from column 'Tanggal'
food_price['Tanggal'] = pd.DatetimeIndex(food_price['Tanggal']).day

# Convert data from long format to wide format
food_price = food_price.pivot_table(
    index=['Wilayah', 'Tahun', 'Bulan', 'Tanggal'],
    columns='Nama',
    values='Harga'
)
food_price = food_price.reset_index()

# Delete name of index
food_price.columns.name = None

food_price.to_csv('food_price.csv', index=False)

food_price.head()

Unnamed: 0,Wilayah,Tahun,Bulan,Tanggal,Bawang Merah,Bawang Putih,Beras,Cabai Merah,Cabai Rawit,Daging Ayam,Daging Sapi,Gula Pasir,Minyak Goreng,Telur Ayam
0,Indonesia,2016,7,1,45300,43150,11550,32600,40250,40000,121100,17800,14200,25450
1,Indonesia,2016,7,11,46300,40900,11600,35050,41850,40700,121550,17550,14150,23750
2,Indonesia,2016,7,12,47550,39100,11600,33550,37450,38650,121450,17550,14100,24050
3,Indonesia,2016,7,13,47550,38600,11600,32200,36150,37700,120850,17550,14100,24000
4,Indonesia,2016,7,14,43950,38900,11500,34800,38050,40200,116600,17500,14000,23900


In [83]:
# Read in food_price data
food_price = pd.read_csv('food_price.csv')

# Drop unused columns
food_price.drop('Tanggal', axis=1, inplace=True)

food_price.groupby(['Tahun', 'Bulan', 'Wilayah']).mean().reset_index()

# Perform merging in order to calculate YoY growth
food_price['Prev Year'] = food_price['Tahun'] - 1
food_price = pd.merge(
    food_price, food_price,
    left_on=['Prev Year', 'Bulan', 'Wilayah'], right_on=['Tahun', 'Bulan', 'Wilayah']
)

# Drop unnecessary columns
food_price.drop(['Prev Year_x', 'Tahun_y', 'Prev Year_y'], axis=1, inplace=True)

# Rename Tahun_x
food_price['Tahun'] = food_price['Tahun_x']
food_price.drop('Tahun_x', axis=1, inplace=True)

food_names = []
# Calculate YoY Growth
for col in food_price:
    if col[-1] != 'x':
        continue
  
    food_name = col[:-2]
    food_names.append(food_name)
  
    food_price[food_name + ' Growth'] = (
        (food_price[food_name + '_x'] / food_price[food_name + '_y'] - 1)
        * 100
    )

# Select only Tahun, Quarter, and growth columns
food_price = food_price[
    ['Tahun', 'Bulan', 'Wilayah'] + [name + ' Growth' for name in food_names]
]

# Rename columns
food_price.columns = ['Tahun', 'Bulan', 'Wilayah'] + food_names

food_price.head()

Unnamed: 0,Tahun,Bulan,Wilayah,Bawang Merah,Bawang Putih,Beras,Cabai Merah,Cabai Rawit,Daging Ayam,Daging Sapi,Gula Pasir,Minyak Goreng,Telur Ayam
0,2017,7,Indonesia,-16.11479,-8.690614,-2.597403,-0.460123,1.36646,-13.25,-4.748142,-15.730337,-1.056338,-14.145383
1,2017,7,Indonesia,-17.926566,-3.667482,-3.017241,-7.417974,-2.508961,-14.742015,-5.100782,-14.529915,-0.706714,-8.0
2,2017,7,Indonesia,-20.084122,0.767263,-3.017241,-3.278689,8.94526,-10.219922,-5.022643,-14.529915,-0.35461,-9.147609
3,2017,7,Indonesia,-20.084122,2.072539,-3.017241,0.776398,12.863071,-7.95756,-4.551096,-14.529915,-0.35461,-8.958333
4,2017,7,Indonesia,-13.538111,1.285347,-2.173913,-6.752874,7.227332,-13.681592,-1.072041,-14.285714,0.357143,-8.577406


In [65]:
def calculate_index(row):
    weights = pd.read_csv(os.path.join('input', 'food_weight.csv'))
    
    Pn = 0
    Po = 0
    for food in pd.unique(weights['food']):
        food_weight = weights.loc[weights['food'] == food]['weight'].values[0]
        Pn += row[food + '_x'] * food_weight
        Po += row[food + '_y'] * food_weight
    
    index = Pn / Po * 100
    return index - 100

In [66]:
from functools import reduce

food_price = food_price.loc[
    ~(
        (food_price['Tahun'] == 2019)
        & (food_price['Bulan'] >= 10)
    )
]

food_price['Prev Bulan'] = food_price.apply(
    lambda row: row['Bulan'] - 1 if row['Bulan'] > 1 else 12, axis=1
)

food_price['Prev Tahun'] = food_price.apply(
    lambda row: row['Tahun'] if row['Prev Bulan'] != 12 else row['Tahun'] - 1,
    axis=1
)

food_price = pd.merge(
    food_price, food_price,
    left_on=['Prev Tahun', 'Prev Bulan', 'Provinsi'],
    right_on=['Tahun', 'Bulan', 'Provinsi']
)

food_price = food_price.drop(
    ['Tahun_y', 'Prev Tahun_x', 'Prev Tahun_y', 'Prev Bulan_x', 'Prev Bulan_y'], axis=1
)

food_price['index'] = food_price.apply(calculate_index, axis=1)

weights = pd.read_csv(os.path.join('input', 'food_weight.csv'))
food_names = pd.unique(weights['food'])
cols_to_drop = [col + '_y' for col in food_names]

food_price = food_price.drop(cols_to_drop, axis=1)

food_price.columns = [
    col[:-2] if col[-2:] == '_x' else col for col in food_price.columns
]

food_price.head()

KeyError: 'Provinsi'