In [4]:
# Load required libraries
library(dplyr)
library(tidyr)
library(lubridate)
library(ggplot2)

In [6]:
# Load required libraries
library(dplyr)
library(tidyr)
library(lubridate)
library(ggplot2)

# Load and preprocess data
food_prices <- read.csv("food_prices_ph.csv", stringsAsFactors = TRUE)
food_prices$Date <- as.Date(food_prices$Date)
food_prices$Price <- as.numeric(food_prices$Price)
food_prices$Year <- year(food_prices$Date)

# Define ingredient categories with improved structure
ingredient_categories <- list(
  protein = c(
    "Meat (pork)", "Meat (beef, chops with bones)", "Meat (chicken, whole)",
    "Fish (fresh)", "Fish (frigate tuna)", "Fish (mackerel, fresh)",
    "Fish (tilapia)", "Fish (roundscad)", "Fish (slipmouth)",
    "Fish (threadfin bream)", "Beans (mung)", "Groundnuts (shelled)",
    "Eggs", "Eggs (duck)", "Meat (pork, with bones)", "Meat (beef)",
    "Meat (pork, hock)", "Meat (pork, with bones)", "Shrimp (endeavor)",
    "Shrimp (tiger)", "Crab", "Fish (redbelly yellowtail fusilier)",
    "Fish (milkfish)"
  ),
  carbohydrate = c(
    "Rice (milled, superior)", "Rice (regular, milled)", "Rice (premium)",
    "Rice (special)", "Rice (well milled)", "Maize flour (yellow)",
    "Maize flour (white)", "Maize (yellow)", "Maize (white)",
    "Potatoes (Irish)", "Sweet potatoes", "Semolina (white)",
    "Semolina (yellow)", "Taro"
  ),
  vegetables = c(
    "Cabbage", "Cabbage (chinese)", "Carrots", "Garlic", "Onions (red)",
    "Onions (white)", "Tomatoes", "Eggplants", "Bitter melon", "Bottle gourd",
    "Choko", "Water spinach", "Sweet Potato leaves", "Choko",
    "Water spinach"
  ),
  fruits = c(
    "Bananas (latundan)", "Bananas (saba)", "Bananas (lakatan)",
    "Mangoes (carabao)", "Mangoes (piko)", "Pineapples", "Coconut"
  ),
  oils_and_condiments = c(
    "Oil (cooking)", "Sugar (brown)", "Sugar (white)", "Garlic (large)",
    "Garlic (small)", "Ginger", "Calamansi", "Anchovies"
  )
)

# Create category mapping function
assign_category <- function(commodity) {
  for (cat in names(ingredient_categories)) {
    if (commodity %in% ingredient_categories[[cat]]) {
      return(cat)
    }
  }
  return("Other")
}

# Process data
food_prices <- food_prices %>%
  filter(year(Date) >= 2019, Unit != "Unit") %>%
  mutate(
    Category = sapply(Commodity, assign_category),
    UnitPrice_kg = case_when(
      Unit == "750 ML" & Commodity == "Oil (cooking)" ~ Price / (0.75 * 0.92),
      Unit == "KG" ~ Price,
      TRUE ~ NA
    )
  ) %>%
  filter(!is.na(UnitPrice_kg))

# Calculate yearly averages with improved imputation
yearly_avg_prices <- food_prices %>%
  group_by(Year, Category) %>%
  summarise(Avg_Price = mean(UnitPrice_kg, na.rm = TRUE), .groups="keep") %>%
  pivot_wider(names_from = Category, values_from = Avg_Price) %>%
  arrange(Year)

# Impute missing values using forward and backward fill
yearly_avg_prices <- yearly_avg_prices %>%
  mutate(
    Protein = zoo::na.locf(zoo::na.locf(Protein, na.rm = FALSE, fromLast = TRUE), na.rm = FALSE),
    Carbohydrate = zoo::na.locf(zoo::na.locf(Carbohydrate, na.rm = FALSE, fromLast = TRUE), na.rm = FALSE),
    Vegetable = zoo::na.locf(zoo::na.locf(Vegetable, na.rm = FALSE, fromLast = TRUE), na.rm = FALSE),
    Fruit = zoo::na.locf(zoo::na.locf(Fruit, na.rm = FALSE, fromLast = TRUE), na.rm = FALSE),
    `Oils and Condiments` = zoo::na.locf(zoo::na.locf(`Oils and Condiments`, na.rm = FALSE, fromLast = TRUE), na.rm = FALSE),
    Other = zoo::na.locf(zoo::na.locf(Other, na.rm = FALSE, fromLast = TRUE), na.rm = FALSE)
  )

# Define meal composition
meal_composition <- list(
  protein = 175,    # grams
  carbohydrate = 250,
  vegetable = 100,
  fruit = 150,
  oils_and_condiments = 10
)

# Calculate base meal price
yearly_avg_prices$Base_Meal_Price <-
  (meal_composition$protein * yearly_avg_prices$Protein / 1000) +
  (meal_composition$carbohydrate * yearly_avg_prices$Carbohydrate / 1000) +
  (meal_composition$vegetable * yearly_avg_prices$Vegetable / 1000) +
  (meal_composition$fruit * yearly_avg_prices$Fruit / 1000) +
  (meal_composition$oils_and_condiments * yearly_avg_prices$`Oils and Condiments` / 1000)

# Get latest price
latest_price <- yearly_avg_prices %>%
  filter(Year == max(Year)) %>%
  pull(Base_Meal_Price)

# Create visualization
ggplot(yearly_avg_prices, aes(x = Year)) +
  geom_line(aes(y = Base_Meal_Price, color = "Base Meal Price")) +
  geom_point(aes(y = Base_Meal_Price)) +
  theme_minimal() +
  labs(
    title = "Base Meal Price Trend (2019-Present)",
    x = "Year",
    y = "Price (PHP)"
  ) +
  theme(legend.position = "bottom")

ERROR: [1m[33mError[39m in `mutate()`:[22m
[1m[22m[36mℹ[39m In argument: `Protein = zoo::na.locf(...)`.
[36mℹ[39m In group 1: `Year = 2019`.
[1mCaused by error:[22m
[33m![39m object 'Protein' not found
