# This project analyses and visualizes a Global Food Price Dataset
## Link to the dataset: https://www.kaggle.com/datasets/lasaljaywardena/global-food-prices-dataset?resource=download

In [58]:
import pandas as pd
import os
import matplotlib.pyplot as plt
import datetime

## Data Cleaning

In [59]:
df = pd.read_csv("Data Science Projects\global_food_prices.csv", dtype = {'adm1_id': int,
                                                                          "adm0_id": int,
                                                                          "adm0_name": str, 
                                                                          "mkt_name": str, 
                                                                          "pt_id": int,
                                                                          "cm_name": str,
                                                                          "mp_price": float,
                                                                          "adm1_name": str                                                               
                                                                         })

In [60]:
df2 = df[["adm0_name", "cm_name", "um_name", "cur_name", "mp_month", "mp_year", "mp_price", "mkt_name"]]

df2 = df2.rename(columns={"adm0_name": "Country", 
                   "cm_name": "Food Item", 
                   "um_name": "Units", 
                   "cur_name": "Currency", 
                   "mp_month": "Month", 
                   "mp_year": "Year",
                   "mp_price": "Price",
                   "mkt_name": "Market"
                  })

In [61]:
# df2['Food Item'] = df2['Food Item'].replace(['- Retail', '- Wholesale'], ' ')
# df2.head()


df2['Food'] = df2['Food Item'].str.replace('- Retail', ' ')
df2["Food Item"] = df2["Food"]
df2['Food'] = df2['Food Item'].str.replace('- Wholesale', ' ')

#### Lookup function

In [62]:
# df2[df2['Food'].str.contains("Wholesale")]

In [63]:
df2.drop(columns = "Food Item", inplace = True)

#### Filtering for a specific country

In [64]:
# Honduras = df2[df2['Country'].str.contains("Honduras")]

#### Converting Month and Year to Datetime format

In [65]:
df2["Date"] = pd.to_datetime(df2['Year'].astype(str)  + df2['Month'].astype(str), format='%Y%m')
df2.drop(columns = {'Month', 'Year'}, inplace = True)

#### Data for each country from beginning of each year

In [66]:
# filter by single day
# df_filtered = df[df['date'].dt.strftime('%Y-%m-%d') == '2014-01-01']

# filter by single month
# df_filtered = df[df['date'].dt.strftime('%Y-%m') == '2014-01']

# filter by single year
# df_filtered = df[df['date'].dt.strftime('%Y') == '2014']

In [67]:
# df2[df2['Date'].dt.strftime('%m') == '01']
# df2["Quarter"] = df2["Date"].dt.quarter
df2[df2['Date'].dt.strftime('%m') == '01']

Unnamed: 0,Country,Units,Currency,Price,Market,Food,Date
0,Afghanistan,KG,AFN,50.0000,Fayzabad,Bread,2014-01-01
12,Afghanistan,KG,AFN,50.0000,Fayzabad,Bread,2015-01-01
22,Afghanistan,KG,AFN,55.5000,Fayzabad,Bread,2016-01-01
33,Afghanistan,KG,AFN,50.0000,Fayzabad,Bread,2017-01-01
45,Afghanistan,KG,AFN,50.0000,Fayzabad,Bread,2018-01-01
...,...,...,...,...,...,...,...
2050499,Zimbabwe,KG,ZWL,235.5555,Torwood 2,Beans (sugar),2021-01-01
2050506,Zimbabwe,100 ML,ZWL,103.3333,Torwood 2,Toothpaste,2021-01-01
2050513,Zimbabwe,KG,ZWL,113.3333,Torwood 2,Laundry soap,2021-01-01
2050520,Zimbabwe,250 G,ZWL,59.1667,Torwood 2,Handwash soap,2021-01-01


# Analysis of food price patterns in one specific country and for one food item:
## 2 types of analysis were conducted for Afghanistan's Bread Prices:
### 1) Market Analysis (Prices throughout the 21st century of food prices for different markets)

In [68]:
AfghanBread = df2[(df2["Country"] == 'Afghanistan') & (df2['Food'].str.contains('Bread'))]
# AfghanBread.head(50)
# AfghanBread.to_csv('Afbr', sep='\t')
# AFBR.to_excel('Afbr2.xlsx')

In [69]:
AFBR = AfghanBread.sort_values(["Market", "Date"], ascending=True, ignore_index=True)
# AFBR.head(60)
# AFBR.to_excel('Afbr3.xlsx')

### 2) Country Analysis (Food Prices averaged across all markets and analysed throughout the years)

In [70]:
AFBR_Mean = AFBR.groupby('Date')['Price'].mean() 
# Groupby one column and return the mean of only particular column in the group.
AFBR_Mean.head()
# AFBR_Mean.to_excel('Afbr4.xlsx')


Date
2000-01-01    15.2500
2000-02-01    15.3000
2000-03-01    14.7525
2000-04-01    17.1950
2000-05-01    18.2775
Name: Price, dtype: float64

In [71]:
df2.groupby(['Country', 'Date']).mean()

  df2.groupby(['Country', 'Date']).mean()


Unnamed: 0_level_0,Unnamed: 1_level_0,Price
Country,Date,Unnamed: 2_level_1
Afghanistan,2000-01-01,27717.866667
Afghanistan,2000-02-01,28620.300000
Afghanistan,2000-03-01,27001.308696
Afghanistan,2000-04-01,27493.947083
Afghanistan,2000-05-01,265318.800443
...,...,...
Zimbabwe,2021-02-01,211.339288
Zimbabwe,2021-03-01,219.791897
Zimbabwe,2021-04-01,212.235271
Zimbabwe,2021-05-01,199.064952
