In [None]:
import pandas as pd

# Locations of the two World Bank CSV exports; the " (1)" suffix is part of the folder names
inflation_path = "./API_NY.GDP.DEFL.KD.ZG_DS2_en_csv_v2_22672 (1)/API_NY.GDP.DEFL.KD.ZG_DS2_en_csv_v2_22672.csv"
cpi_path = "./API_FP.CPI.TOTL.ZG_DS2_en_csv_v2_23195 (1)/API_FP.CPI.TOTL.ZG_DS2_en_csv_v2_23195.csv"

# Both files are read the same way: the first four lines are skipped before the header row
inflation_df, cpi_df = (
    pd.read_csv(csv_path, skiprows=4) for csv_path in (inflation_path, cpi_path)
)

# Report the (rows, columns) of each loaded frame
print(" Inflation (GDP Deflator) loaded:", inflation_df.shape)
print(" CPI data loaded:", cpi_df.shape)

# Show the first three rows of each frame, GDP deflator first
display(inflation_df.head(3), cpi_df.head(3))


In [None]:
import pandas as pd

# Location of the FAO food price indices workbook
fao_path = "./Food_price_indices_data_may629 (2).xls"

# Read the first sheet, skipping the two rows above the header
# (presumably title/metadata rows — confirm against the workbook)
fao_df = pd.read_excel(
    fao_path,
    sheet_name=0,
    skiprows=2,
)

# Report the frame's dimensions, then show its first five rows
print(" FAO Food Price Indices shape:", fao_df.shape)
display(fao_df.head(n=5))


In [None]:
import os

# Print every entry found in the Inflation-data folder
inflation_data_folder = "./Inflation-data"
print(
    "Files in Inflation-data folder:",
    os.listdir(inflation_data_folder),
)


In [None]:
import os
import pandas as pd

# Folder that holds the Stata (.dta) inflation files
inflation_data_folder = "./Inflation-data"

# os.listdir returns entries in arbitrary order, so sort the names to make the
# load order (and therefore the printed output and dict order) reproducible.
dta_files = sorted(
    f for f in os.listdir(inflation_data_folder) if f.endswith(".dta")
)

# Say so explicitly when nothing matches, instead of silently producing an empty dict
if not dta_files:
    print(f" No .dta files found in {inflation_data_folder}")

# Maps each file name to the DataFrame loaded from it
inflation_dfs = {}

for file in dta_files:
    path = os.path.join(inflation_data_folder, file)
    df = pd.read_stata(path)
    inflation_dfs[file] = df
    # Report dimensions and show the first three rows of every file as it loads
    print(f" Loaded {file} with shape {df.shape}")
    display(df.head(3))


In [None]:
import pandas as pd

# Re-read the FAO workbook so this cell always starts from the raw first sheet
fao_path = "./Food_price_indices_data_may629 (2).xls"
fao_df = pd.read_excel(fao_path, sheet_name=0, skiprows=2)

# Keep only the columns whose label does not start with "Unnamed"
unnamed_mask = fao_df.columns.str.contains('^Unnamed')
fao_df = fao_df.loc[:, ~unnamed_mask]

# Parse the Date column once; values that cannot be parsed become NaT
parsed_dates = pd.to_datetime(fao_df['Date'], errors='coerce')
fao_df['Date'] = parsed_dates

# Derive calendar year and month from the parsed dates
fao_df['Year'] = parsed_dates.dt.year
fao_df['Month'] = parsed_dates.dt.month

# Show the resulting column names and the first five rows
print("Columns after cleaning:", fao_df.columns.tolist())
display(fao_df.head())


In [None]:
import pandas as pd

# Monthly food CPI Stata file (the original note calls it the Kenya series —
# TODO confirm which countries the file actually covers)
fcpi_path = "./Inflation-data/fcpi_m.dta"
fcpi_df = pd.read_stata(fcpi_path)

# Inspect the schema: column names first, then the first five rows
fcpi_columns = fcpi_df.columns.tolist()
print("Columns in fcpi_m.dta:", fcpi_columns)
display(fcpi_df.head())


In [None]:
import pandas as pd

# Clean a copy of the merged dataset so the original `merged` frame stays untouched.
# NOTE(review): `merged` is not defined in the cells visible in this review; confirm
# the merge step runs before this cell, otherwise a fresh kernel raises NameError.
merged_clean = merged.copy()

# --- 1. Ensure Date column is datetime (unparseable values become NaT) ---
merged_clean['Date'] = pd.to_datetime(merged_clean['Date'], errors='coerce')

# --- 2. Ensure numeric columns are numeric (unparseable values become NaN) ---
numeric_cols = ['FPI', 'Food_CPI', 'CPI', 'GDP_Deflator', 'Year', 'Month']
for col in numeric_cols:
    merged_clean[col] = pd.to_numeric(merged_clean[col], errors='coerce')

# --- 3. Handle missing values ---
# NOTE(review): interpolation and forward/backward fills follow the current row
# order; presumably rows are already ordered by date within each country — confirm.

# 3a. Food CPI: linear interpolation within each country
merged_clean['Food_CPI'] = merged_clean.groupby('Country')['Food_CPI'].transform(
    lambda x: x.interpolate(method='linear')
)

# 3b/3c. CPI and GDP Deflator: forward fill, then backward fill, within each country.
# Uses .ffill()/.bfill() because fillna(method=...) is deprecated since pandas 2.1.
for filled_col in ('CPI', 'GDP_Deflator'):
    merged_clean[filled_col] = merged_clean.groupby('Country')[filled_col].transform(
        lambda x: x.ffill().bfill()
    )

# --- 4. Optional: fill remaining NaNs in FPI by interpolating over the whole column ---
merged_clean['FPI'] = merged_clean['FPI'].interpolate(method='linear')

# --- 5. Check final shape and missing values ---
print("Cleaned dataset shape:", merged_clean.shape)
print("Missing values per column:")
print(merged_clean.isna().sum())

# --- 6. Preview cleaned data ---
display(merged_clean.head(10))


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

def _plot_metric_by_commodity(yearly, metric, title):
    """Draw one figure with a line per commodity for the yearly mean of `metric`.

    Parameters:
        yearly: frame with 'Year', 'Commodity' and the `metric` column.
        metric: name of the column plotted on the y-axis (also used as the y label).
        title: figure title.
    """
    plt.figure(figsize=(16,8))
    sns.lineplot(data=yearly, x='Year', y=metric, hue='Commodity', marker='o')
    plt.title(title)
    plt.ylabel(metric)
    plt.xlabel('Year')
    # Place the legend outside the axes so it does not cover the lines
    plt.legend(title='Commodity', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.tight_layout()
    plt.show()


# Aggregate per year & commodity: mean of each index (NaN values are skipped by mean)
# NOTE(review): this reads `merged`, not the cleaned `merged_clean` frame — confirm
# that plotting the uncleaned data is intended.
agg = merged.groupby(['Year','Commodity']).agg({
    'FPI':'mean',
    'Food_CPI':'mean',
    'CPI':'mean'
}).reset_index()

# One figure per metric, in the same order as before: FPI, Food_CPI, CPI
for metric, title in (
    ('FPI', 'Global Food Price Index (FPI) Trends per Commodity'),
    ('Food_CPI', 'Average Food_CPI per Commodity Across Countries'),
    ('CPI', 'Average CPI per Commodity Across Countries'),
):
    _plot_metric_by_commodity(agg, metric, title)
