In [5]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.cluster import KMeans
from IPython.display import display
import ipywidgets as widgets

# Food items that the analyses below restrict themselves to
food_items = [
    'Rice (basmati, broken)',
    'Wheat flour',
    'Sugar',
    'Milk',
    'Wheat',
    'Beans',
    'Fish',
    'Maize',
]

# Read the price file using its second row (header=1) as the column names,
# then add a parsed 'date' column and a numeric 'price' column
# (values that cannot be parsed as numbers become NaN).
df = pd.read_csv('wfp_food_prices_pak.csv', header=1)
df = df.assign(
    date=pd.to_datetime(df['#date']),
    price=pd.to_numeric(df['#value'], errors='coerce'),
)

# Define functions for each part
def part1():
    """Plot the long-run trend and the seasonal pattern of average food prices.

    Reads the module-level ``df`` (columns ``#item+name``, ``date``, ``price``)
    and ``food_items``. Shows two figures:

    1. Monthly mean prices with a fitted linear trend line.
    2. An additive seasonal decomposition (12-month period) of the same
       monthly series.

    Returns:
        None. The result is the displayed figures, or a short printed message
        when there is not enough data to fit the trend or to decompose.
    """
    seasonal_period = 12  # months per seasonal cycle

    df_part1 = df[df['#item+name'].isin(food_items)]

    # Aggregate prices into month-end bins. An offset object is passed instead
    # of the 'M' string alias, which pandas 2.2 deprecated in favour of 'ME'
    # (and 'ME' is not accepted by older pandas).
    monthly_prices = (
        df_part1.groupby(pd.Grouper(key='date', freq=pd.offsets.MonthEnd()))['price']
        .mean()
        .reset_index()
    )

    # Months without any observation come back as NaN, and LinearRegression
    # refuses NaN targets, so fit only on the months that have data.
    observed = monthly_prices.dropna(subset=['price'])
    if observed.empty:
        print('No price data available for the selected food items.')
        return

    # The row label is the number of months since the first month; keeping the
    # original labels after dropna preserves the true spacing across gaps.
    X = observed.index.values.reshape(-1, 1)
    y = observed['price']
    model = LinearRegression()
    model.fit(X, y)
    trend = model.predict(X)

    # Visualize the trend
    plt.figure(figsize=(12, 6))
    plt.scatter(observed['date'], observed['price'], color='lightblue', label='Monthly Average Prices')
    plt.plot(observed['date'], trend, color='red', label='Trend')
    plt.title('General Trend in Food Prices Over Time')
    plt.xlabel('Date')
    plt.ylabel('Average Price')
    plt.legend()
    plt.show()

    # seasonal_decompose needs a gap-free series: fill interior gaps by linear
    # interpolation and trim any missing months at either end. Indexing by date
    # also gives the decomposition plots a date axis.
    monthly_series = (
        monthly_prices.set_index('date')['price']
        .interpolate(method='linear', limit_area='inside')
        .dropna()
    )

    # The decomposition requires at least two complete seasonal cycles.
    if len(monthly_series) < 2 * seasonal_period:
        print(
            f'Seasonal decomposition skipped: need at least {2 * seasonal_period} '
            f'monthly observations, found {len(monthly_series)}.'
        )
        return

    # Decompose time series to observe seasonal patterns
    decomposed = sm.tsa.seasonal_decompose(monthly_series, model='additive', period=seasonal_period)
    decomposed.plot()
    plt.show()

def part2():
    """Show which food prices best predict the wheat flour price.

    Reads the module-level ``df`` and ``food_items``. Builds a date x item
    table of mean prices, keeps only dates on which every item has a price,
    fits a depth-limited decision tree that predicts 'Wheat flour' from the
    other items, prints the mean squared error on a 30% hold-out split and
    plots the tree's feature importances.

    Returns:
        None. The result is the printed error and the displayed bar chart, or
        a short printed message when the data is insufficient.
    """
    target = 'Wheat flour'

    df_part2 = df[df['#item+name'].isin(food_items)]

    # One row per date, one column per item (pivot_table averages duplicates);
    # drop dates on which any item is missing so the model sees complete rows.
    pivot_df = df_part2.pivot_table(values='price', index='date', columns='#item+name').dropna()

    if target not in pivot_df.columns:
        print(f"No price data for target item '{target}'.")
        return

    features = pivot_df.columns.drop(target)
    # A train/test split needs at least two rows and one predictor column.
    if len(features) == 0 or len(pivot_df) < 2:
        print('Not enough dates with complete prices for all items to fit a model.')
        return

    # Using a Decision Tree to understand feature importance
    X = pivot_df[features]
    y = pivot_df[target]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
    tree_model = DecisionTreeRegressor(max_depth=5, random_state=42)
    tree_model.fit(X_train, y_train)

    # Report hold-out error so the importances can be judged in context
    y_pred = tree_model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print(f'Test MSE for {target}: {mse:.2f}')

    # Visualize feature importances (the x-axis is importance, not a price)
    plt.figure(figsize=(10, 6))
    plt.barh(features, tree_model.feature_importances_)
    plt.xlabel('Feature Importance')
    plt.ylabel('Food Items')
    plt.title('Feature Importances in Predicting Wheat Flour Prices')
    plt.show()

def part3():
    """Plot how selected food prices move with the diesel price.

    Reads the module-level ``df``. For each of four food items, pairs the
    per-date mean diesel price with the per-date mean price of the item, fits
    a simple linear regression of item price on diesel price, and shows a
    scatter plot with the fitted line.

    Returns:
        None. The result is one displayed figure per food item, or a printed
        message for items that have too few dates in common with diesel.
    """
    # Average diesel to one value per date, once for all food items. Without
    # this, merging on 'date' is many-to-many (one row per market on each
    # side) and produces a per-date cross product of unrelated markets.
    diesel_by_date = (
        df.loc[df['#item+name'] == 'Fuel (diesel)', ['date', 'price']]
        .dropna()
        .groupby('date', as_index=False)['price']
        .mean()
        .rename(columns={'price': 'price_diesel'})
    )

    def analyze_impact_of_diesel_on_food(food_item):
        """Fit and plot item price against diesel price for one food item."""
        food_col = f'price_{food_item.replace(" ", "_")}'

        # Same per-date averaging for the food item
        food_by_date = (
            df.loc[df['#item+name'] == food_item, ['date', 'price']]
            .dropna()
            .groupby('date', as_index=False)['price']
            .mean()
            .rename(columns={'price': food_col})
        )

        # Keep only the dates on which both prices are available
        merged_df = pd.merge(diesel_by_date, food_by_date, on='date')
        if len(merged_df) < 2:
            print(f'Not enough overlapping dates between diesel and {food_item} prices.')
            return

        # Prepare data for linear regression
        X = merged_df[['price_diesel']]
        y = merged_df[food_col]

        # Perform linear regression
        model = LinearRegression()
        model.fit(X, y)

        # Plot the results
        plt.figure(figsize=(10, 6))
        plt.scatter(merged_df['price_diesel'], y, color='blue')
        plt.plot(merged_df['price_diesel'], model.predict(X), color='red')
        plt.title(f'Impact of Diesel Prices on {food_item} Prices')
        plt.xlabel('Diesel Prices')
        plt.ylabel(f'{food_item} Prices')
        plt.show()

    for food_item in ['Wheat flour', 'Rice (basmati, broken)', 'Sugar', 'Milk']:
        analyze_impact_of_diesel_on_food(food_item)

def part4():
    """Cluster provinces by their price history and plot each province's trend.

    Reads the module-level ``df`` (columns ``#item+name``, ``#adm1+name``,
    ``date``, ``price``) and ``food_items``. Builds a date x province table of
    mean prices, groups the provinces with K-Means (at most 5 clusters), and
    draws one line per province labelled with its cluster.

    Returns:
        None. The result is the displayed figure, or a short printed message
        when no province has price data.
    """
    max_clusters = 5

    df_part4 = df[df['#item+name'].isin(food_items)].copy()

    # Prepare data for K-Means Clustering: one column per province.
    pivot_df = df_part4.pivot_table(values='price', index='date', columns='#adm1+name', aggfunc='mean')
    # Forward fill carries the last known price across gaps; the back fill
    # covers dates before a province's first observation, which forward fill
    # alone leaves as NaN (KMeans rejects NaN). ffill()/bfill() replace the
    # deprecated fillna(method=...).
    pivot_df = pivot_df.ffill().bfill().dropna(axis=1, how='all')

    # Determine number of provinces and apply clustering
    num_provinces = pivot_df.shape[1]
    if num_provinces == 0 or pivot_df.empty:
        print('No province-level price data available for the selected food items.')
        return

    n_clusters = min(num_provinces, max_clusters)
    # n_init is set explicitly because its default differs between
    # scikit-learn versions.
    kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
    # Transposed so that each province (not each date) is one sample
    clusters = kmeans.fit_predict(pivot_df.T)
    province_cluster_map = {
        province: f'Cluster {cluster}' for province, cluster in zip(pivot_df.columns, clusters)
    }
    df_part4['cluster'] = df_part4['#adm1+name'].map(province_cluster_map)

    # Pivot data for visualization and plot
    pivot_df_for_plot = df_part4.pivot_table(
        values='price', index='date', columns=['#adm1+name', 'cluster'], aggfunc='mean'
    )
    plt.figure(figsize=(15, 8))
    for column in pivot_df_for_plot.columns:
        province, cluster = column
        sns.lineplot(data=pivot_df_for_plot[column], label=f'{province} ({cluster})')
    plt.title('Food Price Trends by Province and Cluster')
    plt.xlabel('Date')
    plt.ylabel('Average Price')
    plt.legend(title='Province (Cluster)', bbox_to_anchor=(1.05, 1), loc='upper left')
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()

# Shared output area that every analysis renders into
output = widgets.Output()

# One styled button per analysis part
button1 = widgets.Button(description="General Trend", button_style='info')
button2 = widgets.Button(description="Significant Relationship", button_style='success')
button3 = widgets.Button(description="Increased Oil Prices", button_style='warning')
button4 = widgets.Button(description="Provinces Data", button_style='danger')


def _run_in_output(analysis):
    """Clear the shared output area, then run ``analysis`` so it renders there."""
    with output:
        output.clear_output()
        analysis()


# Click handlers. Each looks up its partN function when the click happens,
# so a later redefinition of partN is the one that runs.
def on_button1_clicked(b):
    """Render Part 1 (general trend) into the output area."""
    _run_in_output(part1)


def on_button2_clicked(b):
    """Render Part 2 (relationship between food prices) into the output area."""
    _run_in_output(part2)


def on_button3_clicked(b):
    """Render Part 3 (diesel price impact) into the output area."""
    _run_in_output(part3)


def on_button4_clicked(b):
    """Render Part 4 (province comparison) into the output area."""
    _run_in_output(part4)


# Attach each handler to its button
for _button, _handler in (
    (button1, on_button1_clicked),
    (button2, on_button2_clicked),
    (button3, on_button3_clicked),
    (button4, on_button4_clicked),
):
    _button.on_click(_handler)

# Lay the buttons out in a single row and show them above the output area
hbox_buttons = widgets.HBox([button1, button2, button3, button4])
display(hbox_buttons)
display(output)

HBox(children=(Button(button_style='info', description='General Trend', style=ButtonStyle()), Button(button_st…

Output()

In [7]:
# Food items that the analyses restrict themselves to
food_items = [
    'Rice (basmati, broken)', 'Wheat flour', 'Sugar', 'Milk',
    'Wheat', 'Beans', 'Fish', 'Maize',
]

# Read the price file with its second row (header=1) as the column names and
# add a parsed 'date' column plus a numeric 'price' column
# (unparseable values become NaN).
df = pd.read_csv('wfp_food_prices_pak.csv', header=1)
df = df.assign(
    date=pd.to_datetime(df['#date']),
    price=pd.to_numeric(df['#value'], errors='coerce'),
)

# Define functions for each part
def part1():
    # Part 1: Find the general trend in food prices over time
    # Selecting specific food items for analysis
#     df_part1 = df[df['#item+name'].isin(food_items)]
#     food_items = ['Rice (basmati, broken)', 'Wheat flour', 'Sugar', 'Milk', 'Wheat', 'Beans', 'Fish', 'Maize']
    df_part1 = df[df['#item+name'].isin(food_items)]

    # Aggregate prices per month
    monthly_prices = df_part1.groupby(pd.Grouper(key='date', freq='M'))['price'].mean().reset_index()

    # Apply Linear Regression to identify trends
    X = monthly_prices.index.values.reshape(-1, 1)
    y = monthly_prices['price']
    model = LinearRegression()
    model.fit(X, y)
    trend = model.predict(X)
    data = pd.read_csv('wfp_food_prices_pak.csv')
    data

    # Visualize the trend
    plt.figure(figsize=(12, 6))
    plt.scatter(monthly_prices['date'], monthly_prices['price'], color='lightblue', label='Monthly Average Prices')
    plt.plot(monthly_prices['date'], trend, color='red', label='Trend')
    plt.title('General Trend in Food Prices Over Time')
    plt.xlabel('Date')
    plt.ylabel('Average Price')
    plt.legend()
    plt.show()
