# In [None]:  (notebook cell marker left over from the Jupyter export)
#import libraries 
import numpy as np
import pandas as pd
import geopandas as gpd
from matplotlib import cm
import matplotlib.pyplot as plt
import seaborn as sns
from IPython import display
from tqdm import tqdm
import imageio.v2 as imageio
import glob
import warnings
# Silence all warnings to keep the notebook output clean.
# NOTE(review): this blanket filter also hides deprecation warnings from the
# plotting libraries; consider narrowing it once the notebook is stable.
warnings.filterwarnings('ignore')
# 'rocket' is a seaborn palette that is available through Matplotlib's colormap
# registry once seaborn has been imported. It is looked up through pyplot
# because `matplotlib.cm.get_cmap` was deprecated in Matplotlib 3.7 and removed
# in 3.9, whereas `plt.get_cmap` works on old and new releases alike.
cmap = plt.get_cmap('rocket')


# Load and inspect data:
# read the consumption table and the world-map shapefile, then print the
# table's column/dtype summary.
csv_path = 'sugar_consumption_dataset.csv'
shapefile_path = '/kaggle/input/shpfile/World_Map.shp'

data = pd.read_csv(csv_path)
globe = gpd.read_file(shapefile_path)
data.info()

# Output of data.info() (truncated):
# <class 'pandas.core.frame.DataFrame'>
# RangeIndex: 10000 entries, 0 to 9999
# Data columns (total 26 columns):

# Data exploration
# A notebook renders only the last bare expression of a cell (and a plain
# script renders none), so each table is displayed explicitly.
display.display(data.describe().style.background_gradient())
display.display(data.describe(include='O'))

# Identify and visualise missing data: null count per column and that count
# as a percentage of all rows.
missing_df = data.isnull().sum().reset_index()
missing_df.columns = ['features', 'count']
missing_df['count_%'] = missing_df['count'] / data.shape[0] * 100
# Show only the columns that actually have missing values.
display.display(
    missing_df[missing_df['count'] > 0].style.bar(subset=['count'], color='red')
)

# Preview the first rows of the dataset.
display.display(data.head())

# Per-year mean, median and standard deviation of total sugar consumption.
global_trend = data.groupby('Year')['Total_Sugar_Consumption'].agg(['mean', 'median', 'std'])
sns.set(style="whitegrid")

years = global_trend.index
yearly_mean = global_trend['mean']
yearly_std = global_trend['std']

# One figure with a single axes; everything below draws onto `ax`.
fig, ax = plt.subplots(figsize=(8, 4))

# Mean as a solid line, median as a dashed line.
ax.plot(years, yearly_mean, label='Mean', color='#D2691E', linewidth=2)
ax.plot(years, global_trend['median'], label='Median', color='#6A5ACD', linestyle='--', linewidth=2)

# Shaded band spanning mean - std to mean + std.
ax.fill_between(
    years,
    yearly_mean - yearly_std,
    yearly_mean + yearly_std,
    color='#D2691E',
    alpha=0.2,
    label='±Std Dev',
)

# Title, axis labels and legend.
ax.set_title("Global Sugar Consumption Trend Over Time", fontsize=18)
ax.set_xlabel("Year")
ax.set_ylabel("Sugar Consumption")
ax.legend()
fig.tight_layout()
plt.show()


# Data preparation:
# convert the country names in both datasets (data and globe) to lowercase so that they merge correctly.
# Lowercase the join keys on both sides so that the merge matches on name.
# `.str.lower()` leaves a missing name as NaN, whereas `apply(str.lower)`
# raises a TypeError on it.
data['Country'] = data['Country'].str.lower()
globe['NAME'] = globe['NAME'].str.lower()

# Fallback for map regions with no match in the dataset: the median of all
# individual records.
consumption_ = data['Total_Sugar_Consumption'].median()

# Mean consumption per country, used to colour the map.
country_means = (
    data.groupby('Country')['Total_Sugar_Consumption']
    .mean()
    .rename('Consumption')
    .reset_index()
)

# Left-merge so every map region is kept, then fill ONLY the metric column.
# Calling fillna on the whole frame would also write the number into every
# other column that has gaps (e.g. the unmatched 'Country' key).
merge_df = pd.merge(globe, country_means, how='left', left_on=['NAME'], right_on=['Country'])
merge_df['Consumption'] = merge_df['Consumption'].fillna(consumption_)

# Visualisation: map on top, bar chart below.
fig, (map_ax, bar_ax) = plt.subplots(2, 1, figsize=(8, 8))

# Map of the average consumption per country.
merge_df.plot(column='Consumption', cmap='Reds', linewidth=0.3, ax=map_ax, edgecolor='0.9')
map_ax.set_xticks([])
map_ax.set_yticks([])
map_ax.set_frame_on(False)
map_ax.set_title('Average Global Sugar Consumption Per Country', fontsize=18)

# Bar chart of the total consumption per country.
agg_data = (
    data.groupby('Country')['Total_Sugar_Consumption']
    .sum()
    .rename('Consumption')
    .reset_index()
)
norm = plt.Normalize(agg_data.Consumption.min(), agg_data.Consumption.max())
bars = sns.barplot(x=agg_data.Country, y=agg_data.Consumption, ax=bar_ax)
# Label each bar with its total divided by 1e9, suffixed with 'B'.
bars.bar_label(
    bars.containers[0],
    fontsize=12,
    label_type='center',
    labels=[f'{x/1e9:.1f}B' for x in agg_data.Consumption],
    rotation=90,
)

# Colour every bar by its height using the module-level `cmap`.
for bar in bars.patches:
    height = bar.get_height()
    bar.set_facecolor(cmap(norm(height)))

bar_ax.set_yticks([])
bar_ax.set_frame_on(False)
bar_ax.set_title('Total Global Sugar Consumption Per Country', fontsize=18)

fig.tight_layout()
plt.show()


# Animated yearly trends (1960–2023)
# Data preparation
# Lowercase the join keys on both sides so that the merge matches on name.
# Lowercasing is idempotent, so it is harmless if the names are already
# lowercase. `.str.lower()` leaves a missing name as NaN instead of raising.
data['Country'] = data['Country'].str.lower()
globe['NAME'] = globe['NAME'].str.lower()
# Fallback for map regions with no data in a given year: the smallest single
# record in the whole dataset.
min_consumption = data['Total_Sugar_Consumption'].min()

# Render one map per year and save it as a PNG frame for the animation.
# NOTE(review): each frame lets the plot pick its own colour range, so shades
# are not directly comparable between years; consider passing a shared
# vmin/vmax to `plot` if cross-year comparison matters.
for year in range(1960, 2024):
    # Total consumption per country for this year only.
    agg_data = (
        data[data.Year == year]
        .groupby('Country')['Total_Sugar_Consumption']
        .sum()
        .rename('Consumption')
        .reset_index()
    )

    # Left-merge so every map region is kept, then fill ONLY the metric
    # column; fillna on the whole frame would also overwrite gaps in
    # unrelated columns with a number.
    merge_df = pd.merge(globe, agg_data, how='left', left_on=['NAME'], right_on=['Country'])
    merge_df['Consumption'] = merge_df['Consumption'].fillna(min_consumption)

    # Replace the previous frame in the notebook output.
    display.clear_output()
    fig = plt.figure(figsize=(8, 4))
    merge_df.plot(column='Consumption', cmap='Reds', linewidth=0.3, ax=plt.gca(), edgecolor='0.9')
    plt.xticks([])
    plt.yticks([])
    plt.box(False)
    plt.title(f'Global Sugar Consumption Year:{year}', fontsize=18)

    plt.tight_layout()
    # Save before showing the figure.
    plt.savefig('image_year_{:04d}.png'.format(year))
    plt.show()
    # Close the figure so the 64 iterations do not accumulate open figures.
    plt.close(fig)


anim_file = 'consumption.gif'

# The frames are saved as 'image_year_<YYYY>.png' in the current working
# directory, so look for them there instead of a hard-coded absolute path.
# The zero-padded year makes lexicographic order equal to chronological order.
filenames = sorted(glob.glob('image_year_*.png'))
if not filenames:
    # Without this guard the code would fail later with an unhelpful
    # NameError on `filename` and leave an empty GIF behind.
    raise FileNotFoundError(
        "No 'image_year_*.png' frames found in the working directory; "
        "run the yearly plotting loop first."
    )

# NOTE(review): the unit of `duration` (seconds vs. milliseconds) has differed
# between imageio releases; confirm against the installed version.
with imageio.get_writer(anim_file, mode='I', duration=1, loop=0) as writer:
    for filename in filenames:
        image = imageio.imread(filename)
        writer.append_data(image)
    # Append the last frame a second time, presumably so the animation
    # lingers on the final year before it loops.
    image = imageio.imread(filenames[-1])
    writer.append_data(image)