# Data Exploration - Plastic Waste Flow Analysis

**Advanced Data Analytics Project**  
**Team:** Tarun S, Adityaa Kumar H, Akshay P Shetti

This notebook explores the three main datasets:
1. Global plastics production (1950-2019)
2. Mismanaged plastic waste per capita (2019)
3. UN Comtrade plastic waste trade flows (2012-2023)


In [None]:
# Import libraries
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

from src.data_loader import DataLoader

# Set display options
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

# Set plot style
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (12, 6)

%matplotlib inline


## 1. Load Datasets


In [None]:
# Fraction of the trade-flow data to load. Kept small so the notebook
# loads quickly while exploring.
# NOTE(review): presumably 1.0 loads the full trade dataset -- confirm
# against DataLoader.load_all_data. If the sampling is random and unseeded,
# trade-related figures may differ between runs.
TRADE_SAMPLE_FRAC = 0.1

# Initialize data loader
loader = DataLoader()

# Load all datasets (sample trade data for faster loading)
data = loader.load_all_data(trade_sample_frac=TRADE_SAMPLE_FRAC)

# Get summary statistics: iterated below as a mapping of dataset name to a
# mapping of statistic name to value.
summary = loader.get_summary_statistics(data)

# Banner heading for the report
print("\n" + "="*70)
print("SUMMARY STATISTICS")
print("="*70)

# One upper-cased heading per dataset, followed by its indented statistics
for dataset_name, stats in summary.items():
    print(f"\n{dataset_name.upper()}:")
    for key, value in stats.items():
        print(f"  {key}: {value}")


## 2. Explore Production Data


In [None]:
# Pull the production table out of the loaded dataset bundle.
production = data['production']

# Report the table's dimensions, then preview the first ten rows.
print("Production Data Shape:", production.shape)
print("\nFirst 10 rows:")
display(production.head(n=10))

# Descriptive statistics as produced by the table's describe() method.
print("\nBasic Statistics:")
display(production.describe())

# Line chart of production over time, drawn with a thick blue line.
fig = px.line(
    data_frame=production,
    x='Year',
    y='Production_Tonnes',
    title='Global Plastic Production (1950-2019)',
)
fig.update_traces(line={'color': '#2E86AB', 'width': 3})
fig.show()
