In [8]:
pip install plotly

Collecting plotly
  Downloading plotly-6.0.1-py3-none-any.whl.metadata (6.7 kB)
Collecting narwhals>=1.15.1 (from plotly)
  Downloading narwhals-1.34.1-py3-none-any.whl.metadata (9.2 kB)
Downloading plotly-6.0.1-py3-none-any.whl (14.8 MB)
   ---------------------------------------- 0.0/14.8 MB ? eta -:--:--
   -- ------------------------------------- 1.0/14.8 MB 7.3 MB/s eta 0:00:02
   -- ------------------------------------- 1.0/14.8 MB 7.3 MB/s eta 0:00:02
   --- ------------------------------------ 1.3/14.8 MB 2.3 MB/s eta 0:00:06
   --- ------------------------------------ 1.3/14.8 MB 2.3 MB/s eta 0:00:06
   ----- ---------------------------------- 2.1/14.8 MB 1.9 MB/s eta 0:00:07
   --------- ------------------------------ 3.4/14.8 MB 2.7 MB/s eta 0:00:05
   ----------- ---------------------------- 4.2/14.8 MB 2.9 MB/s eta 0:00:04
   ------------ --------------------------- 4.5/14.8 MB 2.7 MB/s eta 0:00:04
   ------------ --------------------------- 4.5/14.8 MB 2.7 MB/s eta 0:00:0


[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [6]:
pip install squarify

Note: you may need to restart the kernel to use updated packages.
Collecting squarify
  Downloading squarify-0.4.4-py3-none-any.whl.metadata (600 bytes)
Downloading squarify-0.4.4-py3-none-any.whl (4.1 kB)
Installing collected packages: squarify
Successfully installed squarify-0.4.4



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
# 📘 Data Visualization Project Summary

# ================================
# 📦 1. Import Libraries
# ================================
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import folium
import squarify
import plotly.express as px

%matplotlib inline

# ================================
# 📂 2. Load Dataset
# ================================
# Read the raw event records. The path is relative, so it is resolved
# against the kernel's current working directory.
df = pd.read_csv("2016-2022.csv")

# ================================
# 🔹 Unit I: Statistics and Python Basics
# ================================
# Summary statistics as produced by DataFrame.describe().
print(df.describe())

# Histogram of the per-event fatality counts.
fig, ax = plt.subplots()
sns.histplot(df['fatalities'], bins=50, ax=ax)
ax.set_title("Fatalities Distribution")
plt.show()

# Horizontal bars: how many events fall into each event type.
event_type_counts = df['event_type'].value_counts()
fig, ax = plt.subplots()
sns.barplot(x=event_type_counts.values, y=event_type_counts.index, ax=ax)
ax.set_title("Event Type Distribution")
plt.show()

# Yearly event totals; sort_index() puts the years in chronological order
# (value_counts alone orders by frequency).
events_per_year = df['year'].value_counts().sort_index()
fig, ax = plt.subplots()
sns.lineplot(x=events_per_year.index, y=events_per_year.values, marker='o', ax=ax)
ax.set_title("Number of Events per Year")
plt.show()

# Fatality distribution per event type, with quartile lines drawn inside
# each violin.
fig, ax = plt.subplots()
sns.violinplot(x='event_type', y='fatalities', data=df, inner="quartile", ax=ax)
ax.set_title("Distribution of Fatalities by Event Type")
plt.setp(ax.get_xticklabels(), rotation=45)  # tilt the category labels
fig.tight_layout()
plt.show()

# ================================
# 🔹 Unit II: Data Manipulation with Pandas
# ================================
# --- Missing-value handling ------------------------------------------------
categorical_cols = df.select_dtypes(include=['object']).columns
numerical_cols = df.select_dtypes(include=['float64', 'int64']).columns

# Text columns: label missing entries explicitly.
df[categorical_cols] = df[categorical_cols].fillna('Unknown')

# Numeric columns: impute with the column median, EXCEPT the time/position
# columns. A median year or a median coordinate is fabricated data: it would
# assign events to a year/location they never had and would turn the later
# dropna() calls on year/latitude/longitude into no-ops. Rows missing those
# values are left as NaN so the downstream dropna()/groupby steps exclude them.
impute_cols = numerical_cols.difference(['year', 'latitude', 'longitude'])
df[impute_cols] = df[impute_cols].fillna(df[impute_cols].median())

# --- Fatalities per region and year ------------------------------------------
# Rows = region, columns = year, cells = summed fatalities.
fatalities_by_region_year = df.groupby(['region', 'year'])['fatalities'].sum().unstack()
# fmt='.0f' prints whole numbers; the seaborn default format would show large
# totals in scientific notation.
sns.heatmap(fatalities_by_region_year, annot=True, fmt='.0f', cmap='YlOrRd')
plt.title("Total Fatalities by Region and Year")
plt.show()

# --- Sub-event counts per year ---------------------------------------------
# Counting data_id gives the number of events in each (sub_event_type, year)
# cell; combinations that never occur become 0 instead of NaN.
pivot_sub_events = df.pivot_table(index='sub_event_type', columns='year', values='data_id', aggfunc='count').fillna(0)
pivot_sub_events.plot(kind='barh', stacked=True, figsize=(14, 10))
plt.title("Sub-Event Types by Year")
plt.tight_layout()
plt.show()

# --- Actor flags -------------------------------------------------------------
# Explicit fill kept so the flags below work even if the actor columns were
# not picked up by the object-dtype selection above.
df['actor1'] = df['actor1'].fillna('Unknown')
df['actor2'] = df['actor2'].fillna('Unknown')
# Case-insensitive substring match. na=False guarantees a strictly boolean
# column, which is required when the flag is later used as a row mask.
df['police_event'] = df['actor1'].str.upper().str.contains('POLICE', na=False)
df['civilian_event'] = df['actor2'].str.upper().str.contains('CIVILIAN', na=False)

# ================================
# 🔹 Unit III: Matplotlib Visualizations
# ================================
# Share of each event type within a single year.
year_filter = 2021
in_selected_year = df['year'].eq(year_filter)
event_counts = df.loc[in_selected_year, 'event_type'].value_counts()

fig, ax = plt.subplots(figsize=(8, 8))
ax.pie(event_counts, labels=event_counts.index, autopct='%1.1f%%', startangle=140)
ax.set_title(f"Event Type Distribution in {year_filter}")
ax.axis('equal')  # keep the pie circular
plt.show()

# Yearly number of events flagged as police-involved / civilian-targeted
# (the flags are the boolean columns police_event and civilian_event).
police_trend = df.loc[df['police_event']].groupby('year')['data_id'].count()
civilian_trend = df.loc[df['civilian_event']].groupby('year')['data_id'].count()

fig, ax = plt.subplots()
for trend, series_label in (
    (police_trend, 'Police Involved'),
    (civilian_trend, 'Civilian Targeted'),
):
    ax.plot(trend.index, trend.values, marker='o', label=series_label)
ax.set_title("Events Involving Police and Civilians (2016–2022)")
ax.set_xlabel("Year")
ax.set_ylabel("Number of Events")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

# ================================
# 🌟 Attractive and Unique Visualizations
# ================================

# Treemap of Event Types by Fatalities
tree_df = df.groupby('event_type')['fatalities'].sum().reset_index()
# squarify cannot lay out zero-area rectangles (it divides by the rectangle
# side lengths), so event types with no recorded fatalities are left out.
tree_df = tree_df.loc[tree_df['fatalities'] > 0].reset_index(drop=True)
squarify.plot(sizes=tree_df['fatalities'], label=tree_df['event_type'], alpha=.8)
plt.axis('off')
plt.title("Treemap: Fatalities by Event Type")
plt.show()

# Sunburst Chart using Plotly
# Inner ring = region, outer ring = event type, sector size = summed fatalities.
sunburst_df = df.groupby(['region', 'event_type'])['fatalities'].sum().reset_index()
fig = px.sunburst(sunburst_df, path=['region', 'event_type'], values='fatalities', title="Sunburst Chart: Fatalities by Region and Event Type")
fig.show()

# Animated Scatter Plot
# Plotly Express orders animation frames by first appearance in the data, so
# the frame is sorted by year to make the animation play chronologically.
# A stable sort keeps the original row order within each year.
scatter_df = (
    df[['year', 'latitude', 'longitude', 'fatalities', 'event_type']]
    .dropna()
    .sort_values('year', kind='stable')
)
fig = px.scatter(scatter_df, x='longitude', y='latitude', color='event_type', size='fatalities', animation_frame='year', title="Animated Scatter: Events Over Time")
fig.show()

# ================================
# 🔹 Unit IV: Advanced Visualization with Seaborn & Folium
# ================================
# Region x year heatmap of summed fatalities, annotated with whole numbers.
sns.heatmap(fatalities_by_region_year, annot=True, fmt='.0f', cmap='YlOrRd')
plt.title("Total Fatalities by Region and Year")
plt.tight_layout()
plt.show()

# Interactive map of a random subset of events that have coordinates.
# - n is capped at the number of available rows: sample(500) raises
#   ValueError when fewer than 500 rows remain.
# - random_state fixes the subset so the map is identical on every run.
geo_df = df.dropna(subset=['latitude', 'longitude'])
sample_df = geo_df.sample(n=min(500, len(geo_df)), random_state=42)

# NOTE(review): [22.5, 78.9] is presumably the centre of the study area —
# confirm against the dataset's coverage.
m = folium.Map(location=[22.5, 78.9], zoom_start=5)
# Iterate the three needed columns directly instead of building a Series per
# row with iterrows().
for lat, lon, event_type in zip(
    sample_df['latitude'], sample_df['longitude'], sample_df['event_type']
):
    folium.CircleMarker(
        location=[lat, lon],
        radius=4,
        popup=event_type,
        color='crimson',
        fill=True,
        fill_color='crimson'
    ).add_to(m)

# Last expression of the cell: renders the map inline.
m


ModuleNotFoundError: No module named 'plotly'