In [None]:
# Nobel Prize Data Analysis
# Basic exploratory analysis

import pandas as pd
import numpy as np

# Load the dataset and print a quick overview of its shape, columns,
# dtypes and missing values.
nobel = pd.read_csv("laureates.csv")
print(f"Dataset loaded: {nobel.shape[0]} rows, {nobel.shape[1]} columns")
print("\nFirst few rows:")
print(nobel.head())

print("\n\nColumns in dataset:")
print(list(nobel.columns))

print("\n\nBasic information:")
# DataFrame.info() writes its summary to stdout itself and returns None,
# so wrapping it in print() would emit a stray "None" line after the summary.
nobel.info()

print("\n\nMissing values:")
print(nobel.isnull().sum())

In [None]:
import os

import matplotlib.pyplot as plt
import seaborn as sns

# Set style for better looking charts
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("husl")

# Every chart in this cell is saved under charts/. savefig does not create
# missing directories, so make sure the folder exists before plotting.
os.makedirs('charts', exist_ok=True)

print("Creating charts for website...")
print("=" * 50)

# Chart 1: Nobel Prizes by Category
# Counts the rows of `nobel` per 'category' value and saves a bar chart with
# the count written above each bar.
if 'category' in nobel.columns:
    # Open the figure only once the column is known to exist, so a missing
    # column does not leave an empty figure behind.
    plt.figure(figsize=(12, 7))
    category_counts = nobel['category'].value_counts()
    bars = plt.bar(category_counts.index, category_counts.values)
    plt.title('Nobel Prizes by Category', fontsize=18, fontweight='bold', pad=20)
    plt.xlabel('Category', fontsize=14)
    plt.ylabel('Number of Prizes', fontsize=14)
    plt.xticks(rotation=45, ha='right')

    # Add value labels on bars, centred horizontally and placed slightly
    # above the top of each bar.
    for bar, count in zip(bars, category_counts.values):
        label_x = bar.get_x() + bar.get_width() / 2
        label_y = bar.get_height() + 3
        plt.text(label_x, label_y, str(count),
                 ha='center', va='bottom', fontweight='bold', fontsize=11)

    plt.tight_layout()
    plt.savefig('charts/nobel_by_category.png', dpi=300, bbox_inches='tight')
    print("✅ Chart 1 saved: charts/nobel_by_category.png")
else:
    print("❌ 'category' column not found for Chart 1")

# Chart 2: Prizes by Year
# Counts the rows of `nobel` per 'year' value and saves a line chart of the
# counts in chronological order.
if 'year' in nobel.columns:
    # Open the figure only once the column is known to exist, so a missing
    # column does not leave an empty figure behind.
    plt.figure(figsize=(14, 7))

    # Convert year to numeric if needed; unparseable values become NaN and
    # are then left out by value_counts(). Note this rewrites nobel['year'].
    nobel['year'] = pd.to_numeric(nobel['year'], errors='coerce')
    year_counts = nobel['year'].value_counts().sort_index()

    plt.plot(year_counts.index, year_counts.values, linewidth=3, marker='o', markersize=5,
             color='#2E86AB', markerfacecolor='#A23B72')
    plt.title('Nobel Prizes Awarded Each Year', fontsize=18, fontweight='bold', pad=20)
    plt.xlabel('Year', fontsize=14)
    plt.ylabel('Number of Prizes', fontsize=14)
    plt.grid(True, alpha=0.3)

    # Highlight recent decade
    latest_year = year_counts.index.max()
    if latest_year > 2010:
        plt.axvspan(2010, latest_year, alpha=0.1, color='green', label='Recent Decade')
        # Without a legend the span's label is never drawn. It is only
        # requested here because the span is the sole labelled artist.
        plt.legend()

    plt.tight_layout()
    plt.savefig('charts/nobel_by_year.png', dpi=300, bbox_inches='tight')
    print("✅ Chart 2 saved: charts/nobel_by_year.png")
else:
    print("❌ 'year' column not found for Chart 2")
# Chart 3: Gender Distribution
# Counts the rows of `nobel` per 'gender' value and saves a pie chart with
# percentage labels on the wedges.
if 'gender' in nobel.columns:
    # Open the figure only once the column is known to exist, so a missing
    # column does not leave an empty figure behind.
    plt.figure(figsize=(10, 10))
    gender_counts = nobel['gender'].value_counts()

    # Create pie chart; the colors are assigned to the wedges in the order
    # the counts appear in gender_counts.
    colors = ['#4ECDC4', '#FF6B6B', '#C7F464']
    _, _, autotexts = plt.pie(gender_counts.values, labels=gender_counts.index,
                              autopct='%1.1f%%', startangle=90, colors=colors,
                              textprops={'fontsize': 14})

    # Style the percentage text
    for autotext in autotexts:
        autotext.set_color('white')
        autotext.set_fontweight('bold')

    plt.title('Nobel Prize Winners by Gender', fontsize=18, fontweight='bold', pad=20)

    plt.tight_layout()
    plt.savefig('charts/gender_distribution.png', dpi=300, bbox_inches='tight')
    print("✅ Chart 3 saved: charts/gender_distribution.png")
else:
    print("❌ 'gender' column not found for Chart 3")
# Chart 4: Top 10 Birth Countries
# Counts the rows of `nobel` per 'birth_country' value, keeps the ten most
# frequent, and saves a horizontal bar chart with the largest on top.
if 'birth_country' in nobel.columns:
    # Open the figure only once the column is known to exist, so a missing
    # column does not leave an empty figure behind.
    plt.figure(figsize=(12, 8))
    country_counts = nobel['birth_country'].value_counts().head(10)
    positions = range(len(country_counts))

    plt.barh(positions, country_counts.values)
    plt.yticks(positions, country_counts.index)
    plt.title('Top 10 Countries by Nobel Prizes', fontsize=18, fontweight='bold', pad=20)
    plt.xlabel('Number of Prizes', fontsize=14)

    # Add value labels just past the end of each bar
    for position, count in enumerate(country_counts.values):
        plt.text(count + 1, position, str(count), va='center', fontweight='bold', fontsize=11)

    plt.gca().invert_yaxis()  # Highest on top
    plt.tight_layout()
    plt.savefig('charts/top_countries.png', dpi=300, bbox_inches='tight')
    print("✅ Chart 4 saved: charts/top_countries.png")
else:
    print("❌ 'birth_country' column not found for Chart 4")
# Closing banner for the chart-generation run.
print("=" * 50)
print("🎉 Chart creation complete!")
print("Check your charts folder: ls charts/")

# Show a preview of the first chart: the five most frequent categories,
# displayed on screen rather than saved to disk.
if 'category' in nobel.columns:
    # Open the figure only once the column is known to exist, so a missing
    # column does not leave a blank figure behind.
    plt.figure(figsize=(10, 5))
    category_counts = nobel['category'].value_counts().head()
    plt.bar(category_counts.index, category_counts.values, color='skyblue')
    plt.title('Preview: Top Categories', fontsize=14)
    plt.xticks(rotation=45)
    plt.tight_layout()
    plt.show()