## Basic Data Visualizations

### Importing libraries, loading the dataset, and checking for missing values

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns


# Location of the billionaires CSV inside the Kaggle input directory.
DATA_PATH = '/kaggle/input/world-top-billionaires/billionaires.csv'

# Read the whole file into the DataFrame that the following cells work on.
df = pd.read_csv(DATA_PATH)

In [None]:
# Show the column labels of the loaded frame.
df.columns

In [None]:
# Per-column count of missing entries (isna is the same check as isnull).
df.isna().sum()

In [None]:
# Discard every row that contains at least one missing value. Rebinding the
# name gives the same cleaned frame as dropping in place.
df = df.dropna(axis=0)

# Recount missing entries per column on the cleaned frame.
df.isna().sum()

### Let's visualize the gender distribution of billionaires

In [None]:
# One bar per distinct value of the 'Demographics Gender' column.
fig, ax = plt.subplots()
sns.countplot(data=df, x='Demographics Gender', ax=ax)
ax.set_title('Billionaire Gender Distribution')
plt.show()

### What about the sectors?

In [None]:
# Row count for each distinct 'Company Sector' value, drawn as a pie with
# one labelled wedge per sector.
sector_counts = df['Company Sector'].value_counts()

fig, ax = plt.subplots()
ax.pie(
    sector_counts,
    labels=sector_counts.index,
    autopct='%1.1f%%',
)
ax.set_title('Billionaire Sectors')
ax.axis('equal')  # equal aspect ratio keeps the pie circular
plt.show()

### There are too many sectors. Let's visualize the top 10:

In [None]:
# The ten most frequent 'Company Sector' values together with their row counts.
top_10_sectors = df['Company Sector'].value_counts().nlargest(10)

# Ten pastel colours for the wedges.
colors = [
    '#ff9999', '#66b3ff', '#99ff99', '#ffcc99', '#c2c2f0',
    '#ffb3e6', '#c2d6d6', '#ffb3b3', '#cc99ff', '#d6d6c2',
]

fig, ax = plt.subplots()
ax.pie(
    top_10_sectors,
    labels=top_10_sectors.index,
    autopct='%1.1f%%',  # shares are relative to the plotted top-10 total
    colors=colors,
    startangle=90,
)

# A white disc drawn over the centre turns the pie into a donut.
center_circle = plt.Circle((0, 0), 0.70, fc='white')
ax.add_artist(center_circle)

ax.axis('equal')
ax.set_title('Top 10 Billionaire Sectors', fontweight='bold')
fig.subplots_adjust(top=1.2)

plt.show()


### How did the number of billionaires change over time?

In [None]:
# Distinct values of the 'Year' column, in order of first appearance.
pd.unique(df['Year'])

In [None]:
# Number of rows per year, ordered by the year value.
yearly_counts = df['Year'].value_counts().sort_index()

fig, ax = plt.subplots()
ax.plot(yearly_counts.index, yearly_counts.values)
ax.set(
    title='Billionaire Count Over Time',
    xlabel='Year',
    ylabel='Count',
)
# Place a tick on exactly the years present in the data, cast to int.
ax.set_xticks(yearly_counts.index.astype(int))

plt.show()


### Let's visualize how the billionaires formed their wealth and the change over time:

In [None]:
# Distinct values of the 'Wealth How Category' column, in order of first appearance.
pd.unique(df['Wealth How Category'])

In [None]:
# Keep only the rows whose category is not the literal string '0'.
not_zero_category = df['Wealth How Category'] != '0'

# Rows are years, columns are categories, cells are row counts.
wealth_composition = (
    df[not_zero_category]
    .groupby(['Year', 'Wealth How Category'])
    .size()
    .unstack()
)

# One stacked bar per year, one segment per category.
ax = wealth_composition.plot.bar(stacked=True)

ax.set_title('Wealth Composition of Billionaires by Year')
ax.set_xlabel('Year')
ax.set_ylabel('Count')

ax.legend(title='Wealth How Category')

plt.show()
