In [1]:
#importing required modules
import pandas as pd
import numpy as np
import plotly.express as px

In [2]:
# Load the Forbes billionaires list ("forbes_names.csv").
# The file uses a semicolon, not a comma, as the field separator.
csv_path = "/kaggle/input/forbes-billionaires-monthly-list-of-2024/forbes_names.csv"
df = pd.read_csv(csv_path, sep=';')

# Preview the first 10 rows
df.head(10)

Unnamed: 0,Person,ID,Business,Industry,Gender,City,State,Country_of_residence,Continent,Citizenship,Self_made
0,Bernard Arnault,1,LVMH,Fashion and Retail,male,Paris,,France,Europe,France,False
1,Elon Musk,2,Tesla,Automotive,male,Austin,Texas,USA,North America,USA,True
2,Jeff Bezos,3,Amazon,Technology,male,Seattle,Washington,USA,North America,USA,True
3,Larry Ellison,4,Oracle,Technology,male,Lanai,Hawaii,USA,North America,USA,True
4,Warren Buffett,5,Berkshire Hathaway,Finance and Investments,male,Omaha,Nebraska,USA,North America,USA,True
5,Bill Gates,6,Microsoft,Technology,male,Medina,Washington,USA,North America,USA,True
6,Michael Bloomberg,7,Bloomberg LP,Media and Entertainment,male,New York City,New York,USA,North America,USA,True
7,Carlos Slim Helu,8,Telecom,Telecom,male,Mexico City,,Mexico,North America,Mexico,True
8,Mukesh Ambani,9,Diversified,Diversified,male,Mumbai,,India,Asia,India,False
9,Steve Ballmer,10,Microsoft,Technology,male,Hunts Point,Washington,USA,North America,USA,True


In [3]:

# Shorter, lowercase column labels for easier access.
# The assignment below is positional: labels are applied to the existing
# columns left to right, so the list must have one entry per column of `df`.
# NOTE(review): 'citzenship' is misspelled ("citizenship"); it is kept as-is
# because any later code has to use this exact label.
short_labels = [
    'name', 'id', 'business', 'industry', 'gender', 'city', 'state',
    'country', 'continent', 'citzenship', 'self-made',
]
df.columns = short_labels

# Preview the first 10 rows under the new labels
df.head(10)

Unnamed: 0,name,id,business,industry,gender,city,state,country,continent,citzenship,self-made
0,Bernard Arnault,1,LVMH,Fashion and Retail,male,Paris,,France,Europe,France,False
1,Elon Musk,2,Tesla,Automotive,male,Austin,Texas,USA,North America,USA,True
2,Jeff Bezos,3,Amazon,Technology,male,Seattle,Washington,USA,North America,USA,True
3,Larry Ellison,4,Oracle,Technology,male,Lanai,Hawaii,USA,North America,USA,True
4,Warren Buffett,5,Berkshire Hathaway,Finance and Investments,male,Omaha,Nebraska,USA,North America,USA,True
5,Bill Gates,6,Microsoft,Technology,male,Medina,Washington,USA,North America,USA,True
6,Michael Bloomberg,7,Bloomberg LP,Media and Entertainment,male,New York City,New York,USA,North America,USA,True
7,Carlos Slim Helu,8,Telecom,Telecom,male,Mexico City,,Mexico,North America,Mexico,True
8,Mukesh Ambani,9,Diversified,Diversified,male,Mumbai,,India,Asia,India,False
9,Steve Ballmer,10,Microsoft,Technology,male,Hunts Point,Washington,USA,North America,USA,True


In [4]:
# Split the rows by recorded gender. Rows whose gender is neither 'male'
# nor 'female' (for example a missing value) end up in neither subset.
is_male = df['gender'].eq('male')
is_female = df['gender'].eq('female')
df_M = df.loc[is_male]
df_F = df.loc[is_female]

# Head-count per gender
n_M, n_F = df_M.shape[0], df_F.shape[0]

labels = ['Male', 'Female']
data = [n_M, n_F]

# Donut chart of the two counts. `pull` is given in the same order as
# `labels`, so its second entry offsets the 'Female' slice.
fig = px.pie(
    names=labels,
    values=data,
    hole=0.3,
    title="Gender Distribution",
).update_traces(
    textposition='outside',
    textinfo='label+percent',
    pull=[0, 0.2],
)

fig.show()

In [5]:
# List the distinct values of the 'self-made' flag, to see how it is encoded
# before filtering on it.
distinct_flags = df['self-made'].unique()
print(distinct_flags)


[False True nan]


In [6]:
import pandas as pd
import plotly.express as px

# Turn the 'self-made' flag into text so it can be compared against plain
# strings. NOTE: this overwrites the column inside `df`; from here on its
# values are strings, with the textual 'nan' of missing entries respelled
# as 'NaN'.
flag_as_text = df['self-made'].astype(str)
df['self-made'] = flag_as_text.replace('nan', 'NaN')

# 'True' marks a self-made fortune, 'False' an inherited one.
# Rows with any other value (such as 'NaN') are in neither subset and are
# therefore not part of the chart.
df_Y = df.loc[df['self-made'].eq('True')]
df_N = df.loc[df['self-made'].eq('False')]

# Head-count per group
n_Y, n_N = df_Y.shape[0], df_N.shape[0]

# Values and labels for the chart, in matching order
labels = ['Self-made', 'Inherited']
data = [n_Y, n_N]

# Donut chart of the two counts. `pull` is given in the same order as
# `labels`, so its second entry offsets the 'Inherited' slice.
fig = px.pie(
    names=labels,
    values=data,
    hole=0.3,
    title="Origin of Wealth Distribution",
).update_traces(
    textposition='outside',
    textinfo='label+percent',
    pull=[0, 0.2],
)

# Render the chart
fig.show()



In [7]:
import pandas as pd
import plotly.express as px

# Number of countries that get a slice of their own; every country ranked
# below this is folded into a single 'Others' slice. Defined once so the
# "top" selection and the "rest" selection can never drift apart.
TOP_N_COUNTRIES = 6

# Billionaires per country of residence, most frequent first.
# value_counts() skips rows whose country is missing, so those rows are not
# represented in the chart.
country_counts = df['country'].value_counts()

# The leading countries keep their own slice ...
top_countries = country_counts.head(TOP_N_COUNTRIES)

# ... and the counts of all remaining countries are summed into 'Others'.
rest_sum = country_counts.iloc[TOP_N_COUNTRIES:].sum()
country_distribution = pd.concat([top_countries, pd.Series({'Others': rest_sum})])

# Donut chart with one slice per leading country plus 'Others'
fig = px.pie(
    values=country_distribution.values,
    names=country_distribution.index,
    title="Global Distribution of Billionaires by Country",
    hole=0.5
)

# Offset only the 'Others' slice; `pull` has one entry per slice, in the same
# order as `country_distribution`.
fig.update_traces(
    pull=[0.1 if name == 'Others' else 0 for name in country_distribution.index],
    textinfo='label+percent',
    textposition='outside'
)

# Render the chart
fig.show()
