In [1]:
import pandas as pd

# Load dataset (upload adult.data.csv to Colab first)
from google.colab import files
# files.upload() opens the Colab upload widget and returns a dict keyed by the
# name each uploaded file was saved under in the working directory.
uploaded = files.upload()  # choose adult.data.csv

# Read CSV
# Use the name Colab reports rather than a hard-coded one: a re-upload may be
# saved under a de-duplicated name (e.g. "adult.data (1).csv"), in which case
# the literal path would read a stale copy or fail. When nothing was uploaded
# (empty dict), fall back to the original default so a file already present
# from an earlier session is still picked up.
csv_path = next(iter(uploaded), "adult.data.csv")
df = pd.read_csv(csv_path)

# Summary statistics over the DataFrame `df` loaded above, followed by a
# printed report. Every result keeps its module-level name.


def _as_percent(fraction):
    """Turn a 0-1 fraction into a percentage rounded to one decimal place."""
    return round(fraction * 100, 1)


# Row mask for a salary of '>50K'; shared by the "rich" statistics below.
is_rich = df['salary'] == '>50K'

# 1. Number of rows for each value of 'race'.
race_count = df['race'].value_counts()

# 2. Mean 'age' over rows whose 'sex' is 'Male', to one decimal place.
average_age_men = round(df.loc[df['sex'] == 'Male', 'age'].mean(), 1)

# 3. Share of rows whose 'education' is exactly 'Bachelors'.
percentage_bachelors = _as_percent(df['education'].eq('Bachelors').mean())

# 4. Share of '>50K' rows with and without an advanced degree
#    (Bachelors, Masters or Doctorate).
higher_education = df['education'].isin(['Bachelors', 'Masters', 'Doctorate'])
lower_education = ~higher_education

higher_education_rich = _as_percent(is_rich[higher_education].mean())
lower_education_rich = _as_percent(is_rich[lower_education].mean())

# 5. Smallest value of 'hours-per-week'.
min_work_hours = df['hours-per-week'].min()

# 6. Share of '>50K' rows among those working exactly that minimum.
num_min_workers = df.loc[df['hours-per-week'] == min_work_hours]
rich_percentage = _as_percent(num_min_workers['salary'].eq('>50K').mean())

# 7. Per-country fraction of '>50K' rows, then the country with the largest
#    fraction and that fraction as a percentage.
country_salary = is_rich.groupby(df['native-country']).mean()
highest_earning_country = country_salary.idxmax()
highest_earning_country_percentage = _as_percent(country_salary.max())

# 8. Most frequent 'occupation' among '>50K' rows whose country is 'India'.
from_india = df['native-country'] == 'India'
top_IN_occupation = df.loc[from_india & is_rich, 'occupation'].mode()[0]

# Report: one (label, value) pair per line, printed in order.
report = [
    ("Race count:\n", race_count),
    ("Average age of men:", average_age_men),
    ("Percentage with Bachelor's:", percentage_bachelors),
    ("Higher education rich:", higher_education_rich),
    ("Lower education rich:", lower_education_rich),
    ("Min work hours:", min_work_hours),
    ("Rich percentage among min hours:", rich_percentage),
    ("Highest earning country:", highest_earning_country),
    ("Highest earning country percentage:", highest_earning_country_percentage),
    ("Top occupation in India:", top_IN_occupation),
]
for label, value in report:
    print(label, value)


KeyboardInterrupt: 