In [None]:
# Mount Google Drive into the Colab filesystem; the dataset read later in this
# notebook lives under this mount point. A forced remount is requested.
from google.colab import drive

GDRIVE_MOUNT_POINT = "/content/gdrive"
drive.mount(GDRIVE_MOUNT_POINT, force_remount=True)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the birth-statistics table from the mounted Drive folder.
df = pd.read_csv('/content/gdrive/MyDrive/ICE-V2-Dataset/Datasets/data_115birth_statistics.csv')

# Tally rows per state code, then retain only the codes seen two or more times.
state_counts = df['state_code'].value_counts()
states_to_keep = state_counts.loc[state_counts.ge(2)].index

# Restrict the table to the retained states. reset_index() (without drop) keeps
# the original row labels as an extra "index" column, as before.
df_filtered = df.loc[df['state_code'].isin(states_to_keep)].reset_index()

# Two-letter group key: first letter of `sex` followed by the first letter of
# `highest_education_level` (e.g. 'MB', 'FD').
df_filtered["key"] = (
    df_filtered["sex"].str.get(0)
    + df_filtered["highest_education_level"].str.get(0)
)

# Pie-wedge colour for each group key.
colors_dict = {
    'MB': 'cyan',
    'MM': 'blue',
    'MD': 'midnightblue',
    'FM': 'pink',
    'FD': 'red',
}

# Human-readable legend text for each group key.
labels_dict = {
    'MB': 'Male Bachelor',
    'MM': 'Male Masters',
    'MD': 'Male Doctorate',
    'FM': 'Female Masters',
    'FD': 'Female Doctorate',
}


# Draw one pie chart per retained state, showing how that state's rows split
# across the sex/education group keys.
#
# The grid is sized from the number of states (at most 3 charts per row)
# instead of assuming exactly three states, so axes[i] can never run out of
# range. With exactly three states this is the same 1x3, 15x5-inch layout.
n_states = len(states_to_keep)
n_cols = min(max(n_states, 1), 3)
n_rows = max((n_states + n_cols - 1) // n_cols, 1)
fig, axes = plt.subplots(
    n_rows, n_cols, figsize=(5 * n_cols, 5 * n_rows), squeeze=False
)
# squeeze=False always returns a 2-D array of Axes; flatten it so a single
# running index addresses every cell regardless of the grid shape.
axes = axes.ravel()

# Loop through each state code and create a pie chart subplot
for i, state in enumerate(states_to_keep):
    state_df = df_filtered[df_filtered['state_code'] == state]
    key_counts = state_df['key'].value_counts()

    ax = axes[i]
    labels = key_counts.index
    sizes = key_counts.values

    # Group keys missing from the lookup tables (e.g. 'FB') fall back to a
    # neutral colour and to the raw key as legend text instead of raising
    # KeyError.
    colors = [colors_dict.get(key, 'lightgray') for key in labels]
    legend_labels = [labels_dict.get(key, key) for key in labels]

    ax.pie(sizes, labels=labels, colors=colors, autopct='%1.1f%%', startangle=90)
    ax.set_title(f'Distribution in the State of {state}')
    # Anchor the legend just outside the top-right corner of the axes so it
    # does not cover the pie.
    ax.legend(legend_labels, loc='upper left', bbox_to_anchor=(1, 1))

# Hide any grid cells that did not receive a chart.
for unused_ax in axes[n_states:]:
    unused_ax.set_visible(False)

# Adjust layout
plt.tight_layout()

# Show the plot
plt.show()