In [None]:
from google.colab import drive
# Mount Google Drive at /content/gdrive; the CSV read further down lives under
# this mount point. force_remount=True asks Colab for a fresh mount
# (presumably even when the drive is already mounted -- confirm in Colab docs).
drive.mount("/content/gdrive", force_remount=True)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the birth-statistics CSV from the mounted Drive folder
df = pd.read_csv(
    '/content/gdrive/MyDrive/ICE-V2-Dataset/Datasets/data_115birth_statistics.csv'
)

# Keep only the rows whose sex column equals 'Female'; reset_index() renumbers
# the rows (the previous index is kept as an extra column)
female_mask = df.sex == 'Female'
df_females = df.loc[female_mask].reset_index()

# Order the female rows by birth_location (ascending)
df_sorted = df_females.sort_values(by='birth_location')

# Total number_of_births per birth_location, as a {birth_location: total} dict
births_per_location = df_sorted.groupby('birth_location')['number_of_births'].sum()
grouped = births_per_location.to_dict()


# Bar colour for each state_code (matplotlib colour names).
# Extend this mapping to give other state codes their own colour.
state_colors = dict(
    IL='red',
    AZ='brown',
    WA='gold',
    FL='blue',
    GA='g',
    OR='orangered',
    MD="cyan",
    NY='royalblue',
    CA='maroon',
    DC="darkred",
)



# Create the bar plot: one bar per birth location, coloured by its state code
plt.figure(figsize=(14, 6))

# Resolve each location's state code once (taken from the first row for that
# location in df_sorted) instead of re-filtering the dataframe for every bar.
state_by_location = (
    df_sorted.drop_duplicates(subset='birth_location')
    .set_index('birth_location')['state_code']
    .to_dict()
)

for birth_location, number_of_births in grouped.items():
    state_code = state_by_location[birth_location]
    color = state_colors.get(state_code, 'gray')  # Default to gray if state code is not in the dictionary
    plt.bar(birth_location, number_of_births, color=color, label=state_code)

# Set labels and title
plt.xlabel('Birth Location')
plt.ylabel('Sum of Number of Births')
plt.title('Sum of Number of Births by Birth Location (Colored by State Code)')

# Add spacing to the right by widening the auto-scaled limits rather than
# hard-coding them: a fixed (0, 11) clips the left half of the first bar and
# hides any bars beyond position 11.
x_left, x_right = plt.xlim()
plt.xlim(x_left, x_right + 1)

# Every bar carries its state code as label, so a state with several birth
# locations would show up repeatedly; keep only the first handle per label.
handles, labels = plt.gca().get_legend_handles_labels()
legend_entries = {}
for handle, label in zip(handles, labels):
    legend_entries.setdefault(label, handle)

# Move the legend outside the plot area to avoid overlap
if legend_entries:
    plt.legend(
        list(legend_entries.values()),
        list(legend_entries.keys()),
        loc='upper left',
        bbox_to_anchor=(1, 1),
    )

# Rotate x-axis labels for better readability
plt.xticks(rotation=45, ha='right')

# Show the plot
plt.tight_layout()
plt.show()
