## Maternal Death Rates

### Importing Packages

In [1]:
import pandas as pd
from scipy import stats

In [3]:
# Import csv file into a pandas dataframe
df = pd.read_csv('maternal data #1.csv')

In [4]:
# Verify the size of df
print(df.shape)

(660, 12)


In [5]:
# Show the column headers
print(df.columns)

Index(['Data As Of', 'Jurisdiction', 'Group', 'Subgroup', 'Year of Death',
       'Month of Death', 'Time Period', 'Month Ending Date', 'Maternal Deaths',
       'Live Births', 'Maternal Mortality Rate', 'Footnote'],
      dtype='object')


In [8]:
# Show the first 5 rows of the dataframe
df.head()

Unnamed: 0,Data As Of,Jurisdiction,Group,Subgroup,Year of Death,Month of Death,Time Period,Month Ending Date,Maternal Deaths,Live Births,Maternal Mortality Rate,Footnote
0,10/6/24,United States,Total,Total,2019,1,12 month-ending,1/31/19,660,3787776,17.4,
1,10/6/24,United States,Total,Total,2019,2,12 month-ending,2/28/19,653,3783489,17.3,
2,10/6/24,United States,Total,Total,2019,3,12 month-ending,3/31/19,657,3771682,17.4,
3,10/6/24,United States,Total,Total,2019,4,12 month-ending,4/30/19,668,3772235,17.7,
4,10/6/24,United States,Total,Total,2019,5,12 month-ending,5/31/19,706,3767999,18.7,


In [9]:
# Show info about df
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 660 entries, 0 to 659
Data columns (total 12 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Data As Of               660 non-null    object 
 1   Jurisdiction             660 non-null    object 
 2   Group                    660 non-null    object 
 3   Subgroup                 660 non-null    object 
 4   Year of Death            660 non-null    int64  
 5   Month of Death           660 non-null    int64  
 6   Time Period              660 non-null    object 
 7   Month Ending Date        660 non-null    object 
 8   Maternal Deaths          589 non-null    object 
 9   Live Births              660 non-null    object 
 10  Maternal Mortality Rate  544 non-null    float64
 11  Footnote                 116 non-null    object 
dtypes: float64(1), int64(2), object(9)
memory usage: 62.0+ KB


## Categorical Data

In [10]:
# Show the first 10 rows of the "Group" column
df['Group'].head(10)

0    Total
1    Total
2    Total
3    Total
4    Total
5    Total
6    Total
7    Total
8    Total
9    Total
Name: Group, dtype: object

In [12]:
# Calculate the distinct values of the "Group" column
df['Group'].unique()

array(['Total', 'Age', 'Race and Hispanic origin'], dtype=object)

In [13]:
# Calculate the frequency of each category in the "Group" column
df['Group'].value_counts()

Group
Race and Hispanic origin    396
Age                         198
Total                        66
Name: count, dtype: int64

In [14]:
# Calcualate the proportions of each category in the "Group" column
df['Group'].value_counts(normalize=True)

Group
Race and Hispanic origin    0.6
Age                         0.3
Total                       0.1
Name: proportion, dtype: float64

In [15]:
# Calcualte the frequency of each category in the "Subgroup" column
df.Subgroup.value_counts()

Subgroup
Total                                                      66
Under 25 years                                             66
25-39 years                                                66
40 years and over                                          66
Hispanic                                                   66
American Indian or Alaska Native, Non-Hispanic             66
Asian, Non-Hispanic                                        66
Black, Non-Hispanic                                        66
Native Hawaiian or Other Pacific Islander, Non-Hispanic    66
White, Non-Hispanic                                        66
Name: count, dtype: int64

In [16]:
# Calculate the pecentage of each category in the "Subgroup" column
df.Subgroup.value_counts(normalize=True) * 100

Subgroup
Total                                                      10.0
Under 25 years                                             10.0
25-39 years                                                10.0
40 years and over                                          10.0
Hispanic                                                   10.0
American Indian or Alaska Native, Non-Hispanic             10.0
Asian, Non-Hispanic                                        10.0
Black, Non-Hispanic                                        10.0
Native Hawaiian or Other Pacific Islander, Non-Hispanic    10.0
White, Non-Hispanic                                        10.0
Name: proportion, dtype: float64

In [17]:
# Create a cross-tabulation of the "Group" and "Subgroup" columns
pd.crosstab(df.Group, df.Subgroup)

Subgroup,25-39 years,40 years and over,"American Indian or Alaska Native, Non-Hispanic","Asian, Non-Hispanic","Black, Non-Hispanic",Hispanic,"Native Hawaiian or Other Pacific Islander, Non-Hispanic",Total,Under 25 years,"White, Non-Hispanic"
Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Age,66,66,0,0,0,0,0,0,66,0
Race and Hispanic origin,0,0,66,66,66,66,66,0,0,66
Total,0,0,0,0,0,0,0,66,0,0


## Comparative numerical analysis

In [19]:
# Calculate the mean of the "Maternal Mortality Rate" column for each of the "Subgroup" categories
df.groupby('Subgroup')['Maternal Mortality Rate'].mean()

Subgroup
25-39 years                                                22.015152
40 years and over                                          92.350000
American Indian or Alaska Native, Non-Hispanic             95.728571
Asian, Non-Hispanic                                        13.721212
Black, Non-Hispanic                                        52.360606
Hispanic                                                   17.003030
Native Hawaiian or Other Pacific Islander, Non-Hispanic     0.000000
Total                                                      22.974242
Under 25 years                                             14.563636
White, Non-Hispanic                                        18.974242
Name: Maternal Mortality Rate, dtype: float64

## Numerical Data Analysis

In [22]:
# Calculate the mean of the "Maternal Mortality Rate" column
df['Maternal Mortality Rate'].mean()

np.float64(33.27518382352941)

In [24]:
# Calculate the sample standard deviation of the "Maternal Mortality Rate" column
df['Maternal Mortality Rate'].std()

np.float64(29.2671776694006)

In [25]:
# Describe the summary statistics of the "Maternal Mortality Rate" column
df['Maternal Mortality Rate'].describe()

count    544.000000
mean      33.275184
std       29.267178
min        0.000000
25%       14.900000
50%       19.800000
75%       42.675000
max      141.900000
Name: Maternal Mortality Rate, dtype: float64

In [26]:
# Describe the summary statistics of the "Maternal Mortality Rate" column for each category in the "Subgroup" categories
df.groupby('Subgroup')['Maternal Mortality Rate'].describe()

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
Subgroup,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
25-39 years,66.0,22.015152,4.555512,16.4,18.7,20.2,23.925,32.2
40 years and over,66.0,92.35,26.388816,56.3,72.5,79.35,119.525,141.9
"American Indian or Alaska Native, Non-Hispanic",14.0,95.728571,10.758212,77.6,92.025,94.3,100.0,118.7
"Asian, Non-Hispanic",66.0,13.721212,2.659469,9.1,11.925,13.35,15.1,19.8
"Black, Non-Hispanic",66.0,52.360606,9.240594,38.3,47.325,50.25,57.5,72.7
Hispanic,66.0,17.00303,5.489962,10.1,13.125,14.95,20.9,28.5
"Native Hawaiian or Other Pacific Islander, Non-Hispanic",2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Total,66.0,22.974242,4.906646,17.3,19.225,20.8,25.2,33.8
Under 25 years,66.0,14.563636,2.906896,10.8,12.6,13.25,16.225,21.4
"White, Non-Hispanic",66.0,18.974242,3.99343,14.2,15.95,17.8,20.075,28.0


In [28]:
# Calculate the correlation between the "Maternal Mortality Rate" and "Supgroup" columns of rows 68-133
df.loc[68:133, ['Maternal Mortality Rate', 'Subgroup']].corr()

ValueError: could not convert string to float: 'Under 25 years'