In [36]:
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st

# Study data files
state_age_path = "age_states.csv"

# Skip the first two lines of the file so that the third line is parsed as the
# header row (presumably the skipped lines are a title/metadata preamble --
# confirm against the raw file if the column names ever look wrong).
state_age = pd.read_csv(state_age_path, skiprows=2)

# Keep the two identifying columns plus every column whose name mentions 'Percentage'.
percentage_columns = ['Year', 'State'] + [col for col in state_age.columns if 'Percentage' in col]

# Take an explicit copy so this frame is independent of `state_age`; assigning
# to its columns later then cannot raise pandas' SettingWithCopyWarning.
state_age_df = state_age[percentage_columns].copy()
state_age_df.head()

Unnamed: 0,Year,State,Percentage
0,2000,Median of States,6.0
1,2001,Median of States,6.3
2,2002,Median of States,6.7
3,2003,Median of States,7.0
4,2004,Median of States,6.9


In [37]:
# Summary of the selected columns. The values have not been converted to
# numbers at this point, so describe() reports count / unique / top / freq
# rather than mean / std / quartiles.
age_analysis = state_age_df.describe()
age_analysis

Unnamed: 0,Year,State,Percentage
count,1211,1210,1210
unique,23,55,104
top,2000,Median of States,7
freq,55,22,32


In [38]:
# Years compared for each state; change these to compare a different pair.
START_YEAR = 2000
END_YEAR = 2020

# Work on a copy so state_age_df keeps the values exactly as loaded.
state_age_copy = state_age_df.copy()

# Coerce both columns to numbers; anything that cannot be parsed becomes NaN.
state_age_copy['Year'] = pd.to_numeric(state_age_copy['Year'], errors='coerce')
state_age_copy['Percentage'] = pd.to_numeric(state_age_copy['Percentage'], errors='coerce')

# States in order of first appearance. Rows with no State at all are left out:
# a NaN key can never match an equality filter, so it would only ever produce
# an empty, meaningless entry in the result.
unique_states = state_age_copy['State'].dropna().unique()


def _percentage_for_year(year):
    """Return the 'Percentage' values recorded for `year`, indexed by State.

    If a state has more than one row for the year, only the first row is used
    (even if its value is NaN). States with no row for the year are absent
    from the returned Series.
    """
    rows = state_age_copy.dropna(subset=['State'])
    rows = rows[rows['Year'] == year]
    return rows.drop_duplicates(subset='State').set_index('State')['Percentage']


# Index-aligned subtraction: a state missing from either year comes out as NaN.
percentage_change = _percentage_for_year(END_YEAR) - _percentage_for_year(START_YEAR)

# One row per state, in first-appearance order. Missing data is kept as NaN
# (not a text placeholder) so the column stays numeric and can be sorted,
# averaged or plotted directly.
differences_df = (
    percentage_change
    .reindex(unique_states)
    .rename_axis(None)
    .to_frame('Percentage Difference')
)

# Plain-dict view of the same result (state -> difference, NaN when unavailable).
state_differences = differences_df['Percentage Difference'].to_dict()

differences_df

Unnamed: 0,Percentage Difference
Median of States,3.4
Guam,
Virgin Islands of the U.S.,
Alabama,5.6
Alaska,3.0
Arizona,3.8
Arkansas,5.8
California,2.1
Colorado,1.9
Connecticut,3.0


In [39]:
# Make sure 'Percentage' is numeric before summarising; values that cannot be
# parsed become NaN and are ignored by describe().
percentage_numeric = pd.to_numeric(state_age_copy['Percentage'], errors='coerce')
state_age_copy['Percentage'] = percentage_numeric

# Per-state summary statistics (count, mean, std, min, quartiles, max) of 'Percentage'.
percentage_by_state = state_age_copy.groupby('State')['Percentage']
state_analysis = percentage_by_state.describe()

state_analysis
    

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Alabama,22.0,10.822727,1.830933,7.3,9.425,11.0,12.425,13.3
Alaska,22.0,6.740909,1.300058,4.4,5.85,7.2,7.675,8.7
Arizona,22.0,8.322727,1.426261,5.9,7.35,8.65,9.675,10.1
Arkansas,22.0,9.527273,1.986666,5.9,7.7,9.9,11.1,12.4
California,22.0,8.777273,1.11992,6.6,7.85,8.95,9.675,10.9
Colorado,22.0,6.018182,0.803348,4.6,5.275,6.15,6.6,7.2
Connecticut,22.0,7.145455,1.257635,5.2,6.0,7.1,8.2,9.3
Delaware,22.0,8.6,1.344477,6.3,7.55,8.5,9.875,10.9
District of Columbia,22.0,8.540909,0.582817,7.4,8.225,8.4,9.0,9.5
Florida,21.0,8.552381,1.170307,6.2,7.7,8.6,9.4,10.5


In [41]:
# Column of state_analysis to rank the states by (any describe() column name works).
stat = "mean"

# Largest value first.
state_stat_ordered = state_analysis.sort_values(stat, ascending=False)

state_stat_ordered

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Puerto Rico,22.0,12.795455,1.620546,9.3,12.0,13.0,14.075,15.2
Guam,18.0,12.461111,2.268749,8.7,11.225,11.8,14.075,16.6
Mississippi,22.0,11.331818,1.664871,7.6,10.65,11.65,12.775,13.7
West Virginia,22.0,10.963636,1.763261,7.0,9.85,10.95,12.65,13.4
Alabama,22.0,10.822727,1.830933,7.3,9.425,11.0,12.425,13.3
Tennessee,22.0,10.395455,1.597163,7.0,9.35,10.75,11.65,12.5
Louisiana,22.0,10.277273,1.813161,6.8,9.025,10.55,11.475,12.9
South Carolina,22.0,10.140909,1.416118,7.2,9.2,10.3,11.45,11.9
Texas,22.0,9.913636,1.648461,6.5,8.4,10.45,11.05,12.1
Kentucky,22.0,9.827273,1.889112,6.3,8.775,9.75,11.4,12.3
