In [9]:
#imports
import pandas as pd
import numpy as np

In [10]:
#load original dfs
original_df = pd.read_csv("../data/aggregated_data.csv")

In [11]:
#methods to create change_by_year data

def generateChangeDataFeatureColumns(df, column):
    df[f'{column}_val_change'] = None 
    df[f'{column}_perc_change'] = None 
    # Create an empty DataFrame with the same columns as the original one 
    new_df = pd.DataFrame(columns=df.columns) 
    # Get unique states and years 
    states = df["state"].unique() 
    years = df["YEAR"].unique()
    # Iterate over states and years 
    for state in states: 
        for year in years:
            print(year - 1 in years) 
            if (year - 1) in years: 
                row_index = df[(df['state'] == state) & (df['YEAR'] == year)].index
                last_years_row = df[(df['state'] == state) & (df['YEAR'] == year - 1)] 
                if not row_index.empty and not last_years_row.empty: 
                    current_value = df.at[row_index[0], column]
                    last_value = last_years_row[column].values[0] 
                    # Calculate the value change and percentage change 
                    df.at[row_index[0], f'{column}_val_change'] = current_value - last_value 
                    df.at[row_index[0], f'{column}_perc_change'] = (current_value- last_value) / last_value
                # else: print(f"No data found for State: {state}, Year: {year} or Year: {year - 1}") 
            else: 
                print(f'No previous year found for Year: {year} in State: {state}')

    return df

def generateChangeDataTargetColumns(df, column):
    df[f'{column}_val_change'] = None 
    df[f'{column}_perc_change'] = None 
    # Create an empty DataFrame with the same columns as the original one 
    new_df = pd.DataFrame(columns=df.columns) 
    # Get unique states and years 
    states = df["state"].unique() 
    years = df["YEAR"].unique() 
    # Iterate over states and years 
    for state in states: 
        for year in years: 
            if year + 1 in years:
                row_index = df[(df['state'] == state) & (df['YEAR'] == year)].index
                next_years_row = df[(df['state'] == state) & (df['YEAR'] == year + 1)] 
                if not row_index.empty and not next_years_row.empty: 
                    current_value = df.at[row_index[0], column]
                    next_value = next_years_row[column].values[0] 
                    # Calculate the value change and percentage change 
                    df.at[row_index[0], f'{column}_val_change'] = next_value - current_value 
                    df.at[row_index[0], f'{column}_perc_change'] = (next_value- current_value) / current_value
                else: print(f"No data found for State: {state}, Year: {year} or Year: {year + 1}") 
            else: 
                print(f'No next year found for Year: {year} in State: {state}')  
    return df 
def fill_zeros_with_district_mean(df, column):
    usps_districts = {
        'Atlantic Area': ['CT', 'DE', 'MD', 'MA', 'NH', 'NJ', 'NY', 'PA', 'RI', 'VT', 'DC'],
        'Central Area': ['IL', 'IN', 'IA', 'KS', 'KY', 'MI', 'MN', 'MO', 'NE', 'ND', 'OH', 'SD', 'WI'],
        'Southern Area': ['AL', 'AR', 'FL', 'GA', 'LA', 'MS', 'NC', 'OK', 'SC', 'TN', 'TX', 'VA', 'WV'],
        'Western Pacific Area': ['AK', 'AZ', 'CA', 'CO', 'HI', 'ID', 'MT', 'NV', 'NM', 'OR', 'UT', 'WA', 'WY']
    }
    # Add district information
    df = add_district_info(df)

    # Create a new column to store the district mean filled values
    df[f'{column}_district_mean_filled'] = df[column]

    # Iterate over districts and years
    for district, states in usps_districts.items():
        district_df = df[df['district'] == district]

        for year in df['YEAR'].unique():
            year_district_df = district_df[district_df['YEAR'] == year]
            
            # Calculate the district mean for the given year
            district_mean = year_district_df[column].replace(0, np.nan).mean()
            
            # Fill 0s with the district mean for the given year
            mask = (df['district'] == district) & (df['YEAR'] == year) & (df[column] == 0)
            df.loc[mask, f'{column}_district_mean_filled'] = district_mean
    
    return df
def create_representative_column(df, columns, representative_col_name):
    # Calculate the mean of the specified columns
    df[representative_col_name] = df[columns].mean(axis=1)
    return df

def add_district_info(df):
    usps_districts = {
        'Atlantic Area': ['CT', 'DE', 'MD', 'MA', 'NH', 'NJ', 'NY', 'PA', 'RI', 'VT', 'DC'],
        'Central Area': ['IL', 'IN', 'IA', 'KS', 'KY', 'MI', 'MN', 'MO', 'NE', 'ND', 'OH', 'SD', 'WI'],
        'Southern Area': ['AL', 'AR', 'FL', 'GA', 'LA', 'MS', 'NC', 'OK', 'SC', 'TN', 'TX', 'VA', 'WV'],
        'Western Pacific Area': ['AK', 'AZ', 'CA', 'CO', 'HI', 'ID', 'MT', 'NV', 'NM', 'OR', 'UT', 'WA', 'WY']
    }
    # Create a reverse lookup for states to districts
    state_to_district = {state: district for district, states in usps_districts.items() for state in states}
    df['district'] = df['state'].map(state_to_district)
    return df



In [12]:
change_df = original_df
feature_cols = ["mean_ADHD", "mean_PTSD", "mean_anxiety", "mean_bipolar", "mean_depression",
                "mean_mental hospital", "mean_psychologist near me", "mean_psychiatrists near me", "mean_therapist near me",
                "median_ADHD", "median_PTSD", "median_anxiety", "median_bipolar", "median_depression",
                "median_mental hospital", "median_psychologist near me", "median_psychiatrists near me", "median_therapist near me"
                ]



# Example usage:
# print(usps_districts['Atlantic Area'])


for col in feature_cols:
    fill_zeros_with_district_mean(change_df, col)
    change_df = generateChangeDataFeatureColumns(change_df, col)
    change_df = generateChangeDataFeatureColumns(change_df, f'{col}_district_mean_filled')
print(len(change_df))
change_df = generateChangeDataTargetColumns(change_df, "UTIL_RATE")


change_df.to_csv('../data/aggregated_data_with_changes.csv', index=False)


True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: AL
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: AK
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: AZ
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: AR
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: CA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: CO
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: CT
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: DE
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: DC
True
True
True
True
True
True
True
True
True
False
No previous year found for Year

  df.at[row_index[0], f'{column}_perc_change'] = (current_value- last_value) / last_value
  df.at[row_index[0], f'{column}_perc_change'] = (current_value- last_value) / last_value


True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: ND
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: OH
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: OK
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: OR
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: PA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: RI
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: SC
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: SD
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: TN
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State

  df.at[row_index[0], f'{column}_perc_change'] = (current_value- last_value) / last_value
  df.at[row_index[0], f'{column}_perc_change'] = (current_value- last_value) / last_value


True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: FL
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: GA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: HI
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: ID
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: IL
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: IN
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: IA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: KS
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: KY
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in 

  df.at[row_index[0], f'{column}_perc_change'] = (current_value- last_value) / last_value
  df.at[row_index[0], f'{column}_perc_change'] = (current_value- last_value) / last_value


True
False
No previous year found for Year: 2013 in State: ID
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: IL
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: IN
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: IA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: KS
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: KY
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: LA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MI
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MN
True
True
True
True


  df.at[row_index[0], f'{column}_perc_change'] = (current_value- last_value) / last_value
  df.at[row_index[0], f'{column}_perc_change'] = (current_value- last_value) / last_value


True
True
True
False
No previous year found for Year: 2013 in State: MA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MI
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MN
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MS
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MO
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MT
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: NE
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: NV
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: NJ
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: NM
True
True


  df.at[row_index[0], f'{column}_perc_change'] = (current_value- last_value) / last_value


True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: CT
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: DE
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: DC
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: FL
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: GA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: HI
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: ID
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: IL
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: IN
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State

  df.at[row_index[0], f'{column}_perc_change'] = (current_value- last_value) / last_value
  df.at[row_index[0], f'{column}_perc_change'] = (current_value- last_value) / last_value


True
False
No previous year found for Year: 2013 in State: IA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: KS
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: KY
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: LA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MI
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MN
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MS
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MO
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MT
True
True
True
True


  df.at[row_index[0], f'{column}_perc_change'] = (current_value- last_value) / last_value
  df.at[row_index[0], f'{column}_perc_change'] = (current_value- last_value) / last_value


True
True
True
True
True
False
No previous year found for Year: 2013 in State: IA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: KS
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: KY
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: LA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MI
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MN
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MS
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MO
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MT


  df.at[row_index[0], f'{column}_perc_change'] = (current_value- last_value) / last_value
  df.at[row_index[0], f'{column}_perc_change'] = (current_value- last_value) / last_value


True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: IA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: KS
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: KY
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: LA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MI
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MN
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MS
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MO
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State

  df.at[row_index[0], f'{column}_perc_change'] = (current_value- last_value) / last_value
  df.at[row_index[0], f'{column}_perc_change'] = (current_value- last_value) / last_value


True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: IN
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: IA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: KS
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: KY
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: LA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MA
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MI
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MN
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in State: MS
True
True
True
True
True
True
True
True
True
False
No previous year found for Year: 2013 in 

In [13]:
row = test_df[(test_df['STATE'] == 'CT') & (test_df['YEAR'] == 2018)]
print(test_df['mean_ADHD_perc_change'])
val_change = row['mean_ADHD_val_change'].values[0] 
perc_change = row['mean_ADHD_perc_change'].values[0] 
val_2018 = row["mean_ADHD"].values[0]
print(f"Value Change for AL in 2018: {val_change}") 
print(f"Percentage Change for AL in 2018: {perc_change}")

row_2 = test_df[(test_df['STATE'] == 'AL') & (test_df['YEAR'] == 2017)]
val_2017 = row_2["mean_ADHD"].values[0]

print(f"2017 : {val_2017}, 2018 : {val_2018}")

NameError: name 'test_df' is not defined