In [92]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import PolynomialFeatures
from scipy.stats import ttest_rel, wilcoxon

import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

In [93]:
path1= "../FCI/ACS_2023_Median_Income_by_Race_and_Age_Selp_Emp_Boundaries.csv"
path2= "../FCI/ACS_10_14_Median_Income_by_Race_and_Age_Selp_Emp_Boundaries.csv"

ACS_median_income_23= pd.read_csv(path1)
ACS_median_income_14= pd.read_csv(path2)

In [94]:
ACS_merge = pd.merge(ACS_median_income_23,ACS_median_income_14, on='Name', how='inner', suffixes=('_2023', '_2014'))

In [95]:
print(ACS_merge.columns)

Index(['Object_ID_2023', 'Geographic_Identifier_-_FIPS_Code_2023',
       'Area_of_Land_(Square_Meters)_2023',
       'Area_of_Water_(Square_Meters)_2023', 'Name', 'State_2023',
       'County_2023',
       'Median_Household_Income_in_past_12_months_(inflation-adjusted_dollars_to_last_year_of_5-year_range)_2023',
       'Median_Household_Income_in_past_12_months_(inflation-adjusted_dollars_to_last_year_of_5-year_range)_-_Margin_of_Error_2023',
       'Median_Household_Income_in_past_12_months,_Householder_under_25_years_2023',
       'Median_Household_Income_in_past_12_months,_Householder_under_25_years_-_Margin_of_Error_2023',
       'Median_Household_Income_in_past_12_months,_Householder_25_to_44_years_2023',
       'Median_Household_Income_in_past_12_months,_Householder_25_to_44_years_-_Margin_of_Error_2023',
       'Median_Household_Income_in_past_12_months,_Householder_45_to_64_years_2023',
       'Median_Household_Income_in_past_12_months,_Householder_45_to_64_years_-_Margin_of_E

In [96]:
ACS_merge['Median_Household_Income_in_past_12_months'] = ACS_merge['Median_Household_Income_in_past_12_months_(inflation-adjusted_dollars_to_last_year_of_5-year_range)_2023'] - ACS_merge['Median_Household_Income_in_past_12_months_(inflation-adjusted_dollars_to_last_year_of_5-year_range)_2014']
ACS_merge['Median_Household_Income_in_past_12_months_under_25'] = ACS_merge['Median_Household_Income_in_past_12_months,_Householder_under_25_years_2023'] - ACS_merge['Median_Household_Income_in_past_12_months,_Householder_under_25_years_2014']
ACS_merge['Median_Household_Income_in_past_12_months_African_American'] = ACS_merge['Median_Household_Income_in_past_12_months,_Black_or_African_American_Alone_Householder_2023'] - ACS_merge['Median_Household_Income_in_past_12_months,_Black_or_African_American_Alone_Householder_2014']
ACS_merge['Total_House_Hold'] = ACS_merge['Total_Households_2023'] - ACS_merge['Total_Households_2014']


In [97]:
X= ACS_merge[['Median_Household_Income_in_past_12_months','Median_Household_Income_in_past_12_months_under_25','Median_Household_Income_in_past_12_months_African_American','Total_House_Hold']]
Y= ACS_merge['Median_Household_Income_in_past_12_months,_Householder_under_25_years_2023']

In [98]:
X_train,X_test,Y_train,Y_test= train_test_split(X,Y,test_size=0.2, random_state=0)

model= LinearRegression()
model.fit(X_train,Y_train)

Y_pred = model.predict(X_test)

In [99]:
print("Coefficients:", model.coef_)
print("Intercept:", model.intercept_)

MSE= mean_squared_error(Y_test, Y_pred)
R2=r2_score(Y_test,Y_pred)
print("Mean Squared Error:", MSE)
print("R^2 score:",R2)

Coefficients: [ 0.23502569  0.74975492  0.01468993 -6.16304042]
Intercept: 9375.902844538694
Mean Squared Error: 53707180.55244131
R^2 score: 0.8904964842185048


In [100]:
descriptive_stats_2014 = ACS_median_income_14.describe()
descriptive_stats_2023 = ACS_median_income_23.describe()

print("Descriptive Statistics for 2014:")
print(descriptive_stats_2014)
print("\nDescriptive Statistics for 2023:")
print(descriptive_stats_2023)

Descriptive Statistics for 2014:
         Object_ID  Geographic_Identifier_-_FIPS_Code  \
count    56.000000                       5.600000e+01   
mean   2891.500000                       1.200100e+10   
std      16.309506                       1.461311e+04   
min    2864.000000                       1.200100e+10   
25%    2877.750000                       1.200100e+10   
50%    2891.500000                       1.200100e+10   
75%    2905.250000                       1.200100e+10   
max    2919.000000                       1.200111e+10   

       Area_of_Land_(Square_Meters)  Area_of_Water_(Square_Meters)  \
count                  5.600000e+01                   5.600000e+01   
mean                   4.047105e+07                   4.336130e+06   
std                    7.238478e+07                   1.615153e+07   
min                    5.246420e+05                   0.000000e+00   
25%                    2.411816e+06                   8.837750e+03   
50%                    7.563150e+

In [101]:
t_test_results = {
    'Past_12_months_income': ttest_rel(ACS_merge['Median_Household_Income_in_past_12_months_(inflation-adjusted_dollars_to_last_year_of_5-year_range)_2023'], ACS_merge['Median_Household_Income_in_past_12_months_(inflation-adjusted_dollars_to_last_year_of_5-year_range)_2014']),
    'Past_12_months_income_under_25y': ttest_rel(ACS_merge['Median_Household_Income_in_past_12_months,_Householder_under_25_years_2023'], ACS_merge['Median_Household_Income_in_past_12_months,_Householder_under_25_years_2014']),
    'Past_12_months_income_African_American': ttest_rel(ACS_merge['Median_Household_Income_in_past_12_months,_Black_or_African_American_Alone_Householder_2023'], ACS_merge['Median_Household_Income_in_past_12_months,_Black_or_African_American_Alone_Householder_2014']),
    'Total_House_Hold': ttest_rel(ACS_merge['Total_Households_2023'], ACS_merge['Total_Households_2014'])
}

print("\nPaired t-test Results:")
for key, value in t_test_results.items():
    print(f"{key}: t-statistic ={value.statistic}, p-value = {value.pvalue}")


Paired t-test Results:
Past_12_months_income: t-statistic =6.883280749740637, p-value = 1.6954078693061776e-08
Past_12_months_income_under_25y: t-statistic =0.28770938062676665, p-value = 0.7749200262946419
Past_12_months_income_African_American: t-statistic =0.9122331886276916, p-value = 0.36661726652288684
Total_House_Hold: t-statistic =6.735233075837132, p-value = 2.7969565884388045e-08


In [102]:
wilcoxon_results = {
    'Past_12_months_income': wilcoxon(ACS_merge['Median_Household_Income_in_past_12_months_(inflation-adjusted_dollars_to_last_year_of_5-year_range)_2023'], ACS_merge['Median_Household_Income_in_past_12_months_(inflation-adjusted_dollars_to_last_year_of_5-year_range)_2014']),
    'Past_12_months_income_under_25y': wilcoxon(ACS_merge['Median_Household_Income_in_past_12_months,_Householder_under_25_years_2023'], ACS_merge['Median_Household_Income_in_past_12_months,_Householder_under_25_years_2014']),
    'Past_12_months_income_African_American': wilcoxon(ACS_merge['Median_Household_Income_in_past_12_months,_Black_or_African_American_Alone_Householder_2023'], ACS_merge['Median_Household_Income_in_past_12_months,_Black_or_African_American_Alone_Householder_2014']),
    'Total_House_Hold': wilcoxon(ACS_merge['Total_Households_2023'], ACS_merge['Total_Households_2014'])
}

print("\nWilcoxon signed-rank test Results:")
for key, value in wilcoxon_results.items():
    print(f"{key}: statistic = {result.statistic}, p-value = {value.pvalue}")


Wilcoxon signed-rank test Results:
Past_12_months_income: statistic = 77.0, p-value = 5.0145445129601285e-08
Past_12_months_income_under_25y: statistic = 77.0, p-value = 0.9531185402094994
Past_12_months_income_African_American: statistic = 77.0, p-value = 0.14869027289034498
Total_House_Hold: statistic = 77.0, p-value = 3.407143367439858e-08


