In [1]:
import pandas as pd

# Load the dataset
file_path = 'Documents/food_access_research_atlas.csv'
food_access_data = pd.read_csv(file_path)

# Display basic information about the dataset
basic_info = {
    "First Five Rows": food_access_data.head(),
    "Column Names": food_access_data.columns.tolist(),
    "Dataset Shape": food_access_data.shape
}

basic_info


{'First Five Rows':    CensusTract    State   County  Urban  POP2010  OHU2010  GroupQuartersFlag  \
 0   1001020100  Alabama  Autauga      1     1912      693                  0   
 1   1001020200  Alabama  Autauga      1     2170      743                  0   
 2   1001020300  Alabama  Autauga      1     3373     1256                  0   
 3   1001020400  Alabama  Autauga      1     4386     1722                  0   
 4   1001020500  Alabama  Autauga      1    10766     4082                  0   
 
    NUMGQTRS  PCTGQTRS  LILATracts_1And10  ...  TractSeniors  TractWhite  \
 0         0  0.000000                  0  ...           221        1622   
 1       181  0.083410                  0  ...           214         888   
 2         0  0.000000                  0  ...           439        2576   
 3         0  0.000000                  0  ...           904        4086   
 4       181  0.016812                  0  ...          1126        8666   
 
    TractBlack  TractAsian  TractNH

In [2]:
# Identifying potential columns that could be used to define a food desert
# We're looking for columns related to access to food, grocery stores, or similar indicators

food_access_columns = [col for col in food_access_data.columns if 'food' in col.lower() or 'grocery' in col.lower() or 'snap' in col.lower()]
food_access_columns[:10]  # Displaying first 10 for brevity


['lasnaphalf',
 'lasnaphalfshare',
 'lasnap1',
 'lasnap1share',
 'lasnap10',
 'lasnap10share',
 'lasnap20',
 'lasnap20share',
 'TractSNAP']

In [4]:
# Categorizing potential food deserts based on poverty rate, median family income, and SNAP beneficiaries

# Thresholds for defining potential food deserts (these are arbitrary and can be adjusted)
poverty_rate_threshold = 20  # percent
median_income_threshold = food_access_data['MedianFamilyIncome'].quantile(0.25)  # lower quartile
snap_beneficiary_threshold = 20  # percent

# Creating a food desert indicator
food_access_data['PotentialFoodDesert'] = (
    (food_access_data['PovertyRate'] >= poverty_rate_threshold) &
    (food_access_data['MedianFamilyIncome'] <= median_income_threshold) &
    (food_access_data['TractSNAP'] / food_access_data['POP2010'] * 100 >= snap_beneficiary_threshold)
)

# Checking the distribution of potential food deserts
food_desert_distribution = food_access_data['PotentialFoodDesert'].value_counts(normalize=True) * 100
food_desert_distribution


PotentialFoodDesert
False    98.907554
True      1.092446
Name: proportion, dtype: float64

In [7]:
from scipy.stats import ttest_ind

# Calculating average median family income for potential food desert areas and non-food desert areas
average_income_food_desert = food_access_data[food_access_data['PotentialFoodDesert']]['MedianFamilyIncome'].mean()
average_income_non_food_desert = food_access_data[~food_access_data['PotentialFoodDesert']]['MedianFamilyIncome'].mean()

# Performing a t-test to check if the difference in median family incomes is statistically significant
income_food_desert = food_access_data[food_access_data['PotentialFoodDesert']]['MedianFamilyIncome']
income_non_food_desert = food_access_data[~food_access_data['PotentialFoodDesert']]['MedianFamilyIncome']

t_stat, p_value = ttest_ind(income_food_desert, income_non_food_desert)

average_income_results = {
    "Average Median Family Income in Potential Food Deserts": average_income_food_desert,
    "Average Median Family Income in Non-Food Desert Areas": average_income_non_food_desert,
    "T-Statistic": t_stat,
    "P-Value": p_value
}

average_income_results


{'Average Median Family Income in Potential Food Deserts': 22544.32663316583,
 'Average Median Family Income in Non-Food Desert Areas': 67927.52941666204,
 'T-Statistic': -37.72072679729716,
 'P-Value': 2.1810753147927597e-308}