In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# [Census Block Group 2010 TX](https://schoolsdata2-93b5c-tea-texas.opendata.arcgis.com/datasets/census-block-group-2010-tx/) #

2010 Census Block Group data (County Level) in Texas from the [Texas Education Agency Public Open Data Site](https://schoolsdata2-tea-texas.opendata.arcgis.com/).

Refer to [Census_Block_Group_2010_TX_variables.csv](Census_Block_Group_2010_TX_variables.csv) for a detailed description.

## Loading Data ##

In [2]:
# Load the raw block-group table, then report its dimensions and schema.
source_csv = 'Census_Block_Group_2010_TX.csv'
df = pd.read_csv(source_csv)
print('Shape:', df.shape)
df.info()

Shape: (15800, 57)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15800 entries, 0 to 15799
Data columns (total 57 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   OBJECTID_1     15800 non-null  int64  
 1   OBJECTID       15800 non-null  int64  
 2   STATE_FIPS     15800 non-null  int64  
 3   CNTY_FIPS      15800 non-null  int64  
 4   STCOFIPS       15800 non-null  int64  
 5   TRACT          15800 non-null  int64  
 6   BLKGRP         15800 non-null  int64  
 7   FIPS           15800 non-null  float64
 8   POPULATION     15800 non-null  int64  
 9   POP_SQMI       15800 non-null  float64
 10  POP2010        15800 non-null  int64  
 11  POP10_SQMI     15800 non-null  float64
 12  WHITE          15800 non-null  int64  
 13  BLACK          15800 non-null  int64  
 14  AMERI_ES       15800 non-null  int64  
 15  ASIAN          15800 non-null  int64  
 16  HAWN_PI        15800 non-null  int64  
 17  HISPANIC       15800 non-null  

Renaming the necessary columns with descriptive names.

In [3]:
# Raw census column -> descriptive name (the " 10" suffix presumably refers to
# the 2010 census - confirm against the variables CSV).
# Insertion order matters: cols_old and cols_new are derived from this mapping
# and stay index-aligned with each other.
col_renames = {
    'STCOFIPS': 'County #',
    'POP2010': 'Population 10',
    'WHITE': 'White Pop 10',
    'BLACK': 'Black Pop 10',
    'ASIAN': 'Asian Pop 10',
    'HISPANIC': 'Hispanic Pop 10',
    'MALES': 'Male Pop 10',
    'FEMALES': 'Female Pop 10',
    'AGE_UNDER5': 'Age 0-4 Pop 10',
    'AGE_5_9': 'Age 5-9 Pop 10',
    'AGE_10_14': 'Age 10-14 Pop 10',
    'AGE_15_19': 'Age 15-19 Pop 10',
    'AGE_20_24': 'Age 20-24 Pop 10',
    'AGE_25_34': 'Age 25-34 Pop 10',
    'AGE_35_44': 'Age 35-44 Pop 10',
    'AGE_45_54': 'Age 45-54 Pop 10',
    'AGE_55_64': 'Age 55-64 Pop 10',
    'AGE_65_74': 'Age 65-74 Pop 10',
    'AGE_75_84': 'Age 75-84 Pop 10',
    'AGE_85_UP': 'Age 85-Up Pop 10',
    'MED_AGE': 'Median Age 10',
    'MED_AGE_M': 'Median Age Male 10',
    'MED_AGE_F': 'Median Age Female 10',
    'HOUSEHOLDS': '# of Households 10',
    'AVE_HH_SZ': 'Avg Household Size 10',
    'HSEHLD_1_M': 'HH 1 Male 10',
    'HSEHLD_1_F': 'HH 1 Female 10',
    'MARHH_CHD': 'HH Married-Child 10',
    'MARHH_NO_C': 'HH Married-noChild 10',
    'MHH_CHILD': 'HH Male-Child 10',
    'FHH_CHILD': 'HH Female-Child 10',
    'FAMILIES': '# of Families 10',
    'AVE_FAM_SZ': 'Avg Family Size 10',
    'HSE_UNITS': '# of Housing Units 10',
    'VACANT': 'Housing Vacant 10',
    'OWNER_OCC': 'Housing Owner Occup 10',
    'RENTER_OCC': 'Housing Renter Occup 10',
}
cols_old = list(col_renames)
cols_new = list(col_renames.values())

In [5]:
# Pair each raw census column with its descriptive name; columns that are not
# listed in cols_old keep their original names.
rename_map = dict(zip(cols_old, cols_new))
# Rebind the result instead of using inplace=True: rename() returns a new frame,
# which keeps the step chainable and avoids mutating an object in place.
df = df.rename(columns=rename_map)
df.columns

Index(['OBJECTID_1', 'OBJECTID', 'STATE_FIPS', 'CNTY_FIPS', 'County #',
       'TRACT', 'BLKGRP', 'FIPS', 'POPULATION', 'POP_SQMI', 'Population 10',
       'POP10_SQMI', 'White Pop 10', 'Black Pop 10', 'AMERI_ES',
       'Asian Pop 10', 'HAWN_PI', 'Hispanic Pop 10', 'OTHER', 'MULT_RACE',
       'Male Pop 10', 'Female Pop 10', 'Age 0-4 Pop 10', 'Age 5-9 Pop 10',
       'Age 10-14 Pop 10', 'Age 15-19 Pop 10', 'Age 20-24 Pop 10',
       'Age 25-34 Pop 10', 'Age 35-44 Pop 10', 'Age 45-54 Pop 10',
       'Age 55-64 Pop 10', 'Age 65-74 Pop 10', 'Age 75-84 Pop 10',
       'Age 85-Up Pop 10', 'Median Age 10', 'Median Age Male 10',
       'Median Age Female 10', '# of Households 10', 'Avg Household Size 10',
       'HH 1 Male 10', 'HH 1 Female 10', 'HH Married-Child 10',
       'HH Married-noChild 10', 'HH Male-Child 10', 'HH Female-Child 10',
       '# of Families 10', 'Avg Family Size 10', '# of Housing Units 10',
       'Housing Vacant 10', 'Housing Owner Occup 10',
       'Housing Renter Oc

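Since the source data is at Block Group level, the necessary columns are aggregated to county level. All columns are aggregated with `sum()`, except the following, which are aggregated with `mean()` (an unweighted mean of the block-group values, so only an approximation of the true county-level figure):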

- `Median Age 10`
- `Median Age Male 10`
- `Median Age Female 10`
- `Avg Household Size 10`
- `Avg Family Size 10`

In [6]:
# Aggregation spec used when collapsing block groups to counties: count-like
# columns are summed, while medians/averages are averaged (an unweighted mean of
# block-group values, i.e. an approximation of the true county figure).
# These names must match the entries of `cols_new` exactly, including the
# trailing " 10"; without the suffix the membership test below never matches
# and every column is silently summed.
cols_mean = ['Median Age 10',
             'Median Age Male 10',
             'Median Age Female 10',
             'Avg Household Size 10',
             'Avg Family Size 10']

# Fail fast if the mean-column names ever drift away from cols_new again.
unknown_mean_cols = set(cols_mean) - set(cols_new)
assert not unknown_mean_cols, f'cols_mean entries missing from cols_new: {sorted(unknown_mean_cols)}'

cols_agg = {c: 'mean' if c in cols_mean else 'sum' for c in cols_new}
# 'County #' is the group-by key rather than a value column, so it is removed
# from the aggregation spec.
cols_agg.pop('County #')

'sum'

In [7]:
# Collapse the block-group rows into one row per county, applying the
# per-column aggregation from cols_agg; as_index=False keeps 'County #' as a
# regular column in the result.
county_groups = df.groupby('County #', as_index=False)
df_county = county_groups.agg(cols_agg)
df_county.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 254 entries, 0 to 253
Data columns (total 37 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   County #                 254 non-null    int64  
 1   Population 10            254 non-null    int64  
 2   White Pop 10             254 non-null    int64  
 3   Black Pop 10             254 non-null    int64  
 4   Asian Pop 10             254 non-null    int64  
 5   Hispanic Pop 10          254 non-null    int64  
 6   Male Pop 10              254 non-null    int64  
 7   Female Pop 10            254 non-null    int64  
 8   Age 0-4 Pop 10           254 non-null    int64  
 9   Age 5-9 Pop 10           254 non-null    int64  
 10  Age 10-14 Pop 10         254 non-null    int64  
 11  Age 15-19 Pop 10         254 non-null    int64  
 12  Age 20-24 Pop 10         254 non-null    int64  
 13  Age 25-34 Pop 10         254 non-null    int64  
 14  Age 35-44 Pop 10         2

In [8]:
# Persist the county-level table; index=False leaves the row index out of the file.
county_csv = 'DATA_Census_Block_Group_2010_TX_County.csv'
df_county.to_csv(county_csv, index=False)