In [2]:
import pandas as pd

In [3]:
# Load the consolidated border-report data set.
# Forward slashes keep the relative path valid on Windows, macOS and Linux alike
# (the previous "\\"-separated path only resolved on Windows).
file_path = "data/3_intermediate/int_texas_border_report.csv"

# Only nibrs_start_date holds full YYYY-MM-DD dates. report_year holds bare years
# (e.g. 2017), which cannot match '%Y-%m-%d'; listing it in parse_dates made pandas
# fall back to leaving the column as unparsed text without raising. Reading it as
# str states that outcome explicitly and keeps string comparisons such as
# report_year == '2023' working exactly as before.
border_report = pd.read_csv(
    file_path,
    parse_dates=['nibrs_start_date'],
    date_format='%Y-%m-%d',
    dtype={'report_year': str},
)

border_report.head()

Unnamed: 0,agency_name,county,population,nibrs_start_date,murder_and_nonnegligent_manslaughter,negligent_manslaughter,rape,robbery,assault,burglary,...,report_year,agency_type,nibrs_eligible_months,has_preliminary_nibrs_reporting,nibrs_contribution_percentage,cohort_year,cohort_quarter,cohort,adoption_status,adoption_status_numeric
0,ALAMO PD,Hidalgo County,19351,2018-08-01,0,0,0,0,0,0,...,2017,Police Department,0,False,,2018,3,2018Q3,early,1
1,ALPINE PD,Brewster County,5983,2020-04-01,0,0,0,0,0,0,...,2017,Police Department,0,False,,2020,2,2020Q2,early,1
2,ALTON PD,Hidalgo County,17432,2019-09-01,0,0,0,0,0,0,...,2017,Police Department,0,False,,2019,3,2019Q3,early,1
3,ANTHONY PD,El Paso County,5681,2020-12-01,0,0,0,0,0,0,...,2017,Police Department,0,False,,2020,4,2020Q4,early,1
4,BREWSTER CO SO,Brewster County,3196,2015-01-01,1,0,2,0,19,4,...,2017,Sheriff's Office,12,False,91.67,2015,1,2015Q1,early,1


In [6]:
# Summarise, by agency type, the agencies that report a population of zero in the
# most recent report year.

# Derive the most recent report year from the data instead of hard-coding it, so the
# cell keeps matching its description when newer years are added and does not depend
# on how report_year happens to be typed. dropna() avoids comparing missing values
# against year values while taking the maximum.
latest_report_year = border_report['report_year'].dropna().max()

# Rows with zero population in the most recent report year
zero_population = border_report[
    (border_report['population'] == 0)
    & (border_report['report_year'] == latest_report_year)
]

# Count unique agency names per agency type for the most recent year
zero_population_agencies = zero_population.groupby('agency_type')['agency_name'].nunique()

# Convert to a dataframe; the 'agency_name' column now holds the per-type count
zero_population_agencies_df = zero_population_agencies.to_frame().reset_index()

# Share of each agency type among all zero-population agencies, as a percentage
# rounded to two decimal places
total_zero_population_agencies = zero_population_agencies_df['agency_name'].sum()
zero_population_agencies_df['percentage'] = (
    zero_population_agencies_df['agency_name'] / total_zero_population_agencies * 100
).round(2)

# Largest share first
zero_population_agencies_df = zero_population_agencies_df.sort_values(by='percentage', ascending=False)

# Print total number of agencies with zero population, then the per-type breakdown
print(f"Total number of agencies with zero population: {total_zero_population_agencies}")
print()
print(zero_population_agencies_df.to_markdown(index=False))


Total number of agencies with zero population: 24

| agency_type       |   agency_name |   percentage |
|:------------------|--------------:|-------------:|
| School Police     |            11 |        45.83 |
| Constable         |             6 |        25    |
| University Police |             6 |        25    |
| Police Department |             1 |         4.17 |


In [5]:
# Build a per-county table of population change between the first and the last
# report year, then print the average percent change and the three counties with
# the largest percent growth.

# Drop rows where the population is zero
populated_agencies = border_report[border_report['population'] > 0]

# Group by county and report_year, then calculate the total population for the county
county_population = populated_agencies.groupby(['county', 'report_year'])['population'].sum().reset_index()

# Pivot for easier analysis: one row per county, one column per report year
population_pivot = county_population.pivot(index='county', columns='report_year', values='population')

# pivot() sorts the column labels, so the first/last columns are the earliest/latest years
first_year, last_year = population_pivot.columns[0], population_pivot.columns[-1]

# A county with no rows for the first or last year gets NaN in the pivot, and NaN
# cannot be cast to int below. Restrict the change calculations to counties that
# have both endpoints; population_pivot itself is left untouched.
complete_pivot = population_pivot.dropna(subset=[first_year, last_year])

# Calculate the overall percent change and numerical change
numerical_change = complete_pivot[last_year] - complete_pivot[first_year]
overall_percent_change = numerical_change / complete_pivot[first_year] * 100

# Presentation adjustments: two-decimal percentages, whole-number counts
overall_percent_change_rounded = overall_percent_change.round(2)
numerical_change_int = numerical_change.astype(int)
most_recent_year_population_int = complete_pivot[last_year].astype(int)

# Assemble the final table
final_table = pd.DataFrame({
    'County': complete_pivot.index,
    'Latest Population': most_recent_year_population_int.values,
    'Numerical Change': numerical_change_int.values,
    'Percent Change': overall_percent_change_rounded.values
}).reset_index(drop=True)

# Sort final_table by Numerical Change in descending order
final_table = final_table.sort_values(by='Numerical Change', ascending=False)

# Average percentage change in population. This is the unweighted mean of the
# per-county percent changes, so every county counts equally regardless of size.
average_percent_change = overall_percent_change.mean()
print(f"Average percentage change in population: {average_percent_change:.2f}%")

# Show the three counties with the highest percent change
print(final_table.sort_values(by='Percent Change', ascending=False).head(n=3).to_markdown(index=False))

Average percentage change in population: -4.65%
| County         |   Latest Population |   Numerical Change |   Percent Change |
|:---------------|--------------------:|-------------------:|-----------------:|
| Hidalgo County |              888934 |              33597 |             3.93 |
| El Paso County |              875027 |              32122 |             3.81 |
| Starr County   |               66662 |               2102 |             3.26 |
