In [47]:
import pandas as pd 
from pathlib import Path 
import matplotlib.pyplot as plt
import scipy.stats as st
import numpy as np


# Breast Cancer (either sex) incidence by Texas County 2015-2019

In [2]:
# All races combined: one incidence table per year (2015-2019).
# skiprows=2 skips the first two lines of each CSV; string cells that match
# "~" are replaced with the integer 0.
(
    all_race_2015,
    all_race_2016,
    all_race_2017,
    all_race_2018,
    all_race_2019,
) = (
    pd.read_csv(f"Each_Year_CSV/{year}_all_sex_all_race.csv", skiprows=2)
    .replace("~", 0, regex=True)
    for year in range(2015, 2020)
)

In [3]:
# Non-Hispanic White: one incidence table per year (2015-2019).
# skiprows=2 skips the first two lines of each CSV; string cells that match
# "~" are replaced with the integer 0.
(
    NHwhite_2015,
    NHwhite_2016,
    NHwhite_2017,
    NHwhite_2018,
    NHwhite_2019,
) = (
    pd.read_csv(f"Each_Year_CSV/{year}_all_sex_NHwhite.csv", skiprows=2)
    .replace("~", 0, regex=True)
    for year in range(2015, 2020)
)

In [4]:
# Non-Hispanic Black: one incidence table per year (2015-2019).
# skiprows=2 skips the first two lines of each CSV; string cells that match
# "~" are replaced with the integer 0.
(
    NHblack_2015,
    NHblack_2016,
    NHblack_2017,
    NHblack_2018,
    NHblack_2019,
) = (
    pd.read_csv(f"Each_Year_CSV/{year}_all_sex_NHblack.csv", skiprows=2)
    .replace("~", 0, regex=True)
    for year in range(2015, 2020)
)

In [5]:
# Hispanic: one incidence table per year (2015-2019).
# skiprows=2 skips the first two lines of each CSV; string cells that match
# "~" are replaced with the integer 0.
(
    hispanic_2015,
    hispanic_2016,
    hispanic_2017,
    hispanic_2018,
    hispanic_2019,
) = (
    pd.read_csv(f"Each_Year_CSV/{year}_all_sex_hispanic.csv", skiprows=2)
    .replace("~", 0, regex=True)
    for year in range(2015, 2020)
)

In [6]:
# Non-Hispanic Asian/Pacific Islander: one incidence table per year (2015-2019).
# skiprows=2 skips the first two lines of each CSV; string cells that match
# "~" are replaced with the integer 0.
(
    NHasianPI_2015,
    NHasianPI_2016,
    NHasianPI_2017,
    NHasianPI_2018,
    NHasianPI_2019,
) = (
    pd.read_csv(f"Each_Year_CSV/{year}_all_sex_NHasian-pi.csv", skiprows=2)
    .replace("~", 0, regex=True)
    for year in range(2015, 2020)
)

In [29]:
# Non-Hispanic American Indian/Alaskan: one incidence table per year (2015-2019).
# skiprows=2 skips the first two lines of each CSV; string cells that match
# "~" are replaced with the integer 0.
(
    NHamerIndian_alaskan_2015,
    NHamerIndian_alaskan_2016,
    NHamerIndian_alaskan_2017,
    NHamerIndian_alaskan_2018,
    NHamerIndian_alaskan_2019,
) = (
    pd.read_csv(f"Each_Year_CSV/{year}_all_sex_NHamerIndian_alaskan.csv", skiprows=2)
    .replace("~", 0, regex=True)
    for year in range(2015, 2020)
)

In [48]:
# Add "Year" and "Race" columns to each per-race table and collect the tagged
# tables in df_list so they can be concatenated afterwards

df_list = []

def year_race_col(df, year, race):
    """Tag one yearly table with its year and race and queue it in ``df_list``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table for a single year/race combination. It is left unmodified.
    year
        Value written to every row of the "Year" column.
    race
        Value written to every row of the "Race" column.

    Returns
    -------
    pandas.DataFrame
        A new frame holding ``df`` plus the "Year" and "Race" columns, without
        any row that contains a missing value. The same object is appended to
        ``df_list``.
    """
    # assign() and dropna() both return new frames, so the caller's raw table
    # keeps its original rows and columns and can be re-used by other cells.
    tagged = df.assign(Year=year, Race=race).dropna(how="any")
    df_list.append(tagged)
    return tagged

# Yearly tables for each race label, listed in chronological order (2015-2019).
yearly_frames_by_race = {
    "Non-Hispanic White": (
        NHwhite_2015, NHwhite_2016, NHwhite_2017, NHwhite_2018, NHwhite_2019,
    ),
    "Non-Hispanic Black": (
        NHblack_2015, NHblack_2016, NHblack_2017, NHblack_2018, NHblack_2019,
    ),
    "Hispanic": (
        hispanic_2015, hispanic_2016, hispanic_2017, hispanic_2018, hispanic_2019,
    ),
    "Non-Hispanic Asian": (
        NHasianPI_2015, NHasianPI_2016, NHasianPI_2017, NHasianPI_2018, NHasianPI_2019,
    ),
    "Non-Hispanic American Indian/Alaskan": (
        NHamerIndian_alaskan_2015, NHamerIndian_alaskan_2016,
        NHamerIndian_alaskan_2017, NHamerIndian_alaskan_2018,
        NHamerIndian_alaskan_2019,
    ),
}

# Register every table with its year and race label; dict order and tuple
# order reproduce the race-by-race, year-by-year sequence of the tables.
for race_label, yearly_frames in yearly_frames_by_race.items():
    for yr, yearly_frame in zip(range(2015, 2020), yearly_frames):
        year_race_col(yearly_frame, yr, race_label)

# Stack all tagged tables. concat keeps each table's own row labels, so the
# labels repeat across years/races.
combined_df = pd.concat(df_list)

# Put the identifying columns first and keep only the listed columns.
combined_df_reindex = combined_df.reindex(columns=[
    "Year", "Race", "County", "Population at Risk",
    "Cases", "Crude Rate", "Age-adjusted Rate",
    "95% Confidence Interval-Lower Limit",
    "95% Confidence Interval-Upper Limit"])

# Drop the rows labelled "STATE" so only county rows remain. copy() makes the
# result an independent frame, so later column assignments on it do not
# trigger SettingWithCopyWarning.
combined_df_County = combined_df_reindex.loc[combined_df_reindex["County"] != "STATE"].copy()
combined_df_County


Unnamed: 0,Year,Race,County,Population at Risk,Cases,Crude Rate,Age-adjusted Rate,95% Confidence Interval-Lower Limit,95% Confidence Interval-Upper Limit
0,2015,Non-Hispanic White,Jeff Davis,1372.0,0,0,0,0,0
1,2015,Non-Hispanic White,Blanco,8668.0,18,207.7,156.3,82.0,273.1
2,2015,Non-Hispanic White,Yoakum,2833.0,0,0,0,0,0
3,2015,Non-Hispanic White,Gonzales,8786.0,16,182.1,134.5,71.3,234.5
4,2015,Non-Hispanic White,Maverick,1594.0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...
15,2019,Non-Hispanic American Indian/Alaskan,Galveston,1394.0,0,0,0,0,0
16,2019,Non-Hispanic American Indian/Alaskan,Montgomery,2503.0,0,0,0,0,0
17,2019,Non-Hispanic American Indian/Alaskan,Bell,1891.0,0,0,0,0,0
18,2019,Non-Hispanic American Indian/Alaskan,Bexar,5788.0,0,0,0,0,0


In [50]:
# Add "Year" and "Race" columns to each all-races table and collect the tagged
# tables in df_list_all so they can be concatenated afterwards

df_list_all = []

def year_race_col_all(df_all, year, race):
    """Tag one all-races yearly table and queue it in ``df_list_all``.

    Parameters
    ----------
    df_all : pandas.DataFrame
        Table for a single year. It is left unmodified.
    year
        Value written to every row of the "Year" column.
    race
        Value written to every row of the "Race" column.

    Returns
    -------
    pandas.DataFrame
        A new frame holding ``df_all`` plus the "Year" and "Race" columns,
        without any row that contains a missing value. The same object is
        appended to ``df_list_all``.
    """
    # assign() and dropna() both return new frames, so the caller's raw table
    # keeps its original rows and columns and can be re-used by other cells.
    tagged_all = df_all.assign(Year=year, Race=race).dropna(how="any")
    df_list_all.append(tagged_all)
    return tagged_all

# All-races tables in chronological order (2015-2019).
all_race_frames = (
    all_race_2015, all_race_2016, all_race_2017, all_race_2018, all_race_2019,
)

# Register every yearly table under the single "All Races" label.
for yr, all_race_frame in zip(range(2015, 2020), all_race_frames):
    year_race_col_all(all_race_frame, yr, "All Races")

# Stack the tagged tables. concat keeps each table's own row labels, so the
# labels repeat across years.
combined_df_all = pd.concat(df_list_all)

# Put the identifying columns first and keep only the listed columns.
combined_df_reindex_all = combined_df_all.reindex(columns=[
    "Year", "Race", "County", "Population at Risk",
    "Cases", "Crude Rate", "Age-adjusted Rate",
    "95% Confidence Interval-Lower Limit",
    "95% Confidence Interval-Upper Limit"])

# Drop the rows labelled "STATE" so only county rows remain. copy() makes the
# result an independent frame, so later column assignments on it do not
# trigger SettingWithCopyWarning.
combined_df_County_all = combined_df_reindex_all.loc[combined_df_reindex_all["County"] != "STATE"].copy()
combined_df_County_all

Unnamed: 0,Year,Race,County,Population at Risk,Cases,Crude Rate,Age-adjusted Rate,95% Confidence Interval-Lower Limit,95% Confidence Interval-Upper Limit
0,2015,All Races,Collingsworth,3007.0,0,0,0,0,0
1,2015,All Races,Blanco,11030.0,20,181.3,136.8,77.5,225.9
2,2015,All Races,Hall,3093.0,0,0,0,0,0
3,2015,All Races,Cottle,1437.0,0,0,0,0,0
4,2015,All Races,Mason,4063.0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...
241,2019,All Races,Schleicher,2801.0,0,0.0,0.0,0.0,0.0
242,2019,All Races,Jeff Davis,2275.0,0,0.0,0.0,0.0,0.0
243,2019,All Races,Concho,2749.0,0,0.0,0.0,0.0,0.0
244,2019,All Races,Oldham,2115.0,0,0.0,0.0,0.0,0.0


In [1]:
# unemployment_df = pd.read_csv("Unemployment_Income/Unemployment_2015-2019.csv")
# # unemployment_df.str.strip(self, to_strip=" County, TX")
# unemployment_df

---

# Annual Personal Income by TX County 2015-2019

In [39]:
# Personal-income table: each county appears on several rows, one per measure
# named in the "Description" column, with one column per year.
API_CSV = "Unemployment_Income/API_2015-2019.csv"
api_df = pd.read_csv(API_CSV)
api_df

Unnamed: 0,County,Description,Unit,2015,2016,2017,2018,2019
0,"Anderson, TX",Personal income (thousands of dollars),Thousands of dollars,1868364,1918588,1939647,2008104,2114625
1,"Anderson, TX",Population (persons) 1/,Number of persons,57707,57644,58288,58201,57884
2,"Anderson, TX",Per capita personal income (dollars) 2/,Dollars,32377,33283,33277,34503,36532
3,"Andrews, TX",Personal income (thousands of dollars),Thousands of dollars,831177,728104,805103,926151,990376
4,"Andrews, TX",Population (persons) 1/,Number of persons,17928,17622,17424,17858,18581
...,...,...,...,...,...,...,...,...
757,"Zapata, TX",Population (persons) 1/,Number of persons,14324,14242,14047,13919,13960
758,"Zapata, TX",Per capita personal income (dollars) 2/,Dollars,29961,25634,27636,29323,29858
759,"Zavala, TX",Personal income (thousands of dollars),Thousands of dollars,334791,316165,323514,344038,358601
760,"Zavala, TX",Population (persons) 1/,Number of persons,11099,10706,10339,10125,9819


---

# Gross Domestic Product by TX County 2015-2019

In [40]:
# GDP table: each county appears on several rows, one per measure named in the
# "Description" column, with one column per year.
GDP_CSV = "Unemployment_Income/GDP_county_2015-2019.csv"
gdp_df = pd.read_csv(GDP_CSV)
gdp_df

Unnamed: 0,County,Description,Unit,2015,2016,2017,2018,2019
0,"Anderson, TX",Real GDP (thousands of chained 2012 dollars),Thousands of chained 2012 dollars,1665103.000,1716076.000,1760892.000,1820342.000,1889459.000
1,"Anderson, TX",Chain-type quantity indexes for real GDP,Quantity index,93.421,96.281,98.795,102.131,106.009
2,"Anderson, TX",Current-dollar GDP (thousands of current dolla...,Thousands of dollars,1721441.000,1788842.000,1868140.000,1978146.000,2089526.000
3,"Andrews, TX",Real GDP (thousands of chained 2012 dollars),Thousands of chained 2012 dollars,4842339.000,4950791.000,4993989.000,5048964.000,5163990.000
4,"Andrews, TX",Chain-type quantity indexes for real GDP,Quantity index,98.550,100.757,101.636,102.755,105.096
...,...,...,...,...,...,...,...,...
757,"Zapata, TX",Chain-type quantity indexes for real GDP,Quantity index,89.856,73.898,78.648,75.558,78.227
758,"Zapata, TX",Current-dollar GDP (thousands of current dolla...,Thousands of dollars,716440.000,543811.000,663709.000,692009.000,658697.000
759,"Zavala, TX",Real GDP (thousands of chained 2012 dollars),Thousands of chained 2012 dollars,1192850.000,1117977.000,999557.000,899507.000,1056527.000
760,"Zavala, TX",Chain-type quantity indexes for real GDP,Quantity index,209.725,196.561,175.741,158.150,185.757


---

In [None]:
# Histogram of the county-level age-adjusted rates for all races combined.
# NOTE(review): the previous cell body was a bare plt.hist() call, which raises
# TypeError because no data is passed. Plotting "Age-adjusted Rate" from
# combined_df_County_all is an assumed intent - swap the column/frame if a
# different distribution was meant.
# The values are coerced to numbers because the "~" -> 0 replacement applied
# at load time may leave this column with a mix of strings and numbers; zeros
# therefore include entries that were "~" in the source files.
age_adjusted_rates = pd.to_numeric(
    combined_df_County_all["Age-adjusted Rate"], errors="coerce"
).dropna()

fig, ax = plt.subplots()
ax.hist(age_adjusted_rates, bins=30)
ax.set(
    title="Age-adjusted breast cancer rate by Texas county, 2015-2019 (all races)",
    xlabel="Age-adjusted rate",
    ylabel="Number of county-year records",
)
plt.show()