In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy.stats as st
from scipy.stats import linregress

# Relative paths to the Lake County CSV inputs used by this notebook.
demographic_data = "resources/Demographics.csv"
birth_data = "resources/Birth_Statistics.csv"
cancer_data = "resources/Cancer_Rates.csv"
hospital_data = "resources/Hospitalization_Discharge_Rates.csv"
health_data = "resources/LakeCounty_Health.csv"
mortality_data = "resources/Mortality_Rates.csv"
obesity_data = "resources/Obesity_Percentages.csv"


# Load each CSV into its own DataFrame.
# NOTE(review): health_data is defined above but is not read here — confirm
# whether that file is intentionally skipped.
county_demographic_data = pd.read_csv(demographic_data)
county_birth_data = pd.read_csv(birth_data)
county_cancer_data = pd.read_csv(cancer_data)
county_hospital_data = pd.read_csv(hospital_data)
county_mortality_data = pd.read_csv(mortality_data)
county_obesity_data = pd.read_csv(obesity_data)

# Preview the first rows of the demographics table.
county_demographic_data.head()


In [None]:
# Preview the first rows of the birth statistics table.
county_birth_data.head()

In [None]:
# Combine the demographics and birth tables, matching rows on the ZIP code
# together with the three other columns the two tables have in common.
zip_code_df = county_demographic_data.merge(
    county_birth_data,
    on=[
        "ZIP",
        "OBJECTID",
        "SHAPE_Length",
        "SHAPE_Area",
    ],
)
zip_code_df.head()

In [None]:
# Index the merged table by ZIP code so each row is addressed by its ZIP.
df = zip_code_df.set_index(keys="ZIP")
df.head()

In [None]:
# List the column names of the ZIP-indexed frame.
df.columns

In [None]:
# Remove the three merge-key columns that the analysis below does not use.
# Rebinding `df` (rather than dropping in place) with errors="ignore" keeps
# this cell safe to re-run: a second execution no longer raises KeyError
# because the columns are already gone.
df = df.drop(columns=["OBJECTID", "SHAPE_Length", "SHAPE_Area"], errors="ignore")
df.head()

In [None]:
# List the column names again to confirm which columns remain.
df.columns

In [None]:
# Replace the abbreviated CSV headers with readable labels. The mapping is
# laid out one pair per line so each label is easy to audit; columns not
# listed here (e.g. "Poverty") keep their original names.
df.rename(
    columns={
        "Total_Pop": "Total Population",
        "White__": "White(%)",
        "African_Am": "African American(%)",
        "Asian__": "Asian(%)",
        "Hispanic_o": "Hispanic(%)",
        "Under_5yea": "Under 5 years old(%)",
        "Under_18ye": "Under 18 years old(%)",
        "No_English": "Does not Speak English(%)",
        "F18to64year": "18 to 64 years old(%)",
        "F65years_an": "65 years and older(%)",
        "Male_": "Male(%)",
        "Female_": "Female(%)",
        "High_Schoo": "High School(%)",
        "Associate_": "Associates Degree(%)",
        "Bachelor_D": "Bachelors Degree(%)",
        "Food_Stamp": "Utilizes Food Stamps(%)",
        "Med_Income": "Median Household Income",
        "No_HS": "No High School(%)",
        "LBW": "Low Birth Weight",
        "Preterm": "Preterm Birth",
        "TeenBirth": "Teen Birth",
        "Birth_Rate": "Birth Rate",
        "F1stTriCare": "1st Trimester of Care",
    },
    inplace=True,
)
print(df.columns)

In [None]:
 # Preview the frame with the renamed columns.
 # NOTE(review): this cell carries a stray leading space; IPython appears to
 # tolerate it, but a plain .py export would raise IndentationError — consider
 # removing it.
 df.head()

**Median Household Income vs Poverty**

In [None]:
# Regress poverty on median household income across ZIP codes and plot the
# scatter, the fitted line, and the fit equation.
x_values = list(df["Median Household Income"])
y_values = list(df["Poverty"])
n = list(df.index)  # ZIP codes, used to label each point

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = df["Median Household Income"] * slope + intercept

# Three significant digits keep a small slope readable; round(slope, 2)
# would print it as 0.0 whenever |slope| < 0.005.
line_eq = f"y = {slope:.3g}x + {intercept:.3g}"

fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(x_values, y_values)
for i, txt in enumerate(n):
    ax.annotate(txt, (x_values[i], y_values[i]), fontsize=15)

ax.plot(x_values, regress_values, "r-")
# Axes-fraction coordinates pin the equation to the top-left corner of the
# plot; a fixed data-coordinate position such as (6, 10) can fall outside
# the plotted range and leave the label invisible.
ax.annotate(
    line_eq,
    (0.02, 0.94),
    xycoords="axes fraction",
    fontsize=15,
    color="red",
)

ax.set_xlabel("Median Household Income")
ax.set_ylabel("Poverty")
ax.set_title("Lake County: Median Household Income vs Poverty")

print(f"R squared: {rvalue**2}")

plt.show()

**Median Household Income vs Race**

In [None]:
# Regress the share of White residents on median household income across
# ZIP codes and plot the scatter, the fitted line, and the fit equation.
x_values = list(df["Median Household Income"])
y_values = list(df["White(%)"])
n = list(df.index)  # ZIP codes, used to label each point

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = df["Median Household Income"] * slope + intercept

# Build the equation label from THIS fit; without this line the plot would
# show whatever line_eq an earlier cell left in the kernel.
line_eq = f"y = {slope:.3g}x + {intercept:.3g}"

fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(x_values, y_values)
for i, txt in enumerate(n):
    ax.annotate(txt, (x_values[i], y_values[i]), fontsize=15)

ax.plot(x_values, regress_values, "r-")
# Axes-fraction coordinates keep the label inside the plot regardless of
# the data range.
ax.annotate(
    line_eq,
    (0.02, 0.94),
    xycoords="axes fraction",
    fontsize=15,
    color="red",
)

ax.set_xlabel("Median Household Income")
ax.set_ylabel("White Residents")
ax.set_title("Lake County: Median Household Income vs White Residents")

print(f"R squared: {rvalue**2}")

plt.show()

In [None]:
# Regress the share of Asian residents on median household income across
# ZIP codes and plot the scatter, the fitted line, and the fit equation.
x_values = list(df["Median Household Income"])
y_values = list(df["Asian(%)"])
n = list(df.index)  # ZIP codes, used to label each point

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = df["Median Household Income"] * slope + intercept

# Build the equation label from THIS fit; without this line the plot would
# show whatever line_eq an earlier cell left in the kernel.
line_eq = f"y = {slope:.3g}x + {intercept:.3g}"

fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(x_values, y_values)
for i, txt in enumerate(n):
    ax.annotate(txt, (x_values[i], y_values[i]), fontsize=15)

ax.plot(x_values, regress_values, "r-")
# Axes-fraction coordinates keep the label inside the plot regardless of
# the data range.
ax.annotate(
    line_eq,
    (0.02, 0.94),
    xycoords="axes fraction",
    fontsize=15,
    color="red",
)

ax.set_xlabel("Median Household Income")
ax.set_ylabel("Asian Residents")
ax.set_title("Lake County: Median Household Income vs Asian Residents")

print(f"R squared: {rvalue**2}")

plt.show()

In [None]:
# Regress the share of Hispanic residents on median household income across
# ZIP codes and plot the scatter, the fitted line, and the fit equation.
x_values = list(df["Median Household Income"])
y_values = list(df["Hispanic(%)"])
n = list(df.index)  # ZIP codes, used to label each point

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = df["Median Household Income"] * slope + intercept

# Build the equation label from THIS fit; without this line the plot would
# show whatever line_eq an earlier cell left in the kernel.
line_eq = f"y = {slope:.3g}x + {intercept:.3g}"

fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(x_values, y_values)
for i, txt in enumerate(n):
    ax.annotate(txt, (x_values[i], y_values[i]), fontsize=15)

ax.plot(x_values, regress_values, "r-")
# Axes-fraction coordinates keep the label inside the plot regardless of
# the data range.
ax.annotate(
    line_eq,
    (0.02, 0.94),
    xycoords="axes fraction",
    fontsize=15,
    color="red",
)

ax.set_xlabel("Median Household Income")
ax.set_ylabel("Hispanic Residents")
ax.set_title("Lake County: Median Household Income vs Hispanic Residents")

print(f"R squared: {rvalue**2}")

plt.show()

In [None]:
# Regress the share of African American residents on median household
# income across ZIP codes and plot the scatter, fitted line, and equation.
x_values = list(df["Median Household Income"])
y_values = list(df["African American(%)"])
n = list(df.index)  # ZIP codes, used to label each point

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = df["Median Household Income"] * slope + intercept

# Build the equation label from THIS fit; without this line the plot would
# show whatever line_eq an earlier cell left in the kernel.
line_eq = f"y = {slope:.3g}x + {intercept:.3g}"

fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(x_values, y_values)
for i, txt in enumerate(n):
    ax.annotate(txt, (x_values[i], y_values[i]), fontsize=15)

ax.plot(x_values, regress_values, "r-")
# Axes-fraction coordinates keep the label inside the plot regardless of
# the data range.
ax.annotate(
    line_eq,
    (0.02, 0.94),
    xycoords="axes fraction",
    fontsize=15,
    color="red",
)

ax.set_xlabel("Median Household Income")
ax.set_ylabel("African American Residents")
ax.set_title("Lake County: Median Household Income vs African American Residents")

print(f"R squared: {rvalue**2}")

plt.show()

**Median Household Income vs Education**

In [None]:
# Regress the "High School(%)" column on median household income across
# ZIP codes and plot the scatter, the fitted line, and the fit equation.
x_values = list(df["Median Household Income"])
y_values = list(df["High School(%)"])
n = list(df.index)  # ZIP codes, used to label each point

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = df["Median Household Income"] * slope + intercept

# Build the equation label from THIS fit; without this line the plot would
# show whatever line_eq an earlier cell left in the kernel.
line_eq = f"y = {slope:.3g}x + {intercept:.3g}"

fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(x_values, y_values)
for i, txt in enumerate(n):
    ax.annotate(txt, (x_values[i], y_values[i]), fontsize=15)

ax.plot(x_values, regress_values, "r-")
# Axes-fraction coordinates keep the label inside the plot regardless of
# the data range.
ax.annotate(
    line_eq,
    (0.02, 0.94),
    xycoords="axes fraction",
    fontsize=15,
    color="red",
)

ax.set_xlabel("Median Household Income")
ax.set_ylabel("Graduated High School")
ax.set_title("Lake County: Median Household Income vs High School Graduates")

print(f"R squared: {rvalue**2}")

plt.show()

In [None]:
# Regress the "Associates Degree(%)" column on median household income
# across ZIP codes and plot the scatter, fitted line, and fit equation.
x_values = list(df["Median Household Income"])
y_values = list(df["Associates Degree(%)"])
n = list(df.index)  # ZIP codes, used to label each point

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = df["Median Household Income"] * slope + intercept

# Build the equation label from THIS fit; without this line the plot would
# show whatever line_eq an earlier cell left in the kernel.
line_eq = f"y = {slope:.3g}x + {intercept:.3g}"

fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(x_values, y_values)
for i, txt in enumerate(n):
    ax.annotate(txt, (x_values[i], y_values[i]), fontsize=15)

ax.plot(x_values, regress_values, "r-")
# Axes-fraction coordinates keep the label inside the plot regardless of
# the data range.
ax.annotate(
    line_eq,
    (0.02, 0.94),
    xycoords="axes fraction",
    fontsize=15,
    color="red",
)

ax.set_xlabel("Median Household Income")
ax.set_ylabel("Graduated with an Associates Degree")
ax.set_title("Lake County: Median Household Income vs Associates Degree")

print(f"R squared: {rvalue**2}")

plt.show()

In [None]:
# Regress the "Bachelors Degree(%)" column on median household income
# across ZIP codes and plot the scatter, fitted line, and fit equation.
x_values = list(df["Median Household Income"])
y_values = list(df["Bachelors Degree(%)"])
n = list(df.index)  # ZIP codes, used to label each point

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = df["Median Household Income"] * slope + intercept

# Build the equation label from THIS fit; without this line the plot would
# show whatever line_eq an earlier cell left in the kernel.
line_eq = f"y = {slope:.3g}x + {intercept:.3g}"

fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(x_values, y_values)
for i, txt in enumerate(n):
    ax.annotate(txt, (x_values[i], y_values[i]), fontsize=15)

ax.plot(x_values, regress_values, "r-")
# Axes-fraction coordinates keep the label inside the plot regardless of
# the data range.
ax.annotate(
    line_eq,
    (0.02, 0.94),
    xycoords="axes fraction",
    fontsize=15,
    color="red",
)

ax.set_xlabel("Median Household Income")
ax.set_ylabel("Graduated with a Bachelors Degree")
ax.set_title("Lake County: Median Household Income vs Bachelors Degree")

print(f"R squared: {rvalue**2}")

plt.show()

In [None]:
# Regress the "No High School(%)" column on median household income across
# ZIP codes and plot the scatter, the fitted line, and the fit equation.
x_values = list(df["Median Household Income"])
y_values = list(df["No High School(%)"])
n = list(df.index)  # ZIP codes, used to label each point

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
regress_values = df["Median Household Income"] * slope + intercept

# Build the equation label from THIS fit; without this line the plot would
# show whatever line_eq an earlier cell left in the kernel.
line_eq = f"y = {slope:.3g}x + {intercept:.3g}"

fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(x_values, y_values)
for i, txt in enumerate(n):
    ax.annotate(txt, (x_values[i], y_values[i]), fontsize=15)

ax.plot(x_values, regress_values, "r-")
# Axes-fraction coordinates keep the label inside the plot regardless of
# the data range.
ax.annotate(
    line_eq,
    (0.02, 0.94),
    xycoords="axes fraction",
    fontsize=15,
    color="red",
)

ax.set_xlabel("Median Household Income")
ax.set_ylabel("Has not graduated from High School")
ax.set_title("Lake County: Median Household Income vs No High School Diploma")

print(f"R squared: {rvalue**2}")

plt.show()

**Under 18 Births**


In [None]:
# Regress the "Under 18 years old(%)" column on the "Teen Birth" column
# across ZIP codes and plot the scatter, the fitted line, and the equation.
x_values = list(df["Teen Birth"])
y_values = list(df["Under 18 years old(%)"])
n = list(df.index)  # ZIP codes, used to label each point

(slope, intercept, rvalue, pvalue, stderr) = linregress(x_values, y_values)
# The fitted line must be evaluated on the same x variable that was
# regressed (Teen Birth); evaluating it on Median Household Income draws a
# line unrelated to this fit.
regress_values = df["Teen Birth"] * slope + intercept

# Build the equation label from THIS fit; without this line the plot would
# show whatever line_eq an earlier cell left in the kernel.
line_eq = f"y = {slope:.3g}x + {intercept:.3g}"

fig, ax = plt.subplots(figsize=(20, 10))
ax.scatter(x_values, y_values)
for i, txt in enumerate(n):
    ax.annotate(txt, (x_values[i], y_values[i]), fontsize=15)

ax.plot(x_values, regress_values, "r-")
# Axes-fraction coordinates keep the label inside the plot regardless of
# the data range.
ax.annotate(
    line_eq,
    (0.02, 0.94),
    xycoords="axes fraction",
    fontsize=15,
    color="red",
)

# Axis labels and title describe the columns actually plotted in this cell.
ax.set_xlabel("Teen Birth")
ax.set_ylabel("Under 18 years old(%)")
ax.set_title("Lake County: Teen Birth vs Under 18 Years Old")

print(f"R squared: {rvalue**2}")

plt.show()