Test notebook

In [4]:
# Dependencies and Setup
import missingno as msno
import pandas as pd
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from scipy import stats
from scipy.stats import linregress
import scipy.stats as st
import sys
import json
import requests

# Location of the Q037 CSV file, relative to the notebook's working directory
q037_data = Path("Q037.csv")

# Parse the CSV into a DataFrame and preview its first five rows
q037_df = pd.read_csv(filepath_or_buffer=q037_data)
q037_df.head(n=5)

Unnamed: 0,YearStart,YearEnd,LocationAbbr,LocationDesc,Datasource,Class,Topic,Question,Data_Value_Unit,Data_Value_Type,...,GeoLocation,ClassID,TopicID,QuestionID,DataValueTypeID,LocationID,StratificationCategory1,Stratification1,StratificationCategoryId1,StratificationID1
0,2013,2013,US,National,Behavioral Risk Factor Surveillance System,Obesity / Weight Status,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,,Value,...,,OWS,OWS1,Q037,VALUE,59,Income,Data not reported,INC,INCNR
1,2012,2012,WY,Wyoming,Behavioral Risk Factor Surveillance System,Obesity / Weight Status,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,,Value,...,"(43.235541343, -108.109830353)",OWS,OWS1,Q037,VALUE,56,Race/Ethnicity,American Indian/Alaska Native,RACE,RACENAA
2,2015,2015,RI,Rhode Island,Behavioral Risk Factor Surveillance System,Obesity / Weight Status,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,,Value,...,"(41.708280193, -71.522470314)",OWS,OWS1,Q037,VALUE,44,Race/Ethnicity,Hispanic,RACE,RACEHIS
3,2015,2015,GU,Guam,Behavioral Risk Factor Surveillance System,Obesity / Weight Status,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,,Value,...,"(13.444304, 144.793731)",OWS,OWS1,Q037,VALUE,66,Race/Ethnicity,Other,RACE,RACEOTH
4,2012,2012,US,National,Behavioral Risk Factor Surveillance System,Obesity / Weight Status,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,,Value,...,,OWS,OWS1,Q037,VALUE,59,Education,Some college or technical school,EDU,EDUCOTEC


In [5]:
# List the column labels of the frame read from the CSV.
# Note: the output below shows 'High_Confidence_Limit ' with a trailing space,
# so that label has to be referenced with the space included.
q037_df.columns

Index(['YearStart', 'YearEnd', 'LocationAbbr', 'LocationDesc', 'Datasource',
       'Class', 'Topic', 'Question', 'Data_Value_Unit', 'Data_Value_Type',
       'Data_Value', 'Data_Value_Alt', 'Data_Value_Footnote_Symbol',
       'Data_Value_Footnote', 'Low_Confidence_Limit', 'High_Confidence_Limit ',
       'Sample_Size', 'Total', 'Age(years)', 'Education', 'Gender', 'Income',
       'Race/Ethnicity', 'GeoLocation', 'ClassID', 'TopicID', 'QuestionID',
       'DataValueTypeID', 'LocationID', 'StratificationCategory1',
       'Stratification1', 'StratificationCategoryId1', 'StratificationID1'],
      dtype='object')

In [44]:
# Keep only the columns needed downstream by dropping the unused ones
q037_df_trim = q037_df.drop(
    columns=[
        'Datasource',
        'Class',
        'Topic',
        'Data_Value_Unit',
        'Data_Value_Type',
        'Data_Value_Alt',
        'Data_Value_Footnote_Symbol',
        'Data_Value_Footnote',
        'Total',
        'DataValueTypeID',
    ]
)
q037_df_trim

Unnamed: 0,YearStart,YearEnd,LocationAbbr,LocationDesc,Question,Data_Value,Low_Confidence_Limit,High_Confidence_Limit,Sample_Size,Age(years),...,Race/Ethnicity,GeoLocation,ClassID,TopicID,QuestionID,LocationID,StratificationCategory1,Stratification1,StratificationCategoryId1,StratificationID1
0,2013,2013,US,National,Percent of adults aged 18 years and older who ...,32.7,31.9,33.5,60069.0,,...,,,OWS,OWS1,Q037,59,Income,Data not reported,INC,INCNR
1,2012,2012,WY,Wyoming,Percent of adults aged 18 years and older who ...,48.5,32.3,64.9,69.0,,...,American Indian/Alaska Native,"(43.235541343, -108.109830353)",OWS,OWS1,Q037,56,Race/Ethnicity,American Indian/Alaska Native,RACE,RACENAA
2,2015,2015,RI,Rhode Island,Percent of adults aged 18 years and older who ...,40.2,33.3,47.4,354.0,,...,Hispanic,"(41.708280193, -71.522470314)",OWS,OWS1,Q037,44,Race/Ethnicity,Hispanic,RACE,RACEHIS
3,2015,2015,GU,Guam,Percent of adults aged 18 years and older who ...,,,,,,...,Other,"(13.444304, 144.793731)",OWS,OWS1,Q037,66,Race/Ethnicity,Other,RACE,RACEOTH
4,2012,2012,US,National,Percent of adults aged 18 years and older who ...,35.1,34.6,35.7,120032.0,,...,,,OWS,OWS1,Q037,59,Education,Some college or technical school,EDU,EDUCOTEC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18112,2022,2022,WY,Wyoming,Percent of adults aged 18 years and older who ...,41.5,37.6,45.6,722.0,55 - 64,...,,"(43.23554134300048, -108.10983035299967)",OWS,OWS1,Q037,56,Age (years),55 - 64,AGEYR,AGEYR5564
18113,2022,2022,WY,Wyoming,Percent of adults aged 18 years and older who ...,38.0,35.4,40.6,1860.0,,...,,"(43.23554134300048, -108.10983035299967)",OWS,OWS1,Q037,56,Gender,Male,GEN,MALE
18114,2022,2022,WY,Wyoming,Percent of adults aged 18 years and older who ...,37.3,32.8,42.0,663.0,,...,,"(43.23554134300048, -108.10983035299967)",OWS,OWS1,Q037,56,Income,"$50,000 - $74,999",INC,INC5075
18115,2022,2022,WY,Wyoming,Percent of adults aged 18 years and older who ...,24.5,16.3,35.3,111.0,,...,,"(43.23554134300048, -108.10983035299967)",OWS,OWS1,Q037,56,Income,"Less than $15,000",INC,INCLESS15


In [45]:
# Drop the rows belonging to the territories (Guam, Virgin Islands, Puerto Rico).
# na=False makes a missing LocationDesc count as "not a territory", so the mask
# stays strictly boolean; without it a NaN in the mask breaks the `~` negation /
# boolean indexing instead of filtering.
q037_df_trim_filtered = q037_df_trim[
    ~q037_df_trim['LocationDesc'].str.contains('Guam|Virgin Islands|Puerto Rico', na=False)
]

# Remove rows that have no Data_Value
q037_df_trim_cleaned = q037_df_trim_filtered.dropna(subset=['Data_Value'])
q037_df_trim_cleaned

Unnamed: 0,YearStart,YearEnd,LocationAbbr,LocationDesc,Question,Data_Value,Low_Confidence_Limit,High_Confidence_Limit,Sample_Size,Age(years),...,Race/Ethnicity,GeoLocation,ClassID,TopicID,QuestionID,LocationID,StratificationCategory1,Stratification1,StratificationCategoryId1,StratificationID1
0,2013,2013,US,National,Percent of adults aged 18 years and older who ...,32.7,31.9,33.5,60069.0,,...,,,OWS,OWS1,Q037,59,Income,Data not reported,INC,INCNR
1,2012,2012,WY,Wyoming,Percent of adults aged 18 years and older who ...,48.5,32.3,64.9,69.0,,...,American Indian/Alaska Native,"(43.235541343, -108.109830353)",OWS,OWS1,Q037,56,Race/Ethnicity,American Indian/Alaska Native,RACE,RACENAA
2,2015,2015,RI,Rhode Island,Percent of adults aged 18 years and older who ...,40.2,33.3,47.4,354.0,,...,Hispanic,"(41.708280193, -71.522470314)",OWS,OWS1,Q037,44,Race/Ethnicity,Hispanic,RACE,RACEHIS
4,2012,2012,US,National,Percent of adults aged 18 years and older who ...,35.1,34.6,35.7,120032.0,,...,,,OWS,OWS1,Q037,59,Education,Some college or technical school,EDU,EDUCOTEC
7,2011,2011,US,National,Percent of adults aged 18 years and older who ...,31.6,30.7,32.6,49576.0,,...,,,OWS,OWS1,Q037,59,Income,"Less than $15,000",INC,INCLESS15
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
18112,2022,2022,WY,Wyoming,Percent of adults aged 18 years and older who ...,41.5,37.6,45.6,722.0,55 - 64,...,,"(43.23554134300048, -108.10983035299967)",OWS,OWS1,Q037,56,Age (years),55 - 64,AGEYR,AGEYR5564
18113,2022,2022,WY,Wyoming,Percent of adults aged 18 years and older who ...,38.0,35.4,40.6,1860.0,,...,,"(43.23554134300048, -108.10983035299967)",OWS,OWS1,Q037,56,Gender,Male,GEN,MALE
18114,2022,2022,WY,Wyoming,Percent of adults aged 18 years and older who ...,37.3,32.8,42.0,663.0,,...,,"(43.23554134300048, -108.10983035299967)",OWS,OWS1,Q037,56,Income,"$50,000 - $74,999",INC,INC5075
18115,2022,2022,WY,Wyoming,Percent of adults aged 18 years and older who ...,24.5,16.3,35.3,111.0,,...,,"(43.23554134300048, -108.10983035299967)",OWS,OWS1,Q037,56,Income,"Less than $15,000",INC,INCLESS15


In [46]:
# Keep only the rows whose LocationDesc contains 'National', ordered by year and
# then by stratification value.
# na=False treats a missing LocationDesc as "not national", so the boolean mask
# never contains NaN (a NaN in the mask would make the row selection fail).
q037_natl_df = (
    q037_df_trim_cleaned[
        q037_df_trim_cleaned['LocationDesc'].str.contains('National', na=False)
    ]
    .sort_values(by=['YearStart', 'Stratification1'])
)
q037_natl_df

Unnamed: 0,YearStart,YearEnd,LocationAbbr,LocationDesc,Question,Data_Value,Low_Confidence_Limit,High_Confidence_Limit,Sample_Size,Age(years),...,Race/Ethnicity,GeoLocation,ClassID,TopicID,QuestionID,LocationID,StratificationCategory1,Stratification1,StratificationCategoryId1,StratificationID1
996,2011,2011,US,National,Percent of adults aged 18 years and older who ...,34.5,33.8,35.2,74022.0,,...,,,OWS,OWS1,Q037,59,Income,"$15,000 - $24,999",INC,INC1525
20,2011,2011,US,National,Percent of adults aged 18 years and older who ...,35.5,34.6,36.4,48923.0,,...,,,OWS,OWS1,Q037,59,Income,"$25,000 - $34,999",INC,INC2535
4108,2011,2011,US,National,Percent of adults aged 18 years and older who ...,37.1,36.3,37.9,61329.0,,...,,,OWS,OWS1,Q037,59,Income,"$35,000 - $49,999",INC,INC3550
2633,2011,2011,US,National,Percent of adults aged 18 years and older who ...,37.5,36.8,38.3,64664.0,,...,,,OWS,OWS1,Q037,59,Income,"$50,000 - $74,999",INC,INC5075
3686,2011,2011,US,National,Percent of adults aged 18 years and older who ...,38.3,37.8,38.9,110079.0,,...,,,OWS,OWS1,Q037,59,Income,"$75,000 or greater",INC,INC75PLUS
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1543,2022,2022,US,National,Percent of adults aged 18 years and older who ...,38.9,38.5,39.4,189865.0,,...,,,OWS,OWS1,Q037,59,Gender,Male,GEN,MALE
2377,2022,2022,US,National,Percent of adults aged 18 years and older who ...,31.6,30.6,32.6,29914.0,,...,Non-Hispanic Black,,OWS,OWS1,Q037,59,Race/Ethnicity,Non-Hispanic Black,RACE,RACEBLK
17884,2022,2022,US,National,Percent of adults aged 18 years and older who ...,34.5,34.2,34.8,289595.0,,...,Non-Hispanic White,,OWS,OWS1,Q037,59,Race/Ethnicity,Non-Hispanic White,RACE,RACEWHT
17891,2022,2022,US,National,Percent of adults aged 18 years and older who ...,33.6,33.0,34.2,105991.0,,...,,,OWS,OWS1,Q037,59,Education,Some college or technical school,EDU,EDUCOTEC


In [49]:
# Order the cleaned rows by year, then stratification category and value,
# and finally by location name
q037_df_sorted = q037_df_trim_cleaned.sort_values(
    by=[
        'YearStart',
        'StratificationCategory1',
        'Stratification1',
        'LocationDesc',
    ]
)
q037_df_sorted

Unnamed: 0,YearStart,YearEnd,LocationAbbr,LocationDesc,Question,Data_Value,Low_Confidence_Limit,High_Confidence_Limit,Sample_Size,Age(years),...,Race/Ethnicity,GeoLocation,ClassID,TopicID,QuestionID,LocationID,StratificationCategory1,Stratification1,StratificationCategoryId1,StratificationID1
10273,2011,2011,AL,Alabama,Percent of adults aged 18 years and older who ...,27.1,21.6,33.3,356.0,18 - 24,...,,"(32.840571122, -86.631860762)",OWS,OWS1,Q037,1,Age (years),18 - 24,AGEYR,AGEYR1824
7950,2011,2011,AK,Alaska,Percent of adults aged 18 years and older who ...,32.0,24.7,40.4,212.0,18 - 24,...,,"(64.845079957001, -147.722059036)",OWS,OWS1,Q037,2,Age (years),18 - 24,AGEYR,AGEYR1824
13664,2011,2011,AZ,Arizona,Percent of adults aged 18 years and older who ...,27.7,20.7,36.1,212.0,18 - 24,...,,"(34.86597028, -111.763811277)",OWS,OWS1,Q037,4,Age (years),18 - 24,AGEYR,AGEYR1824
5365,2011,2011,AR,Arkansas,Percent of adults aged 18 years and older who ...,27.3,19.6,36.7,136.0,18 - 24,...,,"(34.748650124, -92.274490743)",OWS,OWS1,Q037,5,Age (years),18 - 24,AGEYR,AGEYR1824
10633,2011,2011,CA,California,Percent of adults aged 18 years and older who ...,26.8,23.6,30.3,902.0,18 - 24,...,,"(37.638640123, -120.999999538)",OWS,OWS1,Q037,6,Age (years),18 - 24,AGEYR,AGEYR1824
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
17928,2022,2022,VA,Virginia,Percent of adults aged 18 years and older who ...,32.1,30.8,33.5,9378.0,,...,,"(37.54268067400045, -78.45789046299967)",OWS,OWS1,Q037,51,Total,Total,OVR,OVERALL
18006,2022,2022,WA,Washington,Percent of adults aged 18 years and older who ...,34.6,33.7,35.4,22869.0,,...,,"(47.52227862900048, -120.47001078999972)",OWS,OWS1,Q037,53,Total,Total,OVR,OVERALL
18083,2022,2022,WV,West Virginia,Percent of adults aged 18 years and older who ...,33.4,31.7,35.1,4640.0,,...,,"(38.66551020200046, -80.71264013499967)",OWS,OWS1,Q037,54,Total,Total,OVR,OVERALL
18046,2022,2022,WI,Wisconsin,Percent of adults aged 18 years and older who ...,33.0,31.7,34.2,10080.0,,...,,"(44.39319117400049, -89.81637074199966)",OWS,OWS1,Q037,55,Total,Total,OVR,OVERALL
