Test notebook

In [1]:
# Dependencies and Setup
import missingno as msno
import pandas as pd
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FuncFormatter
from scipy import stats
from scipy.stats import linregress
import scipy.stats as st
import sys
import json
import requests

# Load the data file, read and display it with Pandas
DNPAO_all_data = Path("DNPAO_all.csv")

# Read and store into Pandas DataFrames
DNPAO_all = pd.read_csv(DNPAO_all_data)
DNPAO_all.head()

Unnamed: 0,YearStart,YearEnd,LocationAbbr,LocationDesc,Datasource,Class,Topic,Question,Data_Value_Unit,Data_Value_Type,...,GeoLocation,ClassID,TopicID,QuestionID,DataValueTypeID,LocationID,StratificationCategory1,Stratification1,StratificationCategoryId1,StratificationID1
0,2011,2011,AL,Alabama,Behavioral Risk Factor Surveillance System,Obesity / Weight Status,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,,Value,...,"(32.840571122, -86.631860762)",OWS,OWS1,Q036,VALUE,1,Age (years),25 - 34,AGEYR,AGEYR2534
1,2011,2011,US,National,Behavioral Risk Factor Surveillance System,Obesity / Weight Status,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,,Value,...,,OWS,OWS1,Q036,VALUE,59,Race/Ethnicity,American Indian/Alaska Native,RACE,RACENAA
2,2011,2011,RI,Rhode Island,Behavioral Risk Factor Surveillance System,Obesity / Weight Status,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,,Value,...,"(41.708280193, -71.522470314)",OWS,OWS1,Q036,VALUE,44,Race/Ethnicity,Other,RACE,RACEOTH
3,2011,2011,US,National,Behavioral Risk Factor Surveillance System,Physical Activity,Physical Activity - Behavior,Percent of adults who engage in no leisure-tim...,,Value,...,,PA,PA1,Q047,VALUE,59,Age (years),18 - 24,AGEYR,AGEYR1824
4,2011,2011,US,National,Behavioral Risk Factor Surveillance System,Obesity / Weight Status,Obesity / Weight Status,Percent of adults aged 18 years and older who ...,,Value,...,,OWS,OWS1,Q037,VALUE,59,Income,"Less than $15,000",INC,INCLESS15


In [2]:
DNPAO_all.columns

Index(['YearStart', 'YearEnd', 'LocationAbbr', 'LocationDesc', 'Datasource',
       'Class', 'Topic', 'Question', 'Data_Value_Unit', 'Data_Value_Type',
       'Data_Value', 'Data_Value_Alt', 'Data_Value_Footnote_Symbol',
       'Data_Value_Footnote', 'Low_Confidence_Limit', 'High_Confidence_Limit ',
       'Sample_Size', 'Total', 'Age(years)', 'Education', 'Gender', 'Income',
       'Race/Ethnicity', 'GeoLocation', 'ClassID', 'TopicID', 'QuestionID',
       'DataValueTypeID', 'LocationID', 'StratificationCategory1',
       'Stratification1', 'StratificationCategoryId1', 'StratificationID1'],
      dtype='object')

In [3]:
# Trim the columns of data to the one we need
DNPAO_sevencols = DNPAO_all.drop(['YearEnd','Datasource',
       'Class', 'Topic', 'Question', 'Data_Value_Unit', 'Data_Value_Type',
       'Data_Value_Alt', 'Data_Value_Footnote_Symbol',
       'Data_Value_Footnote', 'Low_Confidence_Limit', 'High_Confidence_Limit ',
       'Sample_Size', 'Total', 'Age(years)', 'Education', 'Gender', 'Income',
       'Race/Ethnicity', 'GeoLocation', 'ClassID', 'TopicID',
       'DataValueTypeID', 'LocationID', 'StratificationCategory1',
       'StratificationID1'], axis=1)
# DNPAO_sevencols.to_csv("DNPAO_sevencols.csv", index=False)
DNPAO_sevencols

Unnamed: 0,YearStart,LocationAbbr,LocationDesc,Data_Value,QuestionID,Stratification1,StratificationCategoryId1
0,2011,AL,Alabama,35.2,Q036,25 - 34,AGEYR
1,2011,US,National,35.4,Q036,American Indian/Alaska Native,RACE
2,2011,RI,Rhode Island,32.1,Q036,Other,RACE
3,2011,US,National,16.9,Q047,18 - 24,AGEYR
4,2011,US,National,31.6,Q037,"Less than $15,000",INC
...,...,...,...,...,...,...,...
93244,2022,WY,Wyoming,24.5,Q037,"Less than $15,000",INC
93245,2022,WY,Wyoming,36.0,Q047,Less than high school,EDU
93246,2022,WY,Wyoming,35.2,Q036,35 - 44,AGEYR
93247,2022,WY,Wyoming,35.3,Q037,"$35,000 - $49,999",INC


In [7]:
import json

# Load JSON data from file
with open("Q037_2022_overall.geojson", "r") as file:
    data = json.load(file)

# Iterate through features and convert specific data_value to float
for feature in data["features"]:
    # Assuming data_value is a string
    feature["properties"]["data_value"] = float(feature["properties"]["data_value"])

# Print modified GeoJSON data
print(json.dumps(data, indent=4))

# Write modified GeoJSON data to a new file
output_file_path = "modified_geojson.geojson"
with open(output_file_path, "w") as output_file:
    json.dump(data, output_file, indent=4)

print(f"Modified GeoJSON data has been saved to {output_file_path}")

{
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "geometry": null,
            "properties": {
                "question": "Percent of adults aged 18 years and older who have an overweight classification",
                "low_confidence_limit": "33.8",
                "data_value_unit": null,
                "data_value": 34.1,
                "geolocation_address": null,
                "geolocation_zip": null,
                "locationid": "59",
                "age_years": null,
                "geolocation_state": null,
                "sample_size": "387549",
                "total": "Total",
                "stratificationcategory1": "Total",
                "high_confidence_limit": "34.4",
                "classid": "OWS",
                "education": null,
                "income": null,
                "questionid": "Q037",
                "locationdesc": "National",
                "stratification1": "Total",
         