# Notebook Intentions

We want to utilize the accessibility of Django without compromising the integrity of the MEPS data. While it would be ideal to merge all common data types of differing years into a single model, that may cause unintended side effects. Variable fields are added or removed across years, and their value ranges or descriptions can change. We have therefore decided to generate a separate model for each data type for each year.

The models.py configuration file will be quite large but not very complex. This notebook will generate code that can be placed directly into the models.py file, derived from the variable parameter files.

In [1]:
import os
from os.path import expanduser

import sys
sys.path.append(os.path.join(expanduser("~"), "meps", "meps_dev"))

from meps_db.components.populators import BaseComponentsPopulator
from meps_db.components.reference import DATA_FILES_YEARS
from meps_db.utilities.universal_utilities import UniversalUtilityFunctions as util

from meps_db.components.reference import (
    FYPCDF_PUF_LOOKUP,
    MCDF_PUF_LOOKUP,
    PMDF_PUF_LOOKUP,
    DVDF_PUF_LOOKUP,
    OMEDF_PUF_LOOKUP,
    HISDF_PUF_LOOKUP,
    ERVDF_PUF_LOOKUP,
    OVDF_PUF_LOOKUP,
    OBMPVDF_PUF_LOOKUP,
    HHDF_PUF_LOOKUP,
)

In [2]:
# Per-data-type configuration used when generating the Django model code.
# Keyed by the data folder name; every entry carries:
#   lookup_dict      - the PUF lookup imported from meps_db.components.reference
#   base_model_name  - prefix of the generated model class name
#   verbose_name     - the base model name wrapped in double quotes, so it can be
#                      written into the generated source as a string literal
#   ordering         - value emitted for Meta.ordering
#   __str__fields    - expressions joined with "_" inside the generated __str__
folders = {
    folder_name: {
        "lookup_dict": puf_lookup,
        "base_model_name": model_name,
        "verbose_name": f'"{model_name}"',
        "ordering": ["DUPERSID"],
        "__str__fields": ["self.DUPERSID"],
    }
    for folder_name, puf_lookup, model_name in (
        ("population_characteristics", FYPCDF_PUF_LOOKUP, "PopulationCharacteristics"),
        ("medical_conditions", MCDF_PUF_LOOKUP, "MedicalConditions"),
        ("prescribed_medicines", PMDF_PUF_LOOKUP, "PrescribedMedicines"),
        ("dental_visits", DVDF_PUF_LOOKUP, "DentalVisits"),
        ("other_medical_expenses", OMEDF_PUF_LOOKUP, "OtherMedicalExpenses"),
        ("hospital_inpatient_stays", HISDF_PUF_LOOKUP, "HospitalInpatientStays"),
        ("emergency_room_visits", ERVDF_PUF_LOOKUP, "EmergencyRoomVisits"),
        ("outpatient_visits", OVDF_PUF_LOOKUP, "OutpatientVisits"),
        ("office_based_visits", OBMPVDF_PUF_LOOKUP, "OfficeBasedVisits"),
        ("home_health", HHDF_PUF_LOOKUP, "HomeHealth"),
    )
}

In [5]:
def _render_field_lines(variable_parameters, year):
    """Return one generated ``models.CharField`` source line per variable.

    Args:
        variable_parameters: mapping of variable name to a dict that provides
            at least ``"description"`` (str) and ``"max"`` (used as max_length).
        year: the data year being generated; only used for the 2011 filter.

    Returns:
        list[str]: newline-terminated source lines indented for a class body.
    """
    field_lines = []
    for var, var_dict in variable_parameters.items():
        description = var_dict["description"]
        # handle edge case in the 2011 FYCDF where there are more than 2000 columns, this
        # removes ~300 that don't exist in other years. As written the filter is applied
        # to every data type's 2011 file, not only the FYCDF.
        # NOTE(review): only matches if DATA_FILES_YEARS holds ints - confirm.
        if year == 2011 and "CSAQ: " in description:
            continue
        label = description.replace("/", "")
        # !r emits a valid Python string literal even when the description contains
        # quotes, backslashes or newlines; for plain text it is the same '...' form
        # that was previously hard-coded.
        field_lines.append(
            f"    {var} = models.CharField({label!r},max_length={var_dict['max']})\n"
        )
    return field_lines


def _render_model_source(data_type_dict, year, variable_parameters):
    """Return the source text of one Django model class for a data type and year.

    Args:
        data_type_dict: one entry of ``folders`` (reads ``base_model_name``,
            ``ordering``, ``verbose_name`` and ``__str__fields``).
        year: the data year; its last two characters suffix the class name.
        variable_parameters: mapping passed through to ``_render_field_lines``.

    Returns:
        str: the class definition, terminated by a blank line.
    """
    base_model_name = data_type_dict["base_model_name"]
    model_name = f"{base_model_name}{str(year)[-2:]}"
    str_expression = "_".join(data_type_dict["__str__fields"])

    source_lines = [
        f"class {model_name}(models.Model):\n",
        f'    """ Defines the {base_model_name} Model for {year}, derived from the model class. """\n',
        "\n",
        "    # Metadata\n",
        "    class Meta:\n",
        '        """ Set parameters for admin app"""\n',
        f"        ordering = {data_type_dict['ordering']}\n",
        f'        verbose_name_plural = {data_type_dict["verbose_name"]}\n',
    ]
    source_lines.extend(_render_field_lines(variable_parameters, year))
    source_lines.extend([
        "\n",
        "    # Methods\n",
        "    def __str__(self):\n",
        f'        """String for representing a {model_name} object"""\n',
        # The doubled braces emit literal braces, so the generated line is itself an f-string.
        f'        return f"{{{str_expression}}}"\n\n',
    ])
    return "".join(source_lines)


# Write one "<folder>.txt" per data type containing a model class for every year.
# A folder's file is only written once all of its years have loaded; a missing
# parameters file therefore aborts the run before that folder's file is created.
for folder, data_type_dict in folders.items():
    model_sources = []
    for year in DATA_FILES_YEARS:
        variable_parameters_path = BaseComponentsPopulator.get_variable_parameters_path(
            folder=folder,
            year=year,
            year_lookup=data_type_dict["lookup_dict"],
        )
        variable_parameters = util.load_data_from_file(
            file_path=variable_parameters_path,
        )
        model_sources.append(
            _render_model_source(data_type_dict, year, variable_parameters)
        )

    # The context manager closes the file even if the write fails; UTF-8 matches
    # the default encoding of Python source files the text is pasted into.
    with open(f"{folder}.txt", "w", encoding="utf-8") as text_file:
        text_file.write("".join(model_sources))


AssertionError: File not found: /home/mike/meps/meps_data/office_based_visits/h135g_parameters.json

In [None]:
# Print, for each year, how many variables the "consolidated" parameters file defines.
# NOTE(review): FYCDF_PUF_LOOKUP is not among the names imported in the import cell
# (only FYPCDF_PUF_LOOKUP is) - confirm where it comes from, otherwise this cell
# fails on a fresh kernel.
for year in DATA_FILES_YEARS:
    path_arguments = {
        "folder": "consolidated",
        "year": year,
        "year_lookup": FYCDF_PUF_LOOKUP,
    }
    variable_parameters_path = BaseComponentsPopulator.get_variable_parameters_path(
        **path_arguments
    )
    variable_parameters = util.load_data_from_file(file_path=variable_parameters_path)

    variable_count = len(variable_parameters.keys())
    print(f"{year}: {variable_count}")