In [None]:
# ==================================================
# Project Title: (do this as markdown above)
# ==================================================

# Created by: Your Name
# Date: Today's Date
# Description: This script automates the deployment of a web application.


In [None]:
# ==================================================
# Setup Basic DevOps
# ==================================================

# Create GitHub repo
#   via GitHub website 
# Create directory for experiment
#   mkdir folder_name
# Initialize Git repository
#   git init
# Create a virtual environment
#   python -m venv venv
#   source venv/bin/activate
# Install dependencies (pyyaml, ipykernel, ...)
#   pip install -r requirements.txt
# Set up experiment folder
# Set working directory


In [9]:
# ==================================================
# Experiment Module Setup 
# ==================================================

import os
import yaml 

def _write_if_missing(path, lines):
    """Create ``path`` containing ``lines`` unless something already exists there."""
    if os.path.exists(path):
        return
    # Explicit UTF-8 so stub contents (which embed user-supplied names) do not
    # depend on the platform's default encoding.
    with open(path, 'w', encoding='utf-8') as handle:
        handle.writelines(lines)


def create_experiment_modules(yaml_path):
    """
    Creates experiment modules based on a YAML configuration file.

    The configuration must contain a non-empty ``module_names`` mapping whose
    keys are package directory names and whose values list the submodule
    names to create inside each package, e.g.::

        module_names:
          01_data_ingestion_cleaning: [load, clean]
          02_modelling:

    For every package a directory and an ``__init__.py`` are created; for
    every submodule a stub ``<name>.py`` is created. A fixed set of top-level
    project files (README, requirements, etc.) is created as well. All paths
    are relative to the current working directory and existing files are
    never overwritten.

    Args:
        yaml_path (str): Path to the YAML configuration file.
    Returns:
        None
    Raises:
        FileNotFoundError: If ``yaml_path`` does not exist.
        AssertionError: If the configuration is empty, lacks ``module_names``
            or ``module_names`` is empty.
    """
    # Error handling
    if not os.path.exists(yaml_path):
        raise FileNotFoundError(f"YAML file not found: {yaml_path}")

    with open(yaml_path, 'r', encoding='utf-8') as file:
        module_config = yaml.safe_load(file)

    assert module_config is not None, "Failed to load YAML configuration."
    assert "module_names" in module_config, "'module_names' key is missing in the YAML configuration."
    assert module_config["module_names"], "Module names cannot be empty."

    module_names = module_config["module_names"]
    for module_name, subnames in module_names.items():
        if not module_name:
            # An empty or null key has no directory to hold its files; skip the
            # whole entry rather than writing its stubs into the working
            # directory (or failing inside os.path.join on a null key).
            continue

        os.makedirs(module_name, exist_ok=True)
        _write_if_missing(
            os.path.join(module_name, "__init__.py"),
            ["# =============== Make this a module========================= \n"],
        )

        if isinstance(subnames, str):
            # A scalar value names one submodule; iterating it directly would
            # create one file per character.
            subnames = [subnames]

        for subname in subnames or ():
            _write_if_missing(
                os.path.join(module_name, f"{subname}.py"),
                [
                    "# ======================================== \n",
                    f"# {subname} module in {module_name}\n",
                    "# ======================================== \n",
                ],
            )

    # Top-level project files, each seeded with a one-line placeholder comment.
    other_assets = ["README.md", "requirements.txt", ".env", "app.py", ".gitlab-ci.yml", "TODOs.md", "release_notes.md"]
    for asset in other_assets:
        _write_if_missing(os.path.join(".", asset), [f"# {asset} file\n"])

# Scaffold the project from experiment_modules.yaml. The config path and every
# directory/file created are resolved relative to the current working directory,
# and files that already exist are left untouched, so re-running is harmless.
create_experiment_modules("experiment_modules.yaml")


In [None]:
# ==================================================
# Set working directory
# ==================================================


In [None]:
# ==================================================
# Dependencies
# ==================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, RobustScaler
from sklearn.metrics import classification_report, confusion_matrix



In [3]:
# ===============================================
# Data Ingestion
# ===============================================

# Load the lab reference table and the testing records from the CSV files under
# 01_data_ingestion_cleaning/ (paths are relative to the working directory).
# NOTE(review): the preview of ukhsa_labs_ref_df shows an "Unnamed: 0" column, which
# usually means the CSV was saved with its index; consider index_col=0 once it is
# confirmed that no later cell relies on that column.
# NOTE(review): the London row's lab_lat_lng value carries a trailing tab in the
# preview — strip whitespace before parsing the coordinates.
ukhsa_labs_ref_df = pd.read_csv('01_data_ingestion_cleaning/lab_details.csv')
tests_df = pd.read_csv('01_data_ingestion_cleaning/testing_data.csv')




In [4]:
# Preview the first rows of the lab reference table to check it loaded as expected.
ukhsa_labs_ref_df.head()


Unnamed: 0,lab_code,lab_lat_lng,lab_loc
0,BI4658,"52.4691353, -1.8797023",Birmingham
1,CA477T,"52.2208471, 0.1156700",Cambridge
2,BR46679,"53.8124860, -1.7592994",Bradford
3,LO4685,"51.3885652, -0.4435039\t",London
4,MA7896,"53.4740918, -2.2551303",Manchester


In [5]:
np.random.seed(42)