# Notebook Initialization

In [1]:
%%writefile ../config/tfg.conf
# Project configuration in HOCON format. nb_init.py parses this file
# (config/tfg.conf) with pyhocon and exposes the result as `config`.
tfg {
    # Seed value; presumably shared by the randomized steps for reproducibility (confirm against consumers).
    seed: 42
    eda {
        # String-valued CSV reading options. The keys look like Spark CSV reader
        # options; confirm where they are consumed.
        csv_options: {
            "header": "true"
            "sep": ","
            "inferSchema": "true"
        }
    }
    training {
        # NOTE(review): test_fraction / val_fraction are presumably hold-out shares of
        # the data set; confirm whether val is taken from the whole set or from
        # what remains after the test split.
        test_fraction: 0.1,
        val_fraction: 0.1,
        # NOTE(review): presumably the number of images taken per clip; verify against the training code.
        images_per_clip: 70,
        batch_size: 32
    }
}

# Named data sets, each with a relative path. Presumably resolved against the
# data directory defined in nb_init.py; verify against the loader.
data {
    "train_files_df" : { path: "preprocessed/training_files_df.csv" }
    "test_files_df" : { path: "preprocessed/test_files_df.csv" }
}


Overwriting ../config/tfg.conf


In [2]:
%%writefile nb_init.py

# %load nb_init.py

from pathlib import Path
import pandas as pd

# --- Project layout ---------------------------------------------------------
# Everything hangs off the parent of the current working directory, so the
# notebook must be started from a direct sub-directory of the project.
base_dir = Path.cwd().parent
config_dir = base_dir.joinpath("config")
data_dir = base_dir.joinpath("data")
docs_dir = base_dir.joinpath("docs")
figures_dir = docs_dir.joinpath("figures")
models_dir = base_dir.joinpath("models")
logs_dir = base_dir.joinpath("logs")

# --- Data sub-directories ---------------------------------------------------
images_input_dir = data_dir.joinpath("COVID19")
images_pp_dir = data_dir.joinpath("COVID19-preprocessed")
preprocessed_dir = data_dir.joinpath("preprocessed")
output_dir = data_dir.joinpath("output")

# --- CNN training data (the CNN is trained image by image) -------------------
cnn_data_dir = data_dir.joinpath("modelling", "cnn")
cnn_train_dir = cnn_data_dir.joinpath("train")
cnn_test_dir = cnn_data_dir.joinpath("test")

# --- Individual files -------------------------------------------------------
metadata_file = images_input_dir.joinpath("metadata.csv")
labels_file = images_input_dir.joinpath("unzip_filenames.csv")
preprocessed_labels_file = preprocessed_dir.joinpath("labels.parquet")

feature_extractor_model_file = models_dir.joinpath("feature_extractor.tf")

from pyspark.sql import SparkSession
from pyspark.sql import functions as F

# Shared Spark entry point for the notebook: getOrCreate() hands back the
# already-running session if there is one, otherwise it starts a new one.
# No builder options are set here, so defaults apply.
spark = SparkSession.builder.getOrCreate()

# Location of the HOCON project configuration.
config_file = config_dir / "tfg.conf"

from pyhocon import ConfigFactory


def load_config(path=None):
    """Parse a HOCON configuration file and return the parsed config object.

    Args:
        path: Optional location of the file to parse. When omitted, the
            module-level ``config_file`` is used. That name is looked up at
            call time, so rebinding ``config_file`` affects later calls.

    Returns:
        Whatever ``ConfigFactory.parse_file`` returns for the chosen file.
    """
    return ConfigFactory.parse_file(config_file if path is None else path)


# Parsed once at start-up; call load_config() again to pick up edits to the file.
config = load_config()
    
import sys

# Make modules under <base_dir>/src importable. The membership check keeps
# sys.path free of duplicate entries when this code is executed more than once
# in the same kernel.
if str(base_dir / "src") not in sys.path:
    sys.path.append(str(base_dir / "src"))

# The % lines below are IPython magics, not Python syntax, so this file only
# works when its contents run inside an IPython/Jupyter cell (see the
# "%load nb_init.py" hint at the top); it cannot be imported as a module.
%load_ext autoreload

# Mode 2 asks autoreload to reload modules before each cell executes, so edits
# to imported source files are picked up without restarting the kernel.
%autoreload 2

Overwriting nb_init.py
