In [None]:
from MaskMultimodalTrain import MaskMultimodalWithPretrained
from pipelineClass import MultimodalPipeline
from dotenv import load_dotenv
import os

# --- Environment -------------------------------------------------------------
# Load variables from a local .env file into the process environment, then read
# the data root from FILE_PATH. Fail fast with a clear message if it is missing
# or empty, since every path below is derived from it.
load_dotenv()
BASE_DIR = os.getenv('FILE_PATH')
if not BASE_DIR:
    raise ValueError(
        "FILE_PATH environment variable not set. "
        "Please create a .env file and set it."
    )

# The preprocessed CSV is looked up directly under the data root.
DATA_PATH = os.path.join(BASE_DIR, 'Full_preprocessed_detailed_house.csv')
print(
    f"Base Directory: {BASE_DIR}",
    f"Data CSV Path: {DATA_PATH}",
    sep="\n",
)

# --- Tabular feature columns --------------------------------------------------
# Column names handed to the pipeline as numeric vs. categorical inputs.
numeric_cols = [
    'procent_ingenomen',
    'area',
    'perimeter',
    'elongation',
    'compactness',
    'huisnr_bag_letter',
]
categorical_cols = ['build_type']

# --- Pipeline -----------------------------------------------------------------
# Images are resolved relative to the same root as the CSV (image_base_dir).
# NOTE(review): epochs / lr / batch_size are passed through unchanged; they
# presumably only matter for training, which this cell does not start - confirm.
pipeline = MultimodalPipeline(
    model_class=MaskMultimodalWithPretrained,
    csv_path=DATA_PATH,
    image_base_dir=BASE_DIR,
    image_col='frontview_url',
    target_col='woningtype',
    numeric_cols=numeric_cols,
    categorical_cols=categorical_cols,
    epochs=1,
    lr=1e-4,
    batch_size=32,
    useMask=True,
)

# Swap in previously saved weights. The checkpoint path is relative to the
# notebook's working directory, not to BASE_DIR.
# NOTE(review): evaluate=False presumably skips an evaluation pass - confirm.
pipeline.model = pipeline.load_saved_model(
    'models/best_housing_classifier_70.pth',
    evaluate=False,
)


  from shapely.errors import WKTReadingError


Base Directory: ../../Data/
Data CSV Path: ../../Data/Full_preprocessed_detailed_house.csv
Using device: cuda
--- Preparing Data ---
Splitting data...
Found 10 classes: ['2-onder-1-kapwoning', 'Bovenwoning/Benedenwoning/Maisonette', 'Corridorflat/Galerijflat', 'Geschakelde 2-onder-1-kapwoning', 'Geschakelde woning', 'Halfvrijstaande woning', 'Hoekwoning/Eindwoning', 'Portiekflat/Portiekwoning', 'Tussenwoning', 'Vrijstaande woning']
Preprocessing tabular features...
Total tabular features: 7
Preprocessing objects saved to preprocessors/
Creating Datasets and DataLoaders...
Dropped 41 rows due to missing image paths.
Dropped 14 rows due to missing image paths.
Dropped 19 rows due to missing image paths.
Train size: 3957 | Val size: 1319 | Test size: 1319
--- Data Preparation Complete ---


In [None]:
# Classify the new entries described by input.csv / images and report each
# result. Every item yielded by classify() is unpacked as
# (label, probability distribution, class index).
# NOTE(review): threshold=0.6 is presumably the minimum top probability for a
# confident label; below it the label reads "Uncertain: <p>" - confirm in
# MultimodalPipeline.classify.
for predicted_label, prob_dist, predicted_idx in pipeline.classify(
    input_csv_name='input.csv',
    input_image_dir='images',
    threshold=0.6,
):
    # Nothing to report for entries without a label.
    if not predicted_label:
        continue
    print(
        f"The predicted type for the new entry is: {predicted_label}",
        f"prob dist: {prob_dist}",
        f"predicted class index: {predicted_idx}",
        sep="\n",
    )


--- Starting Classification of New Data Entry ---
Classification complete. Predicted class: Uncertain: 0.5379729270935059
The predicted type for the new entry is: Uncertain: 0.5379729270935059
prob dist: [0.10873066633939743, 0.033931221812963486, 0.017357900738716125, 0.037985336035490036, 0.062166403979063034, 0.09970474988222122, 0.5379729270935059, 0.011423717252910137, 0.06279473751783371, 0.02793235331773758]
6
