### Setup Environment:

In [2]:
from src.embeddings import get_embeddings_df
from src.satellite_embeddings import generate_satellite_embeddings_df, get_foundational_satellite_embeddings_df
import pandas as pd

2024-01-12 03:35:07.459910: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-01-12 03:35:07.497459: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Embeddings Generation

## Satellite Image Embeddings

To extract embeddings from the satellite images, we can use one of several available foundational models.

In this case, we fine-tuned a ResNet50V2 backbone using a variational autoencoder on the 81 municipalities.

The possible models are:  

* 'VGG16'
* 'MobileNetV2'
* 'vit'
* 'autoencoder' 
* 'variational_autoencoder' 
* 'ResNet50V2' 
* 'ConvNeXtTiny'

For the autoencoder and variational autoencoder, you can also choose an encoder backbone, such as:
* 'vit' 
* 'ResNet50V2' 
* 'ConvNeXtTiny'

The latent dimension (`latent_dim`) applies only to the autoencoder and variational autoencoder.

`target_size` is the size of the input image, e.g. `(224, 224, 3)`, where the last value is the number of bands.

**Important:** To use the autoencoder or variational autoencoder, you must first pre-train a model using the notebook `train_self-supervised.ipynb`.

In [2]:
# Root directory of the satellite image dataset.
path = 'datasets/violence_prediction/Satellite/DATASET/'

# Model selection. `encoder_backbone` and `latent_dim` are only meaningful
# for the 'autoencoder' and 'variational_autoencoder' models.
target_size = (224, 224, 3)
model_name = 'variational_autoencoder'
encoder_backbone = 'ResNet50V2'
latent_dim = 1024

# Weights file for the selected model; the name encodes the model, the
# backbone, the first image dimension, the latent size and the band count.
model_path = f'Weights/{model_name}_{encoder_backbone}_{target_size[0]}_{latent_dim}_{target_size[2]}Bands_full_dataset.h5'

# CSV path for the embeddings. (Variational) autoencoders get their own
# sub-folder and a file name that also records the backbone and band count.
# NOTE(review): the double underscore after the backbone differs from the
# single underscore used in `model_path` -- confirm it is intentional before
# changing it, since other code may read this exact file name.
embeddings_path = (
    f'Embeddings/violence/{model_name}/{model_name}_{encoder_backbone}__{target_size[0]}_{latent_dim}_{target_size[2]}Bands.csv'
    if model_name in ('autoencoder', 'variational_autoencoder')
    else f'Embeddings/violence/{model_name}_{target_size[0]}_{latent_dim}.csv'
)
    

In [3]:
# Generate embeddings for the images under `path` with the configured model.
# NOTE(review): results are presumably written to `embeddings_path`; confirm
# in src.satellite_embeddings.
generate_satellite_embeddings_df(
    path,
    model_name,
    target_size,
    latent_dim,
    encoder_backbone,
    embeddings_path,
    model_path=model_path,
    ignore_black=False,
)

Images in directories: 
Cali
Cúcuta
Villavicencio
Barranquilla
Ibagué
Popayán
Soacha
Bucaramanga
Pasto
Medellín
Instructions for updating:
Colocations handled automatically by placer.


2024-01-12 00:58:14.544774: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:375] MLIR V1 optimization pass is not enabled
2024-01-12 00:58:14.757481: W tensorflow/c/c_api.cc:304] Operation '{name:'conv5_block1_1_bn/moving_mean/Assign' id:2916 op device:{requested: '', assigned: ''} def:{{{node conv5_block1_1_bn/moving_mean/Assign}} = AssignVariableOp[_has_manual_control_dependencies=true, dtype=DT_FLOAT, validate_shape=false](conv5_block1_1_bn/moving_mean, conv5_block1_1_bn/moving_mean/Initializer/zeros)}}' was changed by setting attribute after it was run by a session. This mutation will have no effect, and will trigger an error in the future. Either don't modify nodes after running them or create a new session.
2024-01-12 00:58:19.791282: W tensorflow/c/c_api.cc:304] Operation '{name:'batch_normalization_2/moving_mean/Assign' id:6501 op device:{requested: '', assigned: ''} def:{{{node batch_normalization_2/moving_mean/Assign}} = AssignVariableOp[_has_manual_control_depende

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 model (Functional)          (None, 1024)              27761152  
                                                                 
Total params: 27761152 (105.90 MB)
Trainable params: 0 (0.00 Byte)
Non-trainable params: 27761152 (105.90 MB)
_________________________________________________________________


Unnamed: 0,Municipality Code,Date,Embedding
0,Cali,2021-09-12,"[-1.3068234, -0.8891329, -0.82548416, -1.98768..."
1,Cali,2017-07-30,"[-0.8662267, -1.9892781, -0.45083773, -1.58955..."
2,Cali,2020-03-01,"[-0.16271016, 1.2283182, -0.24431898, 0.053710..."
3,Cali,2020-01-26,"[1.0123104, -0.17539583, 0.15753587, -1.503514..."
4,Cali,2019-10-20,"[-1.5866194, 1.0209501, -1.0373107, 0.53583723..."
...,...,...,...
3646,Medellín,2016-09-18,"[-0.07596938, -0.20064315, 1.5155005, -0.16436..."
3647,Medellín,2020-11-22,"[1.9030588, -0.22682817, -0.7058452, 0.6177066..."
3648,Medellín,2020-07-26,"[1.4073528, 1.4124076, 0.81622416, -0.2352677,..."
3649,Medellín,2020-09-20,"[-0.4603149, -0.1990348, 0.94120204, -1.637651..."


### Dino V2 on satellite images
As a second example, we will extract the embeddings using DINOv2.

In [3]:
# Satellite image dataset and the label used for it.
path = 'datasets/violence_prediction/Satellite/DATASET/'
dataset_name = 'violence'

# Foundational backbone used to embed the images.
backbone = 'dinov2_large'

# Images per batch; lower this if memory is tight.
batch_size = 32

# Directory handed to the embedding function as `directory`.
out_dir = 'Embeddings'

get_foundational_satellite_embeddings_df(
    batch_size=batch_size,
    path=path,
    dataset_name=dataset_name,
    backbone=backbone,
    directory=out_dir,
    save=True,
)

##################################################  dinov2_large  ##################################################


Using cache found in /home/datascience/.cache/torch/hub/facebookresearch_dinov2_main


Processed batch number: 10
##################################################  dinov2_large  ##################################################


Using cache found in /home/datascience/.cache/torch/hub/facebookresearch_dinov2_main


Processed batch number: 10
##################################################  dinov2_large  ##################################################


Using cache found in /home/datascience/.cache/torch/hub/facebookresearch_dinov2_main


Processed batch number: 10
##################################################  dinov2_large  ##################################################


Using cache found in /home/datascience/.cache/torch/hub/facebookresearch_dinov2_main


Processed batch number: 10
##################################################  dinov2_large  ##################################################


Using cache found in /home/datascience/.cache/torch/hub/facebookresearch_dinov2_main


Processed batch number: 10
##################################################  dinov2_large  ##################################################


Using cache found in /home/datascience/.cache/torch/hub/facebookresearch_dinov2_main


Processed batch number: 10
##################################################  dinov2_large  ##################################################


Using cache found in /home/datascience/.cache/torch/hub/facebookresearch_dinov2_main


Processed batch number: 10
##################################################  dinov2_large  ##################################################


Using cache found in /home/datascience/.cache/torch/hub/facebookresearch_dinov2_main


Processed batch number: 10
##################################################  dinov2_large  ##################################################


Using cache found in /home/datascience/.cache/torch/hub/facebookresearch_dinov2_main


Processed batch number: 10
##################################################  dinov2_large  ##################################################


Using cache found in /home/datascience/.cache/torch/hub/facebookresearch_dinov2_main


Processed batch number: 10


## BRSET Embeddings

To extract the BRSET embeddings, we provide a set of foundational models. The following parameters can be configured:


* **Batch Size:** Images per batch to convert to embeddings (Adjust depending on your memory)

* **Path:** Path to the images

* **Output Directory:** Directory to save the embeddings

* **Backbone:** Select a backbone from the list of possible backbones:
    * 'dinov2_small'
    * 'dinov2_base'
    * 'dinov2_large'
    * 'dinov2_giant'
    * 'sam_base'
    * 'sam_large'
    * 'sam_huge'
    * 'clip_base'
    * 'clip_large'
    * 'convnextv2_tiny'
    * 'convnextv2_base'
    * 'convnextv2_large'
    * 'convnext_tiny'
    * 'convnext_small'
    * 'convnext_base'
    * 'convnext_large'
    * 'swin_tiny'
    * 'swin_small'
    * 'swin_base'
    * 'vit_base'
    * 'vit_large'
    * 'retfound'

In [3]:
# Catalogue of the backbones that can be selected, grouped by model family.

# Foundational Models
dino_backbone = [
    'dinov2_small',
    'dinov2_base',
    'dinov2_large',
    'dinov2_giant',
]
sam_backbone = [
    'sam_base',
    'sam_large',
    'sam_huge',
]
clip_backbone = [
    'clip_base',
    'clip_large',
]

# ImageNet:

# ConvNeXt: V2 variants first, followed by the original variants.
convnext_backbone = [
    'convnextv2_tiny',
    'convnextv2_base',
    'convnextv2_large',
    'convnext_tiny',
    'convnext_small',
    'convnext_base',
    'convnext_large',
]

# Swin Transformer
swin_transformer_backbone = [
    'swin_tiny',
    'swin_small',
    'swin_base',
]

# ViT
vit_backbone = [
    'vit_base',
    'vit_large',
]

# RetFound
retfound_backbone = ['retfound']

# Flatten every family into one list (CLIP is placed before SAM here).
backbones = [
    *dino_backbone,
    *clip_backbone,
    *sam_backbone,
    *convnext_backbone,
    *swin_transformer_backbone,
    *vit_backbone,
    *retfound_backbone,
]

print(f'List of possible backbones for BRSET image collection: \n{backbones}')

List of possible backbones for BRSET image collection: 
['dinov2_small', 'dinov2_base', 'dinov2_large', 'dinov2_giant', 'clip_base', 'clip_large', 'sam_base', 'sam_large', 'sam_huge', 'convnextv2_tiny', 'convnextv2_base', 'convnextv2_large', 'convnext_tiny', 'convnext_small', 'convnext_base', 'convnext_large', 'swin_tiny', 'swin_small', 'swin_base', 'vit_base', 'vit_large', 'retfound']


In [3]:
# BRSET image directory and the label used for the dataset.
# NOTE: this is an absolute, machine-specific path; point it at your local
# copy of the BRSET images before running.
path = '/home/datascience/Retina/datasets/BRSET/images/'
dataset_name = 'BRSET'

# Backbone used to embed the images (one of the backbones listed above).
backbone = 'dinov2_large'

# Images per batch; adjust depending on available memory.
batch_size = 32

# Directory to save the embeddings.
out_dir = 'Embeddings'

get_embeddings_df(
    batch_size=batch_size,
    path=path,
    dataset_name=dataset_name,
    backbone=backbone,
    directory=out_dir,
)

##################################################  dinov2_large  ##################################################


Using cache found in /home/datascience/.cache/torch/hub/facebookresearch_dinov2_main


Processed batch number: 10
Processed batch number: 20
Processed batch number: 30
Processed batch number: 40
Processed batch number: 50
Processed batch number: 60
Processed batch number: 70
Processed batch number: 80
Processed batch number: 90
Processed batch number: 100
Processed batch number: 110
Processed batch number: 120
Processed batch number: 130
Processed batch number: 140
Processed batch number: 150
Processed batch number: 160
Processed batch number: 170
Processed batch number: 180
Processed batch number: 190
Processed batch number: 200
Processed batch number: 210
Processed batch number: 220
Processed batch number: 230
Processed batch number: 240
Processed batch number: 250
Processed batch number: 260
Processed batch number: 270
Processed batch number: 280
Processed batch number: 290
Processed batch number: 300
Processed batch number: 310
Processed batch number: 320
Processed batch number: 330
Processed batch number: 340
Processed batch number: 350
Processed batch number: 360
P

### Generate MIMIC CXR embeddings

In [None]:
# MIMIC CXR image directory and the label used for the dataset.
path = 'datasets/mimic/images'
dataset = 'mimic'

# Backbone used to embed the images.
backbone = 'dinov2_base'

# Images per batch; adjust depending on available memory.
batch_size = 32

# Directory to save the embeddings.
out_dir = 'Embeddings'

get_embeddings_df(
    batch_size=batch_size,
    path=path,
    dataset_name=dataset,
    backbone=backbone,
    directory=out_dir,
)

### Generate HAM 10000 embeddings

In [None]:
# HAM 10000 image directory and the label used for the dataset.
path = 'datasets/ham10000/images'
dataset = 'ham10000'

# Backbone used to embed the images.
backbone = 'dinov2_base'

# Images per batch; adjust depending on available memory.
batch_size = 32

# Directory to save the embeddings.
out_dir = 'Embeddings'

get_embeddings_df(
    batch_size=batch_size,
    path=path,
    dataset_name=dataset,
    backbone=backbone,
    directory=out_dir,
)