# Install `simex`

In [22]:
%%bash
#clone repo of `sipecam-metadata-extactor` in server
#git clone git@github.com:CONABIO/sipecam-metadata-extractor.git ~/
cd ../src
#install in editable mode
pip install -q -e .



# Or `docker run`

``` 
SIMEX_VERSION=0.1
REPO_URL=sipecam/simex
CONTAINER_NAME=sipecam-simex
docker run --rm -v $HOME:/shared_volume --name $CONTAINER_NAME -p 3000:8888 -d $REPO_URL:$SIMEX_VERSION /usr/local/bin/jupyter lab --ip=0.0.0.0 --no-browser --allow-root
```

# Copy files for testing

In [1]:
%%bash
rm -r /LUSTRE/sacmod/SIPECAM/testing_simex/*

In [2]:
%%bash
cp -r /LUSTRE/sacmod/SIPECAM/testing_simex_backup/* /LUSTRE/sacmod/SIPECAM/testing_simex/

# List of files to extract metadata

In [3]:
%%bash
dir_with_sipecam_data=/LUSTRE/sacmod/SIPECAM/testing_simex/
list_of_files_and_subdirectories_to_extract_metadata --input_directory $dir_with_sipecam_data

## Check

In [4]:
%%bash
ls -lh /shared_volume/sipecam_files_to_extract_metadata_from_26-06-2022.txt

-rw-r--r-- 1 root root 1.1K Jun 26 13:19 /shared_volume/sipecam_files_to_extract_metadata_from_26-06-2022.txt


In [5]:
%%bash
ls -lh /shared_volume/sipecam_subdirectories_26-06-2022.txt

-rw-r--r-- 1 root root 146 Jun 26 13:19 /shared_volume/sipecam_subdirectories_26-06-2022.txt


In [6]:
%%bash
wc -l /shared_volume/sipecam_files_to_extract_metadata_from_26-06-2022.txt

17 /shared_volume/sipecam_files_to_extract_metadata_from_26-06-2022.txt


In [7]:
%%bash
wc -l /shared_volume/sipecam_subdirectories_26-06-2022.txt

3 /shared_volume/sipecam_subdirectories_26-06-2022.txt


In [8]:
%%bash
#check every first subdirectory has a JPG, WAV or AVI file
SAVEIFS=$IFS;IFS=$(echo -en "\n\b")
for f in $(cat /shared_volume/sipecam_subdirectories_26-06-2022.txt)
  do echo "$f"
     ls "$f"|grep "[WAV|wav|JPG|jpg|AVI|avi]$"|head -n 1
done
IFS=$SAVEIFS

/LUSTRE/sacmod/SIPECAM/testing_simex/audios_ultra
20210904_180000.WAV
/LUSTRE/sacmod/SIPECAM/testing_simex/audios_audible
20211114_110000.WAV
/LUSTRE/sacmod/SIPECAM/testing_simex/images
RCNX0001.JPG


# Generate SiPeCaM Zendro schema (one time only)

In [9]:
%%bash
echo "SIPECAM_ZENDRO_GQL_URL=https://gql.sipecamdata.conabio.gob.mx/" > ~/.simex_env
echo "SIPECAM_ZENDRO_GQL_USER=<user>" >> ~/.simex_env
echo "SIPECAM_ZENDRO_GQL_PASSWORD=<password>" >> ~/.simex_env


In [13]:
%%bash
generate_sipecam_zendro_schema

Standard Error sgqlc.introspection b''
Standard Output sgqlc.introspection b'\n'
Standard Error sgqlc-codegen b''
Standard Output sgqlc-codegen b''


## Check

In [8]:
%%bash
head -n 10 /root/sipecam-metadata-extractor/src/simex/sipecam_zendro_schema.py

import sgqlc.types
import sgqlc.types.datetime


sipecam_zendro_schema = sgqlc.types.Schema()



########################################################################
# Scalars and Enumerations


# Extract serial number, dates and metadata of files and device

In [9]:
%%bash
dir_to_be_processed="/LUSTRE/sacmod/SIPECAM/testing_simex/images/"
#default for parallel execution are 4 processes
extract_serial_numbers_dates_and_metadata_of_files_and_device --input_dir "$dir_to_be_processed" --parallel

In [10]:
%%bash
dir_to_be_processed="/LUSTRE/sacmod/SIPECAM/testing_simex/audios_audible"
#can select number of proceses
extract_serial_numbers_dates_and_metadata_of_files_and_device --input_dir "$dir_to_be_processed" --parallel --number_of_processes 6

In [11]:
%%bash
dir_to_be_processed="/LUSTRE/sacmod/SIPECAM/testing_simex/audios_ultra"
#if number_of_processes is passed as arg then is assumed will execute in parallel
extract_serial_numbers_dates_and_metadata_of_files_and_device --input_dir "$dir_to_be_processed" --number_of_processes 6

## Check

### Dir with image & video files

In [12]:
%%bash
dir_to_be_processed="/LUSTRE/sacmod/SIPECAM/testing_simex/images/"
file_for_logs=$(find "$dir_to_be_processed" -name "logs_simex_extract_serial_numbers_dates_and_metadata_of_files_and_device.logs")
head -n 15 "$file_for_logs"

2022-06-26 13:19:22,761 [INFO]  extraction of serial_numbers_dates_and_metadata_of_files_and_device
2022-06-26 13:19:22,763 [INFO]  logs for extraction of serial_numbers_dates_and_metadata_of_files_and_device in /LUSTRE/sacmod/SIPECAM/testing_simex/images/images_simex_extract_serial_numbers_dates_and_metadata_of_files_and_device.json
2022-06-26 13:19:22,768 [INFO]  extraction of metadata of device from /LUSTRE/sacmod/SIPECAM/testing_simex/images/RCNX0014.JPG
2022-06-26 13:19:22,991 [INFO]  SUCCESSFUL extraction of serial number of /LUSTRE/sacmod/SIPECAM/testing_simex/images/RCNX0014.JPG
2022-06-26 13:19:23,420 [INFO]  there were no GPS metadata associated with file /LUSTRE/sacmod/SIPECAM/testing_simex/images/RCNX0014.JPG, returning empty string
2022-06-26 13:19:24,843 [INFO]  extraction of date of /LUSTRE/sacmod/SIPECAM/testing_simex/images/RCNX0014.JPG
2022-06-26 13:19:24,843 [INFO]  SUCCESSFUL extraction of date of /LUSTRE/sacmod/SIPECAM/testing_simex/images/RCNX0014.JPG
2022-06-26 1

In [13]:
%%bash
dir_to_be_processed="/LUSTRE/sacmod/SIPECAM/testing_simex/images/"
file_json=$(find "$dir_to_be_processed" -name "*.json")
python3 -mjson.tool "$file_json"|head -n 80

{
    "MetadataDevice": {
        "Make": "RECONYX",
        "Model": "HF2 PRO COVERT",
        "SerialNumber": "HLPXGM09048594"
    },
    "MetadataFiles": {
        "/LUSTRE/sacmod/SIPECAM/testing_simex/images/RCNX0014.JPG": {
            "Make": "RECONYX",
            "Model": "HF2 PRO COVERT",
            "DateTimeOriginal": "2021:11:05 12:52:13",
            "SerialNumber": "HLPXGM09048594",
            "FileSize": "625 kB",
            "ExifByteOrder": "Little-endian (Intel, II)",
            "ImageWidth": 2048,
            "ImageHeight": 1440,
            "EncodingProcess": "Baseline DCT, Huffman coding",
            "BitsPerSample": 8,
            "ColorComponents": 3,
            "YCbCrSubSampling": "YCbCr4:2:2 (2 1)",
            "XResolution": 72,
            "YResolution": 72,
            "ResolutionUnit": "inches",
            "YCbCrPositioning": "Co-sited",
            "ExposureTime": "1/51",
            "ISO": 400,
            "TimeZoneOffset": 0,
            "Components

In [14]:
%%bash
dir_to_be_processed="/LUSTRE/sacmod/SIPECAM/testing_simex/images/"
file_json=$(find "$dir_to_be_processed" -name "*.json")
python3 -mjson.tool "$file_json"|tail -n 50

            "GPSLatitudeRef": "",
            "GPSLongitudeRef": "",
            "GPSLatitude": "",
            "GPSLongitude": "",
            "SerialNumber": "HLPXGM09048594"
        },
        "/LUSTRE/sacmod/SIPECAM/testing_simex/images/RCNX0015.AVI": {
            "BitRate": "12.3 Mbit/sec",
            "Endianness": "Little endian",
            "VideoBitsPerPixel": "24",
            "VideoCompression": "Motion JPEG DIB (fourcc:\"MJPG\")",
            "AudioSampleRate": "16.0 kHz",
            "AudioCompressionRate": "1.0x",
            "AudioCompression": "Microsoft Pulse Code Modulation (PCM)",
            "AudioBitRate": "256.4 Kbit/sec",
            "FileSize": "16 MB",
            "DateTimeOriginal": "2021:11:05 12:52:14",
            "BMPVersion": "Windows V3",
            "ImageWidth": 1024,
            "ImageHeight": 720,
            "Planes": 1,
            "ImageLength": 2211840,
            "PixelsPerMeterX": 0,
            "PixelsPerMeterY": 0,
            "NumColors":

### Dir with audio files

In [15]:
%%bash
dir_to_be_processed="/LUSTRE/sacmod/SIPECAM/testing_simex/audios_audible"
file_for_logs=$(find "$dir_to_be_processed" -name "logs_simex_extract_serial_numbers_dates_and_metadata_of_files_and_device.logs")
head -n 15 "$file_for_logs"

2022-06-26 13:19:25,317 [INFO]  extraction of serial_numbers_dates_and_metadata_of_files_and_device
2022-06-26 13:19:25,318 [INFO]  logs for extraction of serial_numbers_dates_and_metadata_of_files_and_device in /LUSTRE/sacmod/SIPECAM/testing_simex/audios_audible/audios_audible_simex_extract_serial_numbers_dates_and_metadata_of_files_and_device.json
2022-06-26 13:19:25,321 [INFO]  extraction of metadata of device from /LUSTRE/sacmod/SIPECAM/testing_simex/audios_audible/20211114_110000.WAV
2022-06-26 13:19:25,767 [INFO]  SUCCESSFUL extraction of serial number of /LUSTRE/sacmod/SIPECAM/testing_simex/audios_audible/20211114_110000.WAV
2022-06-26 13:19:25,768 [INFO]  GPSFile key not found for videos, files from directory are of audio
2022-06-26 13:19:27,597 [INFO]  extraction of date of /LUSTRE/sacmod/SIPECAM/testing_simex/audios_audible/20211114_110000.WAV
2022-06-26 13:19:27,598 [INFO]  SUCCESSFUL extraction of date of /LUSTRE/sacmod/SIPECAM/testing_simex/audios_audible/20211114_110000.W

In [16]:
%%bash
dir_to_be_processed="/LUSTRE/sacmod/SIPECAM/testing_simex/audios_audible"
file_json=$(find "$dir_to_be_processed" -name "*.json")
python3 -mjson.tool "$file_json" | head -n 40

{
    "MetadataDevice": {
        "Artist": "AudioMoth 24E1440360371EA1",
        "SerialNumber": "24E1440360371EA1"
    },
    "MetadataFiles": {
        "/LUSTRE/sacmod/SIPECAM/testing_simex/audios_audible/20211114_110000.WAV": {
            "Battery": "4.7V",
            "Datetime": "11:00:00 14/11/2021 (UTC-0600)",
            "Gain": "medium",
            "Timezone": "UTC",
            "BitRate": "768.0 Kbit/sec",
            "SerialNumber": "24E1440360371EA1",
            "FileSize": "5.8 MB",
            "Encoding": "Microsoft PCM",
            "NumChannels": 1,
            "SampleRate": 48000,
            "AvgBytesPerSec": 96000,
            "BitsPerSample": 16,
            "Comment": "Recorded at 11:00:00 14/11/2021 (UTC-6) by AudioMoth 24E1440360371EA1 at medium gain setting while battery state was 4.7V and temperature was 29.8C.",
            "Duration": 60.0050833333333
        },
        "/LUSTRE/sacmod/SIPECAM/testing_simex/audios_audible/20211206_044000.WAV": {
         

In [17]:
%%bash
dir_to_be_processed="/LUSTRE/sacmod/SIPECAM/testing_simex/audios_audible"
file_json=$(find "$dir_to_be_processed" -name "*.json")
python3 -mjson.tool "$file_json" | tail -n 30

            "NumChannels": 1,
            "SampleRate": 48000,
            "AvgBytesPerSec": 96000,
            "BitsPerSample": 16,
            "Comment": "Recorded at 04:50:00 06/12/2021 (UTC-6) by AudioMoth 24E1440360371EA1 at medium gain setting while battery state was 4.2V and temperature was 24.0C.",
            "Duration": 60.0050833333333
        },
        "/LUSTRE/sacmod/SIPECAM/testing_simex/audios_audible/20211206_060000.WAV": {
            "Battery": "4.2V",
            "Datetime": "06:00:00 06/12/2021 (UTC-0600)",
            "Gain": "medium",
            "Timezone": "UTC",
            "BitRate": "768.0 Kbit/sec",
            "SerialNumber": "24E1440360371EA1",
            "FileSize": "1769 kB",
            "Encoding": "Microsoft PCM",
            "NumChannels": 1,
            "SampleRate": 48000,
            "AvgBytesPerSec": 96000,
            "BitsPerSample": 16,
            "Comment": "Recorded at 06:00:00 06/12/2021 (UTC-6) by AudioMoth 24E1440360371EA1 at medium gai

### Dir with audio files

In [18]:
%%bash
dir_to_be_processed="/LUSTRE/sacmod/SIPECAM/testing_simex/audios_ultra"
file_for_logs=$(find "$dir_to_be_processed" -name "logs_simex_extract_serial_numbers_dates_and_metadata_of_files_and_device.logs")
head -n 15 "$file_for_logs"

2022-06-26 13:19:28,070 [INFO]  extraction of serial_numbers_dates_and_metadata_of_files_and_device
2022-06-26 13:19:28,072 [INFO]  logs for extraction of serial_numbers_dates_and_metadata_of_files_and_device in /LUSTRE/sacmod/SIPECAM/testing_simex/audios_ultra/audios_ultra_simex_extract_serial_numbers_dates_and_metadata_of_files_and_device.json
2022-06-26 13:19:28,075 [INFO]  extraction of metadata of device from /LUSTRE/sacmod/SIPECAM/testing_simex/audios_ultra/20211002_054000.WAV
2022-06-26 13:19:28,519 [INFO]  SUCCESSFUL extraction of serial number of /LUSTRE/sacmod/SIPECAM/testing_simex/audios_ultra/20211002_054000.WAV
2022-06-26 13:19:28,520 [INFO]  GPSFile key not found for videos, files from directory are of audio
2022-06-26 13:19:29,472 [INFO]  extraction of date of /LUSTRE/sacmod/SIPECAM/testing_simex/audios_ultra/20211002_054000.WAV
2022-06-26 13:19:29,473 [INFO]  SUCCESSFUL extraction of date of /LUSTRE/sacmod/SIPECAM/testing_simex/audios_ultra/20211002_054000.WAV
2022-06-2

In [19]:
%%bash
dir_to_be_processed="/LUSTRE/sacmod/SIPECAM/testing_simex/audios_ultra"
file_json=$(find "$dir_to_be_processed" -name '*.json')
python3 -mjson.tool "$file_json" | head -n 40

{
    "MetadataDevice": {
        "Artist": "AudioMoth 24E4C30453E1BE08",
        "SerialNumber": "24E4C30453E1BE08"
    },
    "MetadataFiles": {
        "/LUSTRE/sacmod/SIPECAM/testing_simex/audios_ultra/20211002_054000.WAV": {
            "Battery": "3.2V",
            "Datetime": "05:40:00 02/10/2021 (UTC-0600)",
            "Gain": "medium",
            "Timezone": "UTC",
            "BitRate": "6.1 Mbit/sec",
            "SerialNumber": "24E4C30453E1BE08",
            "FileSize": "488 bytes",
            "Encoding": "Microsoft PCM",
            "NumChannels": 1,
            "SampleRate": 384000,
            "AvgBytesPerSec": 768000,
            "BitsPerSample": 16,
            "Comment": "Recorded at 05:40:00 02/10/2021 (UTC-6) by AudioMoth 24E4C30453E1BE08 at medium gain setting while battery state was 3.2V and temperature was 20.2C. Recording cancelled before completion due to low voltage.",
            "Duration": "0.000635416666666667"
        },
        "/LUSTRE/sacmod/SIPEC

In [20]:
%%bash
dir_to_be_processed="/LUSTRE/sacmod/SIPECAM/testing_simex/audios_ultra"
file_json=$(find "$dir_to_be_processed" -name '*.json')
python3 -mjson.tool "$file_json" | tail -n 30

            "NumChannels": 1,
            "SampleRate": 384000,
            "AvgBytesPerSec": 768000,
            "BitsPerSample": 16,
            "Comment": "Recorded at 05:20:00 02/10/2021 (UTC-6) by AudioMoth 24E4C30453E1BE08 at medium gain setting while battery state was 3.2V and temperature was 20.4C. Recording cancelled before completion due to low voltage.",
            "Duration": "0.000635416666666667"
        },
        "/LUSTRE/sacmod/SIPECAM/testing_simex/audios_ultra/20210904_180000.WAV": {
            "Battery": "4.2V",
            "Datetime": "18:00:00 04/09/2021 (UTC-0600)",
            "Gain": "medium",
            "Timezone": "UTC",
            "BitRate": "6.1 Mbit/sec",
            "SerialNumber": "24E4C30453E1BE08",
            "FileSize": "23 MB",
            "Encoding": "Microsoft PCM",
            "NumChannels": 1,
            "SampleRate": 384000,
            "AvgBytesPerSec": 768000,
            "BitsPerSample": 16,
            "Comment": "Recorded at 18:00:00 

# Move files to standard directory

In [None]:
%%bash
echo "SIPECAM_ZENDRO_GQL_URL=https://gql.sipecamdata.conabio.gob.mx/" > ~/.simex_env
echo "SIPECAM_ZENDRO_GQL_USER=<user>" >> ~/.simex_env
echo "SIPECAM_ZENDRO_GQL_PASSWORD=<password>" >> ~/.simex_env

## Dir with images & videos

**For this need to have in `move` cli, MAX_NUMBER_OF_DAYS = 40**

In [21]:
%%bash
dir_to_be_processed="/LUSTRE/sacmod/SIPECAM/testing_simex/images/"
path_std_dir="/LUSTRE/sacmod/SIPECAM/testing_simex/sipecam_simex_std_dir"
move_files_to_standard_directory --directory_with_file_of_serial_number_and_dates "$dir_to_be_processed" --path_for_standard_directory "$path_std_dir"

## Check it

In [22]:
%%bash
dir_to_be_processed="/LUSTRE/sacmod/SIPECAM/testing_simex/images/"
file_for_logs=$(find "$dir_to_be_processed" -name 'logs_simex_move_files_to_standard_directory.logs')
head -n 75 "$file_for_logs"

2022-06-26 13:19:45,411 [INFO]  Dir /LUSTRE/sacmod/SIPECAM/testing_simex/images/ has serial number HLPXGM09048594
2022-06-26 13:19:45,412 [INFO]  File /LUSTRE/sacmod/SIPECAM/testing_simex/images/RCNX0002.JPG has date 2021-10-21
2022-06-26 13:19:45,412 [INFO]  File /LUSTRE/sacmod/SIPECAM/testing_simex/images/RCNX0015.AVI has date 2021-11-05
2022-06-26 13:19:45,412 [INFO]  DaysBetweenFirstAndLastDate: 15
2022-06-26 13:19:45,568 [INFO]  Query to Zendro GQL: query {
  physical_devices(pagination: {limit: 0}, search: {field: serial_number, value: "HLPXGM09048594", operator: like}) {
    device_deploymentsFilter(pagination: {limit: 0}, search: {operator: and, search: [{field: date_deployment, value: "2021-10-21", valueType: String, operator: gte}, {field: date_deployment, value: "2021-11-05", valueType: String, operator: lte}]}) {
      node {
        nomenclatura
        cat_integr
        ecosystems {
          name
        }
      }
      cumulus {
        name
        geometry
      }
  

In [23]:
%%bash
path_std_dir="/LUSTRE/sacmod/SIPECAM/testing_simex/sipecam_simex_std_dir"
dir_images=$(find "$path_std_dir" -name "images_videos")
file=$(find "$dir_images" -name "*.json")
python3 -mjson.tool "$file"|head -n 100

{
    "DaysBetweenFirstAndLastDate": 15,
    "MetadataDevice": {
        "Make": "RECONYX",
        "Model": "HF2 PRO COVERT",
        "SerialNumber": "HLPXGM09048594",
        "NomenclatureNode": "3_92_1_1334",
        "CumulusName": "92",
        "CentroidCumulusLatitude": 16.17538,
        "CentroidCumulusLongitude": -90.87685,
        "DateDeployment": "2021-10-08",
        "EcosystemsName": "Selvas humedas",
        "Latitude": 16.264693,
        "Longitude": -90.9371013,
        "NodeCategoryIntegrity": "Integro"
    },
    "MetadataFiles": {
        "/LUSTRE/sacmod/SIPECAM/testing_simex/sipecam_simex_std_dir/92/3_92_1_1334/HLPXGM09048594/2021-10-08/images_videos/6ec862f8274e86d7757aedc27acc0293_0014.JPG": {
            "Make": "RECONYX",
            "Model": "HF2 PRO COVERT",
            "DateTimeOriginal": "2021:11:05 12:52:13",
            "SerialNumber": "HLPXGM09048594",
            "FileSize": "625 kB",
            "ExifByteOrder": "Little-endian (Intel, II)",
            "

In [24]:
%%bash
path_std_dir="/LUSTRE/sacmod/SIPECAM/testing_simex/sipecam_simex_std_dir"
dir_images=$(find "$path_std_dir" -name "images_videos")
file=$(find "$dir_images" -name "*.json")
python3 -mjson.tool "$file"|tail -n 170

            "Megapixels": 2.94912,
            "GPSLatitudeRef": "North",
            "GPSLongitudeRef": "West",
            "GPSLatitude": 16.264693,
            "GPSLongitude": -90.9371013,
            "CentroidCumulusLatitude": 16.17538,
            "CentroidCumulusLongitude": -90.87685
        },
        "/LUSTRE/sacmod/SIPECAM/testing_simex/sipecam_simex_std_dir/92/3_92_1_1334/HLPXGM09048594/2021-10-08/images_videos/4619170f846c449ca1d9a613ec46fcd3_0001.JPG": {
            "Make": "RECONYX",
            "Model": "HF2 PRO COVERT",
            "DateTimeOriginal": "2021:10:21 12:20:26",
            "SerialNumber": "HLPXGM09048594",
            "FileSize": "547 kB",
            "ExifByteOrder": "Little-endian (Intel, II)",
            "ImageWidth": 2048,
            "ImageHeight": 1440,
            "EncodingProcess": "Baseline DCT, Huffman coding",
            "BitsPerSample": 8,
            "ColorComponents": 3,
            "YCbCrSubSampling": "YCbCr4:2:2 (2 1)",
            "XResolu

In [25]:
%%bash
path_std_dir="/LUSTRE/sacmod/SIPECAM/testing_simex/sipecam_simex_std_dir/files_moved_with_simex"
txt_file=$(find "$path_std_dir" -name "*.txt")
cat "$txt_file"

/LUSTRE/sacmod/SIPECAM/testing_simex/sipecam_simex_std_dir/92/3_92_1_1334/HLPXGM09048594/2021-10-08/images_videos


## Dir with audio files

**For this need to have in `move` cli, MAX_NUMBER_OF_DAYS = 60**

In [32]:
%%bash
dir_to_be_processed="/LUSTRE/sacmod/SIPECAM/testing_simex/audios_audible"
path_std_dir="/LUSTRE/sacmod/SIPECAM/testing_simex/sipecam_simex_std_dir"
move_files_to_standard_directory --directory_with_file_of_serial_number_and_dates "$dir_to_be_processed" --path_for_standard_directory "$path_std_dir"

## Check it

In [33]:
%%bash
dir_to_be_processed="/LUSTRE/sacmod/SIPECAM/testing_simex/audios_audible"
file_for_logs=$(find "$dir_to_be_processed" -name 'logs_simex_move_files_to_standard_directory.logs')
head -n 100 "$file_for_logs"

2022-06-26 13:22:06,822 [INFO]  Dir /LUSTRE/sacmod/SIPECAM/testing_simex/audios_audible has serial number 24E1440360371EA1
2022-06-26 13:22:06,823 [INFO]  File /LUSTRE/sacmod/SIPECAM/testing_simex/audios_audible/20211114_110000.WAV has date 2021-11-14
2022-06-26 13:22:06,823 [INFO]  File /LUSTRE/sacmod/SIPECAM/testing_simex/audios_audible/20211206_060000.WAV has date 2021-12-06
2022-06-26 13:22:06,823 [INFO]  DaysBetweenFirstAndLastDate: 22
2022-06-26 13:22:06,976 [INFO]  Query to Zendro GQL: query {
  physical_devices(pagination: {limit: 0}, search: {field: comments, value: "ADM24E1440360371EA1", operator: like}) {
    device_deploymentsFilter(pagination: {limit: 0}, search: {operator: and, search: [{field: date_deployment, value: "2021-11-14", valueType: String, operator: gte}, {field: date_deployment, value: "2021-12-06", valueType: String, operator: lte}]}) {
      node {
        nomenclatura
        cat_integr
        ecosystems {
          name
        }
      }
      cumulus {
 

In [34]:
%%bash
path_std_dir="/LUSTRE/sacmod/SIPECAM/testing_simex/sipecam_simex_std_dir"
dir_audios=$(find "$path_std_dir" -name "24E1440360371EA1")
file=$(find "$dir_audios" -name "*.json")
python3 -mjson.tool "$file"|head -n 100

{
    "DaysBetweenFirstAndLastDate": 22,
    "MetadataDevice": {
        "Artist": "AudioMoth 24E1440360371EA1",
        "SerialNumber": "24E1440360371EA1",
        "NomenclatureNode": "3_92_0_1343",
        "CumulusName": "92",
        "CentroidCumulusLatitude": 16.17538,
        "CentroidCumulusLongitude": -90.87685,
        "DateDeployment": "2021-10-09",
        "EcosystemsName": "Selvas humedas",
        "Latitude": 16.1087878,
        "Longitude": -90.9012417,
        "NodeCategoryIntegrity": "Degradado"
    },
    "MetadataFiles": {
        "/LUSTRE/sacmod/SIPECAM/testing_simex/sipecam_simex_std_dir/92/3_92_0_1343/24E1440360371EA1/2021-10-09/audios/Audible/bae0c0a09ebb9a4318949f6f14713e29.WAV": {
            "Battery": "4.7V",
            "Datetime": "11:00:00 14/11/2021 (UTC-0600)",
            "Gain": "medium",
            "Timezone": "UTC",
            "BitRate": "768.0 Kbit/sec",
            "SerialNumber": "24E1440360371EA1",
            "FileSize": "5.8 MB",
            "E

In [35]:
%%bash
path_std_dir="/LUSTRE/sacmod/SIPECAM/testing_simex/sipecam_simex_std_dir"
txt_file=$(find "$path_std_dir" -name "*.txt")
cat "$txt_file"

/LUSTRE/sacmod/SIPECAM/testing_simex/sipecam_simex_std_dir/92/3_92_1_1334/HLPXGM09048594/2021-10-08/images_videos
/LUSTRE/sacmod/SIPECAM/testing_simex/sipecam_simex_std_dir/92/3_92_0_1343/24E1440360371EA1/2021-10-09/audios/Audible


## Dir with audio files

**For this need to have in `move` cli, MAX_NUMBER_OF_DAYS = 40**

In [36]:
%%bash
dir_to_be_processed="/LUSTRE/sacmod/SIPECAM/testing_simex/audios_ultra"
path_std_dir="/LUSTRE/sacmod/SIPECAM/testing_simex/sipecam_simex_std_dir"
move_files_to_standard_directory --directory_with_file_of_serial_number_and_dates "$dir_to_be_processed" --path_for_standard_directory "$path_std_dir"

## Check it

In [37]:
%%bash
dir_to_be_processed="/LUSTRE/sacmod/SIPECAM/testing_simex/audios_ultra"
file_for_logs=$(find "$dir_to_be_processed" -name 'logs_simex_move_files_to_standard_directory.logs')
head -n 100 "$file_for_logs"

2022-06-26 13:23:17,909 [INFO]  Dir /LUSTRE/sacmod/SIPECAM/testing_simex/audios_ultra has serial number 24E4C30453E1BE08
2022-06-26 13:23:17,910 [INFO]  File /LUSTRE/sacmod/SIPECAM/testing_simex/audios_ultra/20210904_182000.WAV has date 2021-09-04
2022-06-26 13:23:17,910 [INFO]  File /LUSTRE/sacmod/SIPECAM/testing_simex/audios_ultra/20211002_052000.WAV has date 2021-10-02
2022-06-26 13:23:17,910 [INFO]  DaysBetweenFirstAndLastDate: 28
2022-06-26 13:23:18,066 [INFO]  Query to Zendro GQL: query {
  physical_devices(pagination: {limit: 0}, search: {field: comments, value: "ADM24E4C30453E1BE08", operator: like}) {
    device_deploymentsFilter(pagination: {limit: 0}, search: {operator: and, search: [{field: date_deployment, value: "2021-09-04", valueType: String, operator: gte}, {field: date_deployment, value: "2021-10-02", valueType: String, operator: lte}]}) {
      node {
        nomenclatura
        cat_integr
        ecosystems {
          name
        }
      }
      cumulus {
       

In [38]:
%%bash
path_std_dir="/LUSTRE/sacmod/SIPECAM/testing_simex/sipecam_simex_std_dir"
dir_audios=$(find "$path_std_dir" -name "24E4C30453E1BE08")
file=$(find "$dir_audios" -name "*.json")
python3 -mjson.tool "$file"|head -n 100

find: ‘’: No such file or directory
usage: python -m json.tool [-h] [--sort-keys] [--json-lines]
                           [infile] [outfile]
python -m json.tool: error: argument infile: can't open '': [Errno 2] No such file or directory: ''


In [39]:
%%bash
path_std_dir="/LUSTRE/sacmod/SIPECAM/testing_simex/sipecam_simex_std_dir"
txt_file=$(find "$path_std_dir" -name "*.txt")
cat "$txt_file"

/LUSTRE/sacmod/SIPECAM/testing_simex/sipecam_simex_std_dir/92/3_92_1_1334/HLPXGM09048594/2021-10-08/images_videos
/LUSTRE/sacmod/SIPECAM/testing_simex/sipecam_simex_std_dir/92/3_92_0_1343/24E1440360371EA1/2021-10-09/audios/Audible


In [40]:
%%bash
rm /shared_volume/sipecam_files_to_extract_metadata_from_26-06-2022.txt
rm /shared_volume/sipecam_subdirectories_26-06-2022.txt

# Example of alternative query in move_files cli

In [1]:
import datetime
from simex.utils.zendro import query_alternative_auxiliar_for_move_files_to_standard_directory, \
query_alternative_for_move_files_to_standard_directory

In [None]:
%%bash
echo "SIPECAM_ZENDRO_GQL_URL=https://gql.sipecamdata.conabio.gob.mx/" > ~/.simex_env
echo "SIPECAM_ZENDRO_GQL_USER=<user>" >> ~/.simex_env
echo "SIPECAM_ZENDRO_GQL_PASSWORD=<password>" >> ~/.simex_env


`/LUSTRE/sacmod/SIPECAM/cumulo_92/primera_entrega/Mes 3/3_92_1_1334/09048594`

In [17]:
first_date_str = "2021-10-21"
second_date_str = "2021-11-05"
serial_number = "HLPXGM09048594"
file_type="image"
query_result, operation_sgqlc = query_alternative_auxiliar_for_move_files_to_standard_directory(serial_number,
                                                                                                file_type)
print(operation_sgqlc)
device_deploymentsFilter_list = query_result["data"]["physical_devices"][0]["device_deploymentsFilter"]
format_string_data = "%Y-%m-%d"
list_dates_device_deployment = [d["date_deployment"].split('T')[0] for d in device_deploymentsFilter_list]
list_datetimes_device_deployment = [datetime.datetime.strptime(d["date_deployment"].split('T')[0],
                                                               format_string_data) for d in device_deploymentsFilter_list]
first_datetime  = datetime.datetime.strptime(first_date_str, format_string_data)
second_datetime = datetime.datetime.strptime(second_date_str, format_string_data)


query {
  physical_devices(pagination: {limit: 0}, search: {field: serial_number, value: "HLPXGM09048594", operator: like}) {
    device_deploymentsFilter(pagination: {limit: 0}) {
      date_deployment
    }
  }
}


In [18]:
device_deploymentsFilter_list

[{'date_deployment': '2021-10-08T16:56:00.000Z'},
 {'date_deployment': '2021-11-13T15:50:00.000Z'},
 {'date_deployment': '2021-07-28T22:29:00.000Z'},
 {'date_deployment': '2021-12-18T15:58:00.000Z'},
 {'date_deployment': '2022-01-26T17:55:00.000Z'},
 {'date_deployment': '2022-03-05T16:06:00.000Z'},
 {'date_deployment': '2022-04-06T16:09:00.000Z'},
 {'date_deployment': '2022-05-14T14:10:00.000Z'}]

In [19]:
list_datetimes_device_deployment

[datetime.datetime(2021, 10, 8, 0, 0),
 datetime.datetime(2021, 11, 13, 0, 0),
 datetime.datetime(2021, 7, 28, 0, 0),
 datetime.datetime(2021, 12, 18, 0, 0),
 datetime.datetime(2022, 1, 26, 0, 0),
 datetime.datetime(2022, 3, 5, 0, 0),
 datetime.datetime(2022, 4, 6, 0, 0),
 datetime.datetime(2022, 5, 14, 0, 0)]

In [20]:
list_datetimes_device_deployment.sort()


In [21]:
list_datetimes_device_deployment

[datetime.datetime(2021, 7, 28, 0, 0),
 datetime.datetime(2021, 10, 8, 0, 0),
 datetime.datetime(2021, 11, 13, 0, 0),
 datetime.datetime(2021, 12, 18, 0, 0),
 datetime.datetime(2022, 1, 26, 0, 0),
 datetime.datetime(2022, 3, 5, 0, 0),
 datetime.datetime(2022, 4, 6, 0, 0),
 datetime.datetime(2022, 5, 14, 0, 0)]

In [22]:
def get_date_of_device_deploymentsFilter_list(d):
    return datetime.datetime.strptime(d["date_deployment"].split('T')[0],
                                      format_string_data)

In [23]:
device_deploymentsFilter_list.sort(key=get_date_of_device_deploymentsFilter_list)

In [24]:
device_deploymentsFilter_list

[{'date_deployment': '2021-07-28T22:29:00.000Z'},
 {'date_deployment': '2021-10-08T16:56:00.000Z'},
 {'date_deployment': '2021-11-13T15:50:00.000Z'},
 {'date_deployment': '2021-12-18T15:58:00.000Z'},
 {'date_deployment': '2022-01-26T17:55:00.000Z'},
 {'date_deployment': '2022-03-05T16:06:00.000Z'},
 {'date_deployment': '2022-04-06T16:09:00.000Z'},
 {'date_deployment': '2022-05-14T14:10:00.000Z'}]

In [25]:
max_number_of_days = 60
for k in range(len(list_datetimes_device_deployment) - 1):
    datetime_device_deployment_1 = list_datetimes_device_deployment[k]
    datetime_device_deployment_2 = list_datetimes_device_deployment[k+1]
    diff_datetimes = second_datetime - datetime_device_deployment_1
    diff_datetimes_days = diff_datetimes.days
    print(datetime_device_deployment_1)
    print(datetime_device_deployment_2)
    if datetime_device_deployment_1 <= first_datetime and second_datetime <= datetime_device_deployment_2 and diff_datetimes_days <= max_number_of_days:
        idx_date = k
        break
    else:
        idx_date = None

2021-07-28 00:00:00
2021-10-08 00:00:00
2021-10-08 00:00:00
2021-11-13 00:00:00


In [26]:
idx_date

1

In [27]:
date_for_filter = device_deploymentsFilter_list[idx_date]["date_deployment"]

In [28]:
date_for_filter

'2021-10-08T16:56:00.000Z'

In [29]:


query_result, operation_sgqlc = query_alternative_for_move_files_to_standard_directory(serial_number,
                                                                                       date_for_filter,
                                                                                       file_type)
device_deploymentsFilter_list = query_result["data"]["physical_devices"][0]["device_deploymentsFilter"]
print(operation_sgqlc)


query {
  physical_devices(pagination: {limit: 0}, search: {field: serial_number, value: "HLPXGM09048594", operator: like}) {
    device_deploymentsFilter(pagination: {limit: 0}, search: {field: date_deployment, value: "2021-10-08T16:56:00.000Z", operator: eq}) {
      node {
        nomenclatura
        cat_integr
        ecosystems {
          name
        }
      }
      cumulus {
        name
        geometry
      }
      date_deployment
      latitude
      longitude
    }
  }
}


In [30]:
device_deploymentsFilter_list

[{'node': {'nomenclatura': '3_92_1_1334',
   'cat_integr': 'Integro',
   'ecosystems': {'name': 'Selvas humedas'}},
  'cumulus': {'name': '92',
   'geometry': {'type': 'Polygon',
    'coordinates': [[[-90.809, 16.202],
      [-90.890465, 16.249365],
      [-90.936983, 16.252845],
      [-90.940578, 16.20791],
      [-90.94417, 16.162978],
      [-90.894264, 16.103474],
      [-90.811481, 16.109274],
      [-90.809, 16.202]]]}},
  'date_deployment': '2021-10-08T16:56:00.000Z',
  'latitude': 16.264693,
  'longitude': -90.9371013}]

**Another example**

`/LUSTRE/sacmod/SIPECAM/cumulo_92/primera_entrega/Mes 4/3_92_0_1341/3_92_0_1341_3/audio/Nueva carpeta`

In [31]:
first_date_str = "2021-11-14"
second_date_str = "2021-12-06"
serial_number = "24E1440360371EA1"
file_type="audio"
query_result, operation_sgqlc = query_alternative_auxiliar_for_move_files_to_standard_directory(serial_number,
                                                                                                file_type)
print(operation_sgqlc)
device_deploymentsFilter_list = query_result["data"]["physical_devices"][0]["device_deploymentsFilter"]
format_string_data = "%Y-%m-%d"
list_dates_device_deployment = [d["date_deployment"].split('T')[0] for d in device_deploymentsFilter_list]
list_datetimes_device_deployment = [datetime.datetime.strptime(d["date_deployment"].split('T')[0],
                                                               format_string_data) for d in device_deploymentsFilter_list]
first_datetime  = datetime.datetime.strptime(first_date_str, format_string_data)
second_datetime = datetime.datetime.strptime(second_date_str, format_string_data)


query {
  physical_devices(pagination: {limit: 0}, search: {field: comments, value: "ADM24E1440360371EA1", operator: like}) {
    device_deploymentsFilter(pagination: {limit: 0}) {
      date_deployment
    }
  }
}


In [32]:
device_deploymentsFilter_list

[{'date_deployment': '2021-10-09T14:56:00.000Z'},
 {'date_deployment': '2021-12-19T16:05:00.000Z'}]

In [33]:
list_datetimes_device_deployment

[datetime.datetime(2021, 10, 9, 0, 0), datetime.datetime(2021, 12, 19, 0, 0)]

In [34]:
list_datetimes_device_deployment.sort()


In [35]:
list_datetimes_device_deployment

[datetime.datetime(2021, 10, 9, 0, 0), datetime.datetime(2021, 12, 19, 0, 0)]

In [36]:
def get_date_of_device_deploymentsFilter_list(d):
    return datetime.datetime.strptime(d["date_deployment"].split('T')[0],
                                      format_string_data)

In [37]:
device_deploymentsFilter_list.sort(key=get_date_of_device_deploymentsFilter_list)

In [38]:
device_deploymentsFilter_list

[{'date_deployment': '2021-10-09T14:56:00.000Z'},
 {'date_deployment': '2021-12-19T16:05:00.000Z'}]

In [39]:
max_number_of_days = 60
for k in range(len(list_datetimes_device_deployment) - 1):
    datetime_device_deployment_1 = list_datetimes_device_deployment[k]
    datetime_device_deployment_2 = list_datetimes_device_deployment[k+1]
    diff_datetimes = second_datetime - datetime_device_deployment_1
    diff_datetimes_days = diff_datetimes.days
    print(datetime_device_deployment_1)
    print(datetime_device_deployment_2)
    print(diff_datetimes_days)
    if datetime_device_deployment_1 <= first_datetime and second_datetime <= datetime_device_deployment_2 and diff_datetimes_days <= max_number_of_days:
        idx_date = k
        break
    else:
        idx_date = None

2021-10-09 00:00:00
2021-12-19 00:00:00
58


In [40]:
idx_date

0

In [41]:
date_for_filter = device_deploymentsFilter_list[idx_date]["date_deployment"]

In [42]:
date_for_filter

'2021-10-09T14:56:00.000Z'

In [43]:


query_result, operation_sgqlc = query_alternative_for_move_files_to_standard_directory(serial_number,
                                                                                       date_for_filter,
                                                                                       file_type)
device_deploymentsFilter_list = query_result["data"]["physical_devices"][0]["device_deploymentsFilter"]
print(operation_sgqlc)


query {
  physical_devices(pagination: {limit: 0}, search: {field: comments, value: "ADM24E1440360371EA1", operator: like}) {
    device_deploymentsFilter(pagination: {limit: 0}, search: {field: date_deployment, value: "2021-10-09T14:56:00.000Z", operator: eq}) {
      node {
        nomenclatura
        cat_integr
        ecosystems {
          name
        }
      }
      cumulus {
        name
        geometry
      }
      date_deployment
      latitude
      longitude
    }
  }
}


In [44]:
device_deploymentsFilter_list

[{'node': {'nomenclatura': '3_92_0_1343',
   'cat_integr': 'Degradado',
   'ecosystems': {'name': 'Selvas humedas'}},
  'cumulus': {'name': '92',
   'geometry': {'type': 'Polygon',
    'coordinates': [[[-90.809, 16.202],
      [-90.890465, 16.249365],
      [-90.936983, 16.252845],
      [-90.940578, 16.20791],
      [-90.94417, 16.162978],
      [-90.894264, 16.103474],
      [-90.811481, 16.109274],
      [-90.809, 16.202]]]}},
  'date_deployment': '2021-10-09T14:56:00.000Z',
  'latitude': 16.1087878,
  'longitude': -90.9012417}]

# (Deprecated) Extract metadata and ingest it

In [12]:
%%bash
file_to_be_processed="/LUSTRE/sacmod/SIPECAM/Entregas_2021/octubre_2021/SIPECAM/Playon 1338/Camaras/1338_1/100RECNX/RCNX0049.JPG"
extract_metadata_and_ingest_it --input_file "$file_to_be_processed"

In [13]:
%%bash
file_to_be_processed="/LUSTRE/sacmod/SIPECAM/Entregas_2021/octubre_2021/SIPECAM/Playon 1338/Camaras/1338_1/100RECNX/RCNX0012.AVI"
extract_metadata_and_ingest_it --input_file "$file_to_be_processed"

In [14]:
%%bash
file_to_be_processed="/LUSTRE/sacmod/SIPECAM/Entregas_2021/octubre_2021/SIPECAM/Playon 1338/Audio/1338_1/Ultrasonico/20210806_044000.WAV"
extract_metadata_and_ingest_it --input_file "$file_to_be_processed"

## Check

In [15]:
%%bash
file_to_be_processed="/LUSTRE/sacmod/SIPECAM/Entregas_2021/octubre_2021/SIPECAM/Playon 1338/Camaras/1338_1/100RECNX/RCNX0049.JPG"
dirname_file_to_be_processed=$(dirname "$file_to_be_processed")
dir_for_logs=$(find "$dirname_file_to_be_processed" -name 'logs_simex_extract_metadata_and_ingest')
filename_to_be_processed=$(basename "$file_to_be_processed")
cat "$dir_for_logs/$filename_to_be_processed.logs"

2021-12-15 11:35:34,559 [INFO]  extraction of metadata and ingestion of /LUSTRE/sacmod/SIPECAM/Entregas_2021/octubre_2021/SIPECAM/Playon 1338/Camaras/1338_1/100RECNX/RCNX0049.JPG
2021-12-15 11:35:34,561 [INFO]  Read metadata of JPG
2021-12-15 11:35:34,561 [INFO]  {}
2021-12-15 11:35:34,561 [INFO]  Getting node nomenclature and cumulus name from zendro
2021-12-15 11:35:34,562 [INFO]  Copying file to 3/1_3_1_28/2021-10-01/JPG


In [16]:
%%bash
file_to_be_processed="/LUSTRE/sacmod/SIPECAM/Entregas_2021/octubre_2021/SIPECAM/Playon 1338/Camaras/1338_1/100RECNX/RCNX0012.AVI"
dirname_file_to_be_processed=$(dirname "$file_to_be_processed")
dir_for_logs=$(find "$dirname_file_to_be_processed" -name 'logs_simex_extract_metadata_and_ingest')
filename_to_be_processed=$(basename "$file_to_be_processed")
cat "$dir_for_logs/$filename_to_be_processed.logs"

2021-12-15 11:35:35,463 [INFO]  extraction of metadata and ingestion of /LUSTRE/sacmod/SIPECAM/Entregas_2021/octubre_2021/SIPECAM/Playon 1338/Camaras/1338_1/100RECNX/RCNX0012.AVI
2021-12-15 11:35:35,465 [INFO]  Read metadata of AVI
2021-12-15 11:35:35,465 [INFO]  {}
2021-12-15 11:35:35,465 [INFO]  Getting node nomenclature and cumulus name from zendro
2021-12-15 11:35:35,465 [INFO]  Copying file to 3/1_3_1_28/2021-10-01/AVI


In [17]:
%%bash
file_to_be_processed="/LUSTRE/sacmod/SIPECAM/Entregas_2021/octubre_2021/SIPECAM/Playon 1338/Audio/1338_1/Ultrasonico/20210806_044000.WAV"
dirname_file_to_be_processed=$(dirname "$file_to_be_processed")
dir_for_logs=$(find "$dirname_file_to_be_processed" -name 'logs_simex_extract_metadata_and_ingest')
filename_to_be_processed=$(basename "$file_to_be_processed")
cat "$dir_for_logs/$filename_to_be_processed.logs"

2021-12-15 11:35:35,910 [INFO]  extraction of metadata and ingestion of /LUSTRE/sacmod/SIPECAM/Entregas_2021/octubre_2021/SIPECAM/Playon 1338/Audio/1338_1/Ultrasonico/20210806_044000.WAV
2021-12-15 11:35:35,912 [INFO]  Read metadata of WAV
2021-12-15 11:35:35,912 [INFO]  {}
2021-12-15 11:35:35,912 [INFO]  Getting node nomenclature and cumulus name from zendro
2021-12-15 11:35:35,912 [INFO]  Copying file to 3/1_3_1_28/2021-10-01/WAV
