### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation</font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

In [1]:
import os
import pandas as pd

In [8]:
# List every entry name in the datasets/coco-qa/images/ directory
# (os.listdir returns bare names, not full paths, in arbitrary order).
images = os.listdir('datasets/coco-qa/images/')
# Number of directory entries found; displayed as the cell output.
len(images)

69172

In [4]:
# Distinct values of the 'image_id' column of labels.csv, converted to a plain Python list.
image_ids = pd.unique(pd.read_csv('datasets/coco-qa/labels.csv')['image_id']).tolist()
# Number of distinct image IDs; displayed as the cell output.
len(image_ids)

69172

In [5]:
def verify_ids(images, image_ids):
    """Split image filenames by whether their numeric ID appears in ``image_ids``.

    A file's ID is the text before the first ``.`` in its name, parsed as an
    integer (``"123.jpg"`` -> ``123``).

    Args:
        images: Iterable of image filenames (bare names, not paths).
        image_ids: Iterable of known integer IDs.

    Returns:
        A ``(ids_in_images, ids_not_in_images)`` tuple of lists. Every entry is
        a ``(filename, image_number)`` pair, kept in the order the filenames
        were supplied. Filenames whose leading part is not an integer (for
        example hidden entries such as ``.ipynb_checkpoints``) are placed in
        the second list with ``image_number`` set to ``None`` rather than
        raising ``ValueError``.
    """
    # Build the lookup set once so each membership test is O(1).
    image_id_set = set(image_ids)

    ids_in_images = []
    ids_not_in_images = []

    for image in images:
        # Extract the numeric part from the image filename
        stem = image.split('.')[0]
        try:
            image_number = int(stem)
        except ValueError:
            # Not an ID-named file: report it as unmatched instead of
            # aborting the whole verification run.
            ids_not_in_images.append((image, None))
            continue

        if image_number in image_id_set:
            ids_in_images.append((image, image_number))
        else:
            ids_not_in_images.append((image, image_number))

    return ids_in_images, ids_not_in_images

In [6]:
# Partition the listed filenames by whether their numeric ID appears in image_ids.
ids_in_images, ids_not_in_images = verify_ids(images, image_ids)

# Report how many filenames fell into each group.
print(f"Number of IDs present in images: {len(ids_in_images)}")
print(f"Number of IDs not present in images: {len(ids_not_in_images)}")

Number of IDs present in images: 69172
Number of IDs not present in images: 0
