### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests
response = requests.get("https://oracle.com")
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os
print(os.environ["NB_SESSION_COMPARTMENT_OCID"])
print(os.environ["PROJECT_OCID"])
print(os.environ["USER_OCID"])
print(os.environ["TENANCY_OCID"])
print(os.environ["NB_REGION"])
```
</details>

In [2]:
#Import ADS Library:
#The code imports the ads library, which is Oracle's Accelerated Data Science library. This library provides various tools and functionalities for data science tasks within Oracle Cloud Infrastructure (OCI).
#Print ADS Library Version:
#The print(ads.__version__) command outputs the version of the ads library currently installed. This is useful for debugging, ensuring compatibility, and verifying that the correct version of the library is being used.
import ads
print(ads.__version__)

2.11.9


In [3]:
# To upgrade, run the command below. `!pip install oracle-ads --upgrade` upgrades the ADS library to the latest version, so that you have the most recent features, bug fixes, and improvements.
! pip install oracle-ads --upgrade

Collecting oracle-ads
  Using cached oracle_ads-2.11.15-py3-none-any.whl.metadata (15 kB)
Using cached oracle_ads-2.11.15-py3-none-any.whl (22.8 MB)
Installing collected packages: oracle-ads
  Attempting uninstall: oracle-ads
    Found existing installation: oracle_ads 2.11.9
    Uninstalling oracle_ads-2.11.9:
      Successfully uninstalled oracle_ads-2.11.9
Successfully installed oracle-ads-2.11.15


In [25]:
#Import ADS Library: The code imports the ads library, which is Oracle's Accelerated Data Science library. This library provides various tools and functionalities for data science tasks within Oracle Cloud Infrastructure (OCI).
#Import Pandas Library: The code imports the pandas library, which is a powerful data manipulation and analysis library in Python. It is commonly used for handling structured data and performing operations such as data cleaning, transformation, and analysis.
#Set ADS Authentication Method: The ads.set_auth("resource_principal") command sets the authentication method for the ADS library to use "resource principal."
#This method allows the ADS library to authenticate using the resource principal, which is an OCI identity assigned to the resource running the code (for example, a Data Science notebook session).
#Using resource principal authentication simplifies access to OCI services by managing credentials securely and automatically within the cloud environment.

import ads
import pandas as pd

ads.set_auth("resource_principal")

In [9]:
#By following these steps, the code accesses and reads the specified CSV file from OCI Object Storage into a pandas DataFrame, enabling further data manipulation and analysis.

bucket_name = "skin_cancer_real_images"
namespace = "orasenatdpltintegration01"


file_name = "HAM10000_metadata.csv"
df = pd.read_csv(
    f"oci://{bucket_name}@{namespace}/{file_name}",
    storage_options=ads.common.auth.default_signer(),
)

In [10]:
# The output of df.head() displays the first five rows of the DataFrame, along with the column names and the data contained in those rows. Here is the output in a table format:
df.head()

Unnamed: 0,lesion_id,image_id,dx,dx_type,age,sex,localization
0,HAM_0000118,ISIC_0027419,bkl,histo,80.0,male,scalp
1,HAM_0000118,ISIC_0025030,bkl,histo,80.0,male,scalp
2,HAM_0002730,ISIC_0026769,bkl,histo,80.0,male,scalp
3,HAM_0002730,ISIC_0025661,bkl,histo,80.0,male,scalp
4,HAM_0001466,ISIC_0031633,bkl,histo,75.0,male,ear


In [None]:
# The code downloads a CSV and ZIP file from OCI Object Storage, processes the data, extracts images, and uploads the images back to OCI Object Storage.
import oci
import ads
import pandas as pd
import zipfile
import os
from PIL import Image
import shutil

# Authenticate using ADS library
ads.set_auth('resource_principal')

# Define Object Storage details
bucket_name = "skin_cancer_real_images"
namespace = "orasenatdpltintegration01"
zip_file_name = "SkinCancerData.zip"
csv_file_name = "HAM10000_metadata.csv"

# Define the local paths for the files
local_zip_file_path = f'/home/datascience/{zip_file_name}'
local_csv_file_path = f'/home/datascience/{csv_file_name}'

# Setup Resource Principal for OCI SDK
signer = oci.auth.signers.get_resource_principals_signer()

# Initialize Object Storage Client
object_storage_client = oci.object_storage.ObjectStorageClient(config={}, signer=signer)

# Function to download a file from Object Storage
def download_file_from_oci(bucket_name, namespace, object_name, local_file_path):
    """Download one object from OCI Object Storage to a local file.

    The object body is streamed to disk in fixed-size chunks instead of being
    read into memory in one piece, so large objects (such as the image
    archive) do not have to fit in RAM.

    Args:
        bucket_name: Name of the bucket that holds the object.
        namespace: Object Storage namespace of the bucket.
        object_name: Name of the object to download.
        local_file_path: Destination path; an existing file is overwritten.
    """
    response = object_storage_client.get_object(namespace, bucket_name, object_name)
    with open(local_file_path, 'wb') as f:
        # 1 MiB chunks; decode_content=False writes the bytes exactly as stored
        for chunk in response.data.raw.stream(1024 * 1024, decode_content=False):
            f.write(chunk)

# Download the CSV file from Object Storage
download_file_from_oci(bucket_name, namespace, csv_file_name, local_csv_file_path)

# Load the CSV file into a DataFrame
df = pd.read_csv(local_csv_file_path)

# Save the CSV file locally (if needed)
df.to_csv(local_csv_file_path, index=False)

# Download the zip file from Object Storage
download_file_from_oci(bucket_name, namespace, zip_file_name, local_zip_file_path)

# Define the path to extract images
extract_path = '/home/datascience/skin_cancer_real_images/'

# Clear the extract path directory if it exists
if os.path.exists(extract_path):
    shutil.rmtree(extract_path)
os.makedirs(extract_path)

# Extract images from the zip file
with zipfile.ZipFile(local_zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

# Collect every extracted .jpg/.png file (sub-folders included) and print the list
extracted_files = [
    os.path.join(folder, name)
    for folder, _, names in os.walk(extract_path)
    for name in names
    if name.endswith(('.jpg', '.png'))
]
print(f'Extracted files: {extracted_files}')

# Function to upload a file to Object Storage
def upload_file_to_oci(local_file_path, bucket_name, namespace, object_name):
    """Upload a local file to OCI Object Storage under the name ``object_name``.

    Args:
        local_file_path: Path of the file to read; it is opened in binary mode.
        bucket_name: Name of the destination bucket.
        namespace: Object Storage namespace of the bucket.
        object_name: Name the object gets inside the bucket.
    """
    with open(local_file_path, 'rb') as payload:
        object_storage_client.put_object(
            namespace,
            bucket_name,
            object_name,
            payload,
        )

# Decode every extracted image into memory (keyed by its file name) and upload
# the original file to Object Storage under the "extracted_images/" prefix
image_dict = {}
for img_path in extracted_files:
    img_name = os.path.basename(img_path)

    # Convert to RGB so every stored image has three channels
    image = Image.open(img_path).convert('RGB')
    image_dict[img_name] = image

    object_name = f"extracted_images/{img_name}"
    upload_file_to_oci(img_path, bucket_name, namespace, object_name)

# Report how many images ended up in the dictionary
image_count = len(image_dict)
print(f'Number of images extracted and uploaded: {image_count}')


In [35]:
#This code defines and initializes a Generative Adversarial Network (GAN) using PyTorch, consisting of two neural network models: the Generator and the Discriminator. 
#The Generator creates synthetic images from random noise vectors, using layers of transposed convolutions, batch normalization, and ReLU activations, culminating in a Tanh activation. 
#The Discriminator evaluates images to determine if they are real or fake, using layers of convolutions, batch normalization, and Leaky ReLU activations, ending with a Sigmoid activation.
#The input to this code includes a noise vector of size 100, which serves as the input for the Generator to create synthetic images. The output of the Generator is these synthetic images, 
#while the Discriminator outputs probability scores indicating whether the images are real or fake. The code also prints the architecture of both models to the console for verification. 
#To achieve the intended functionality, the models need to be trained with a dataset of real images, a training loop, loss functions, and optimizers.

# Install torch if not already installed
!pip install torch torchvision

import torch
import torch.nn as nn
import torchvision.transforms as transforms

# Define the Generator model
class Generator(nn.Module):
    """Generator network: maps a noise tensor to a 3-channel image.

    Five transposed convolutions (kernel size 4, no bias) grow a
    ``(N, nz, 1, 1)`` input to ``(N, 3, 64, 64)``. Every layer except the last
    is followed by batch normalisation and ReLU; the last is followed by Tanh,
    so outputs lie in [-1, 1].
    """

    def __init__(self, nz):
        """Build the layer stack; ``nz`` is the channel count of the noise input."""
        super().__init__()

        layers = []
        in_channels = nz
        # (output channels, stride, padding) of each normalised up-sampling stage
        for out_channels, stride, padding in (
            (512, 1, 0),
            (256, 2, 1),
            (128, 2, 1),
            (64, 2, 1),
        ):
            layers.append(
                nn.ConvTranspose2d(in_channels, out_channels, 4, stride, padding, bias=False)
            )
            layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.ReLU(True))
            in_channels = out_channels

        # Output stage: project to 3 channels and squash with Tanh
        layers.append(nn.ConvTranspose2d(in_channels, 3, 4, 2, 1, bias=False))
        layers.append(nn.Tanh())

        self.main = nn.Sequential(*layers)

    def forward(self, input):
        """Run ``input`` through the layer stack and return the generated batch."""
        generated = self.main(input)
        return generated

# Define the Discriminator model
class Discriminator(nn.Module):
    """Discriminator network: scores a 3-channel image with a value in (0, 1).

    Four stride-2 convolutions (kernel size 4, no bias) halve the spatial size
    each time while the channel count goes 64, 128, 256, 512; a final 4x4
    convolution reduces to one channel and Sigmoid squashes it. For a
    ``(N, 3, 64, 64)`` input the output is ``(N, 1, 1, 1)``.
    """

    def __init__(self):
        """Build the layer stack."""
        super().__init__()

        # First stage has no batch normalisation
        stages = [
            nn.Conv2d(3, 64, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
        ]

        # Normalised stages: channel count doubles each time until it reaches 512
        width = 64
        while width < 512:
            stages.extend([
                nn.Conv2d(width, width * 2, 4, 2, 1, bias=False),
                nn.BatchNorm2d(width * 2),
                nn.LeakyReLU(0.2, inplace=True),
            ])
            width *= 2

        # Output stage: single-channel score squashed by Sigmoid
        stages.extend([
            nn.Conv2d(width, 1, 4, 1, 0, bias=False),
            nn.Sigmoid(),
        ])

        self.main = nn.Sequential(*stages)

    def forward(self, input):
        """Run ``input`` through the layer stack and return the scores."""
        scores = self.main(input)
        return scores

# Hyperparameters
nz = 100  # Size of the noise vector

# Initialize models
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
netG = Generator(nz).to(device)
netD = Discriminator().to(device)

# Print model summaries
print("Generator Model:")
print(netG)
print("\nDiscriminator Model:")
print(netD)


Generator Model:
Generator(
  (main): Sequential(
    (0): ConvTranspose2d(100, 512, kernel_size=(4, 4), stride=(1, 1), bias=False)
    (1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): ConvTranspose2d(512, 256, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (4): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): ConvTranspose2d(256, 128, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (7): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (8): ReLU(inplace=True)
    (9): ConvTranspose2d(128, 64, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (10): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU(inplace=True)
    (12): ConvTranspose2d(64, 3, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1), bias=False)
    (1

In [None]:
#This code sets up and trains a Generative Adversarial Network (GAN) using PyTorch, designed to generate synthetic images. 
#The GAN consists of two models: a Generator and a Discriminator. The Generator creates synthetic images from random noise vectors, 
#while the Discriminator attempts to distinguish between real and synthetic images. 
#The code includes downloading images from Oracle Cloud Infrastructure (OCI) Object Storage, training the GAN, and then uploading the generated images back to OCI Object Storage.
# Install torch and torchvision if not already installed
!pip install torch torchvision oci

import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision.utils import save_image
import os
from PIL import Image
import shutil
import oci
from oci.object_storage import ObjectStorageClient
from oci.auth.signers import get_resource_principals_signer

# Define the Generator model
class Generator(nn.Module):
    """Generator half of the GAN: turns noise into 3-channel images.

    The input is expected as ``(N, nz, 1, 1)``; five transposed convolutions
    (kernel size 4, no bias) expand it to ``(N, 3, 64, 64)``. The first four
    are each followed by batch normalisation and ReLU, the last by Tanh.
    """

    def __init__(self, nz):
        """Create the layers; ``nz`` is the number of channels of the noise input."""
        super().__init__()

        def up_block(c_in, c_out, stride, padding):
            # One up-sampling stage: transposed conv -> batch norm -> ReLU
            return (
                nn.ConvTranspose2d(c_in, c_out, 4, stride, padding, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(True),
            )

        self.main = nn.Sequential(
            *up_block(nz, 512, 1, 0),
            *up_block(512, 256, 2, 1),
            *up_block(256, 128, 2, 1),
            *up_block(128, 64, 2, 1),
            # Final stage maps to 3 channels; Tanh keeps values in [-1, 1]
            nn.ConvTranspose2d(64, 3, 4, 2, 1, bias=False),
            nn.Tanh(),
        )

    def forward(self, input):
        """Return the images generated from the noise batch ``input``."""
        return self.main(input)

# Define the Discriminator model
class Discriminator(nn.Module):
    """Discriminator half of the GAN: outputs a score in (0, 1) per image.

    Four stride-2 convolutions (kernel size 4, no bias) shrink a
    ``(N, 3, 64, 64)`` input to 4x4 with 512 channels; a last 4x4 convolution
    and Sigmoid produce an ``(N, 1, 1, 1)`` score.
    """

    def __init__(self):
        """Create the layers."""
        super().__init__()

        def down_block(c_in, c_out):
            # One down-sampling stage: strided conv -> batch norm -> LeakyReLU
            return (
                nn.Conv2d(c_in, c_out, 4, 2, 1, bias=False),
                nn.BatchNorm2d(c_out),
                nn.LeakyReLU(0.2, inplace=True),
            )

        self.main = nn.Sequential(
            # The first stage is not batch-normalised
            nn.Conv2d(3, 64, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            *down_block(64, 128),
            *down_block(128, 256),
            *down_block(256, 512),
            # Collapse to a single value and squash it with Sigmoid
            nn.Conv2d(512, 1, 4, 1, 0, bias=False),
            nn.Sigmoid(),
        )

    def forward(self, input):
        """Return the scores for the image batch ``input``."""
        return self.main(input)

# Define training parameters
nz = 100  # Size of the noise vector
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
batch_size = 64
num_epochs = 50
images_to_generate = 20000  # Set the desired number of images to generate
generated_images_count = 0  # Counter for generated images

# Define transformations
transform = transforms.Compose([
    transforms.Resize((64, 64)),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])

# Create dataset and dataloader
class ImageDataset(torch.utils.data.Dataset):
    """Dataset over the values of a dictionary of images.

    The dictionary values are copied into a list at construction time, so
    items are indexed in the dictionary's iteration order. An optional
    ``transform`` callable is applied to each item when it is fetched.
    """

    def __init__(self, image_dict, transform=None):
        """Keep the source dictionary, snapshot its values and store ``transform``."""
        self.image_dict = image_dict
        self.transform = transform
        self.images = [*image_dict.values()]

    def __len__(self):
        """Return the number of images captured at construction time."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the image at ``idx``, transformed when a transform is set."""
        sample = self.images[idx]
        return self.transform(sample) if self.transform else sample

# Assuming 'image_dict' is already populated with images
# image_dict = {}  # Populate this dictionary with your actual images

dataset = ImageDataset(image_dict, transform)
dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Initialize models, loss function, and optimizers
netG = Generator(nz).to(device)
netD = Discriminator().to(device)
criterion = nn.BCELoss()
fixed_noise = torch.randn(64, nz, 1, 1, device=device)
real_label = 1.
fake_label = 0.
optimizerD = optim.Adam(netD.parameters(), lr=0.0002, betas=(0.5, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=0.0002, betas=(0.5, 0.999))

# Setup OCI Object Storage
signer = get_resource_principals_signer()
object_storage_client = ObjectStorageClient(config={}, signer=signer)

# Define OCI bucket details
namespace = "orasenat*************"  # Update with your actual namespace
bucket_name = "synthetic_images_bucket"
compartment_id = "ocid1.compartment.oc1..**********************"  # Update with your compartment OCID
# Function to create a bucket if it doesn't exist
def create_bucket(namespace, bucket_name, compartment_id, public_access_type='ObjectRead'):
    """Create the bucket unless it already exists.

    The bucket is looked up first; only a 404 from that lookup triggers a
    create request. A 409 from the create request is reported as "already
    exists". Any other service error is re-raised.

    Args:
        namespace: Object Storage namespace that owns the bucket.
        bucket_name: Name of the bucket to look up or create.
        compartment_id: OCID of the compartment in which a new bucket is created.
        public_access_type: Public access setting for a newly created bucket.
            The default 'ObjectRead' matches the previous hard-coded value.
            NOTE(review): 'ObjectRead' appears to make objects readable
            without authentication; pass 'NoPublicAccess' for a private
            bucket. Confirm against the OCI documentation.

    Raises:
        oci.exceptions.ServiceError: If the lookup fails with a status other
            than 404, or the creation fails with a status other than 409.
    """
    try:
        object_storage_client.get_bucket(namespace, bucket_name)
    except oci.exceptions.ServiceError as e:
        # Only "not found" means we should go on and create the bucket
        if e.status != 404:
            raise
    else:
        print(f"Bucket {bucket_name} already exists.")
        return

    request = oci.object_storage.models.CreateBucketDetails(
        name=bucket_name,
        compartment_id=compartment_id,
        public_access_type=public_access_type,
        storage_tier='Standard'
    )
    try:
        object_storage_client.create_bucket(namespace, request)
    except oci.exceptions.ServiceError as create_e:
        # 409 is treated as "already exists" rather than an error
        if create_e.status != 409:
            raise
        print(f"Bucket {bucket_name} already exists.")
    else:
        print(f"Bucket {bucket_name} created.")

# Create the bucket if it doesn't exist
create_bucket(namespace, bucket_name, compartment_id)

# Training loop with stopping condition and unique filenames.
# Each batch performs one discriminator step and one generator step, then
# samples the generator and uploads the samples to OCI Object Storage. The loop
# ends after `num_epochs` epochs or as soon as exactly `images_to_generate`
# images have been uploaded, whichever comes first.
# NOTE(review): samples are always drawn from the same `fixed_noise` batch, so
# the uploaded files are successive snapshots of the same latent vectors taken
# while the generator is still training. Confirm this is intended, as opposed
# to sampling fresh noise from a fully trained generator.
target_reached = generated_images_count >= images_to_generate
for epoch in range(num_epochs):
    if target_reached:
        break
    for i, data in enumerate(dataloader, 0):
        # Update Discriminator: real batch first, then a generated batch; both
        # gradients accumulate before the single optimizer step
        netD.zero_grad()
        real_cpu = data.to(device)
        b_size = real_cpu.size(0)
        label = torch.full((b_size,), real_label, dtype=torch.float, device=device)
        output = netD(real_cpu).view(-1)
        errD_real = criterion(output, label)
        errD_real.backward()
        D_x = output.mean().item()

        noise = torch.randn(b_size, nz, 1, 1, device=device)
        fake = netG(noise)
        label.fill_(fake_label)
        # detach() keeps this backward pass from reaching the generator
        output = netD(fake.detach()).view(-1)
        errD_fake = criterion(output, label)
        errD_fake.backward()
        D_G_z1 = output.mean().item()
        errD = errD_real + errD_fake
        optimizerD.step()

        # Update Generator: the generated batch is labelled as real so the loss
        # rewards samples the discriminator accepts
        netG.zero_grad()
        label.fill_(real_label)
        output = netD(fake).view(-1)
        errG = criterion(output, label)
        errG.backward()
        D_G_z2 = output.mean().item()
        optimizerG.step()

        # Sample the generator and upload the samples with unique filenames
        with torch.no_grad():
            fake = netG(fixed_noise).detach().cpu()
        # Cap the batch so the total never exceeds the requested number
        remaining = images_to_generate - generated_images_count
        for j in range(min(fake.size(0), remaining)):
            unique_filename = f'synthetic_{epoch}_{i}_{j}.png'
            local_path = os.path.join('/tmp', unique_filename)
            try:
                save_image(fake[j], local_path, normalize=True)
                with open(local_path, 'rb') as f:
                    object_storage_client.put_object(namespace, bucket_name, unique_filename, f)
            finally:
                # The local file is only a staging copy; remove it so /tmp does not fill up
                if os.path.exists(local_path):
                    os.remove(local_path)
            generated_images_count += 1

        # Print progress
        if i % 50 == 0:
            print(f'[{epoch}/{num_epochs}][{i}/{len(dataloader)}] '
                  f'Loss_D: {errD.item():.4f} Loss_G: {errG.item():.4f} '
                  f'D(x): {D_x:.4f} D(G(z)): {D_G_z1:.4f} / {D_G_z2:.4f}')

        # Stop condition if the desired number of images is reached
        if generated_images_count >= images_to_generate:
            target_reached = True
            break

if target_reached:
    print(f"Stopping training as {images_to_generate} images have been generated.")

# Function to count the number of image files in the OCI bucket
def count_images_in_bucket(namespace, bucket_name):
    """Return the number of objects stored in the bucket.

    ``list_objects`` returns results one page at a time, so a single call
    under-counts a large bucket. This follows ``next_start_with`` until the
    listing is exhausted and sums the page sizes.

    Args:
        namespace: Object Storage namespace that owns the bucket.
        bucket_name: Name of the bucket whose objects are counted.

    Returns:
        Total number of objects across all pages of the listing.
    """
    total = 0
    start = None
    while True:
        # Only pass `start` once there is a continuation point
        paging = {"start": start} if start else {}
        page = object_storage_client.list_objects(namespace, bucket_name, **paging).data
        total += len(page.objects)
        start = page.next_start_with
        if not start:
            return total

# Count the number of generated images
num_images = count_images_in_bucket(namespace, bucket_name)
print(f'Number of generated images: {num_images}')
