### OCI Data Science - Useful Tips
<details>
<summary><font size="2">Check for Public Internet Access</font></summary>

```python
import requests

# Probe a well-known public endpoint. Without an explicit timeout requests
# waits indefinitely, so a silently dropped connection would hang the cell.
response = requests.get("https://oracle.com", timeout=10)
assert response.status_code==200, "Internet connection failed"
```
</details>
<details>
<summary><font size="2">Helpful Documentation </font></summary>
<ul><li><a href="https://docs.cloud.oracle.com/en-us/iaas/data-science/using/data-science.htm">Data Science Service Documentation</a></li>
<li><a href="https://docs.cloud.oracle.com/iaas/tools/ads-sdk/latest/index.html">ADS documentation</a></li>
</ul>
</details>
<details>
<summary><font size="2">Typical Cell Imports and Settings for ADS</font></summary>

```python
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import logging
logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.ERROR)

import ads
from ads.dataset.factory import DatasetFactory
from ads.automl.provider import OracleAutoMLProvider
from ads.automl.driver import AutoML
from ads.evaluations.evaluator import ADSEvaluator
from ads.common.data import ADSData
from ads.explanations.explainer import ADSExplainer
from ads.explanations.mlx_global_explainer import MLXGlobalExplainer
from ads.explanations.mlx_local_explainer import MLXLocalExplainer
from ads.catalog.model import ModelCatalog
from ads.common.model_artifact import ModelArtifact
```
</details>
<details>
<summary><font size="2">Useful Environment Variables</font></summary>

```python
import os

# Print each session variable in turn; a missing variable raises KeyError.
for variable_name in (
    "NB_SESSION_COMPARTMENT_OCID",
    "PROJECT_OCID",
    "USER_OCID",
    "TENANCY_OCID",
    "NB_REGION",
):
    print(os.environ[variable_name])
```
</details>

In [1]:
# Loads the clean version of Dpre, trains the NDCC pretrained model (g) with 5-fold cross-validation, and saves the weights of the best-scoring fold
class Net(nn.Module):
    """Feed-forward classifier with two hidden layers.

    Layer widths are ``input_size -> hidden_size -> hidden_size // 2 ->
    num_classes``. Dropout (p=0.3) is applied after the first activation
    only. ``forward`` returns the output of the last linear layer as-is.
    """

    def __init__(self, input_size, hidden_size, num_classes=2):
        super().__init__()
        half_hidden = hidden_size // 2
        # Attribute names become the state_dict keys, so they are kept stable
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(hidden_size, half_hidden)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(half_hidden, num_classes)

    def forward(self, x):
        """Pass ``x`` through the layers in order and return fc3's output."""
        hidden = self.dropout1(self.relu1(self.fc1(x)))
        hidden = self.relu2(self.fc2(hidden))
        return self.fc3(hidden)

class EarlyStopping:
    """Flag when a monitored validation loss has stopped improving.

    Call the instance once per epoch with that epoch's validation loss. A
    loss counts as an improvement only if it is at least ``min_delta`` below
    the best loss seen so far. ``early_stop`` is set to True after
    ``patience`` consecutive calls without an improvement and is never reset.

    A NaN loss is always treated as "no improvement". Previously a NaN failed
    the ``>`` comparison, was stored as the new best loss, and made every
    later comparison false, so early stopping could never trigger again.

    Args:
        patience: Number of consecutive non-improving calls tolerated.
        min_delta: Minimum decrease in loss that counts as an improvement.
    """

    def __init__(self, patience=2, min_delta=0.0005):
        self.patience = patience
        self.min_delta = min_delta
        self.counter = 0  # consecutive calls without improvement
        self.best_loss = None  # lowest accepted loss so far
        self.early_stop = False

    def __call__(self, val_loss):
        """Record ``val_loss`` and update ``counter`` / ``early_stop``."""
        # NaN is the only value that is not equal to itself
        is_nan = val_loss != val_loss
        if not is_nan:
            if self.best_loss is None:
                # First valid observation only seeds the baseline
                self.best_loss = val_loss
                return
            if val_loss <= self.best_loss - self.min_delta:
                self.best_loss = val_loss
                self.counter = 0
                return

        self.counter += 1
        if self.counter >= self.patience:
            self.early_stop = True

def evaluate_model(val_loader, model):
    """Print a classification report for ``model`` over ``val_loader``.

    Switches the model to eval mode (and leaves it there), takes the index of
    the largest output along dim 1 as the prediction for every batch without
    tracking gradients, and prints
    ``classification_report(true_labels, predictions)``. Returns None.
    """
    model.eval()
    y_true = []
    y_pred = []
    with torch.no_grad():
        for batch_inputs, batch_labels in val_loader:
            scores = model(batch_inputs)
            # torch.max over dim 1 yields (values, indices); keep the indices
            batch_pred = torch.max(scores.data, 1)[1]
            y_pred += batch_pred.tolist()
            y_true += batch_labels.tolist()
    print(classification_report(y_true, y_pred))

import copy

# Run-wide settings, named once so the loops and the log line stay in sync
NUM_FOLDS = 5
NUM_EPOCHS = 50
BATCH_SIZE = 32

# NOTE(review): torch.load unpickles the file; only load trusted datasets.
train_dataset = torch.load('Dpre_clean.pth')
features_tensor, labels_tensor = train_dataset.tensors
# nn.Linear consumes the last dimension, so read the input width from the
# data instead of hard-coding it
num_features = features_tensor.shape[-1]

kf = KFold(n_splits=NUM_FOLDS, shuffle=True, random_state=42)
best_model_wts = None
best_accuracy = 0

for fold, (train_idx, val_idx) in enumerate(kf.split(features_tensor), 1):
    print(f'\nFold {fold}')
    X_train, X_val = features_tensor[train_idx], features_tensor[val_idx]
    y_train, y_val = labels_tensor[train_idx], labels_tensor[val_idx]
    # Rebinding train_dataset is safe: the full tensors were unpacked above
    train_dataset = TensorDataset(X_train, y_train)
    val_dataset = TensorDataset(X_val, y_val)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE)

    # Fresh model, optimizer and stopper for every fold
    model = Net(num_features, 64, 2)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay=1e-4)  # Added weight decay
    early_stopping = EarlyStopping(patience=5, min_delta=0.001)

    # Training loop
    for epoch in range(NUM_EPOCHS):
        model.train()
        for inputs, labels in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

        # Validation phase
        model.eval()
        val_loss = 0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                val_loss += criterion(outputs, labels).item()
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Mean of the per-batch losses
        val_loss /= len(val_loader)
        accuracy = 100 * correct / total
        print(f'Epoch [{epoch+1}/{NUM_EPOCHS}], Validation Loss: {val_loss:.4f}, Accuracy: {accuracy:.2f}%')

        early_stopping(val_loss)
        if early_stopping.early_stop:
            print("Early stopping triggered.")
            break

    # NOTE: `accuracy` is the value from the last epoch that ran in this fold,
    # so folds are compared on their final-epoch validation accuracy.
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        # state_dict() returns references to the live parameters; deep-copy so
        # the kept weights are an independent snapshot
        best_model_wts = copy.deepcopy(model.state_dict())

    evaluate_model(val_loader, model)

if best_model_wts is not None:
    torch.save(best_model_wts, 'FeedForward_Enhanced.pth')
    print(f'\nBest Model Accuracy: {best_accuracy:.2f}%')
else:
    print("No model was saved.")


Fold 1
Epoch [1/50], Validation Loss: 0.5175, Accuracy: 84.55%
Epoch [2/50], Validation Loss: 0.3846, Accuracy: 84.55%
Epoch [3/50], Validation Loss: 0.3537, Accuracy: 84.55%
Epoch [4/50], Validation Loss: 0.3361, Accuracy: 84.55%
Epoch [5/50], Validation Loss: 0.3239, Accuracy: 84.55%
Epoch [6/50], Validation Loss: 0.3178, Accuracy: 85.45%
Epoch [7/50], Validation Loss: 0.3129, Accuracy: 87.27%
Epoch [8/50], Validation Loss: 0.3100, Accuracy: 87.73%
Epoch [9/50], Validation Loss: 0.3059, Accuracy: 88.18%
Epoch [10/50], Validation Loss: 0.3131, Accuracy: 88.18%
Epoch [11/50], Validation Loss: 0.3157, Accuracy: 89.55%
Epoch [12/50], Validation Loss: 0.3167, Accuracy: 90.00%
Epoch [13/50], Validation Loss: 0.3214, Accuracy: 90.00%
Epoch [14/50], Validation Loss: 0.3184, Accuracy: 90.45%
Early stopping triggered.
              precision    recall  f1-score   support

           0       0.84      0.47      0.60        34
           1       0.91      0.98      0.95       186

    accuracy 

In [3]:
# Calculates the accuracy of the pretrained model g on the clean validation dataset (D_validation_clean)

class Net(nn.Module):
    """Two-hidden-layer feed-forward network.

    Stacks ``Linear(input_size, hidden_size)``, ReLU, Dropout(0.3),
    ``Linear(hidden_size, hidden_size // 2)``, ReLU and
    ``Linear(hidden_size // 2, num_classes)``. The attribute names
    (``fc1`` .. ``fc3``) are the keys used by ``state_dict``.
    """

    def __init__(self, input_size, hidden_size, num_classes=2):
        super().__init__()
        narrow = hidden_size // 2
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu1 = nn.ReLU()
        self.dropout1 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(hidden_size, narrow)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(narrow, num_classes)

    def forward(self, x):
        """Apply every layer in definition order and return fc3's output."""
        first = self.dropout1(self.relu1(self.fc1(x)))
        second = self.relu2(self.fc2(first))
        return self.fc3(second)
    
# Build the network once; its sizes have to match the checkpoint loaded below.
input_size = 70
model = Net(input_size=input_size, hidden_size=64, num_classes=2)

dataset_path = 'D_validation_clean.pth'
# NOTE: despite its name, train_dataset holds the validation data here.
# NOTE(review): torch.load unpickles the file; only load trusted datasets.
train_dataset = torch.load(dataset_path)
features_tensor, labels_tensor = train_dataset.tensors

val_dataset = TensorDataset(features_tensor, labels_tensor)
val_loader = DataLoader(val_dataset, batch_size=32)

# Restore the trained weights and switch to eval mode so dropout is disabled
model_path = 'FeedForward_Enhanced.pth'
model.load_state_dict(torch.load(model_path))
model.eval()
def evaluate_model(val_loader, model):
    """Return the accuracy of ``model`` over ``val_loader`` as a percentage.

    The prediction for each sample is the index of the largest model output
    along dim 1. Evaluation always runs in eval mode with gradients disabled,
    and the model's previous train/eval mode is restored before returning, so
    the result does not depend on the mode the caller left the model in.

    Args:
        val_loader: Iterable yielding ``(inputs, labels)`` batches.
        model: The ``nn.Module`` to score.

    Returns:
        ``100 * correct / total`` as a float, or ``0.0`` when the loader
        yields no samples.
    """
    was_training = model.training
    model.eval()
    correct = 0
    total = 0
    try:
        with torch.no_grad():
            for inputs, labels in val_loader:
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
    finally:
        # Leave the model in whatever mode the caller had it in
        model.train(was_training)

    if total == 0:
        # Nothing to score; avoid dividing by zero
        return 0.0
    return 100 * correct / total

# Score the reloaded model on the validation loader and report the percentage
accuracy = evaluate_model(val_loader=val_loader, model=model)
print(f'Accuracy of the model g on D_validation_clean: {accuracy:.2f}%')

Accuracy of the model g on D_validation_clean: 87.53%
