In [None]:
import torch
import numpy as np

In [None]:
# Display the installed NumPy version.
np.__version__

'1.25.2'

In [None]:
from torch import nn

In [None]:
# Display the installed PyTorch version (the build string also names the CUDA variant).
torch.__version__

'2.3.0+cu121'

In [None]:
import pandas as pd

In [None]:
# Display the installed pandas version.
pd.__version__

'2.0.3'

In [None]:
import os
import requests
import logging
# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Configuration variables.
# Every entry is read from the environment variable of the same name and falls
# back to the default shown here. The credential entries (ACCESS_KEY,
# SECRET_KEY, GITHUB_USERNAME, GITHUB_TOKEN) have no default and are None when
# the variable is unset.
config = {
    "REPO_URL": os.environ.get('REPO_URL', 'https://github.com/danilonicioka/mlops-workflow.git'),
    "CLONED_DIR": os.environ.get('CLONED_DIR', 'mlops-workflow'),
    "FILE_URL": os.environ.get('FILE_URL', 'https://raw.githubusercontent.com/razaulmustafa852/youtubegoes5g/main/Models/Stall-Windows%20-%20Stall-3s.csv'),
    "DVC_FILE_DIR": os.environ.get('DVC_FILE_DIR', 'data/external'),
    "DVC_FILE_NAME": os.environ.get('DVC_FILE_NAME', 'init_dataset.csv'),
    "BRANCH_NAME": os.environ.get('BRANCH_NAME', 'tests'),
    "BUCKET_NAME": os.environ.get('BUCKET_NAME', 'dvc-data'),
    "MINIO_URL": os.environ.get('MINIO_URL', 'localhost:9000'),
    "ACCESS_KEY": os.environ.get('ACCESS_KEY'),
    "SECRET_KEY": os.environ.get('SECRET_KEY'),
    "REMOTE_NAME": os.environ.get('REMOTE_NAME', 'minio_remote'),
    "GITHUB_USERNAME": os.environ.get('GITHUB_USERNAME'),
    "GITHUB_TOKEN": os.environ.get('GITHUB_TOKEN')
}

file_url = config["FILE_URL"]
local_file_path = config["DVC_FILE_NAME"]

try:
    # Request the file content.
    # An explicit timeout (seconds) is required: without one, requests waits
    # indefinitely on an unresponsive server and the cell never returns.
    # A timeout surfaces as requests.Timeout, which the handler below catches.
    response = requests.get(file_url, timeout=30)
    response.raise_for_status()

    # Save the file content locally (path is relative to the working directory)
    with open(local_file_path, 'wb') as local_file:
        local_file.write(response.content)
    logger.info(f"Successfully downloaded file from {file_url} to {local_file_path}")
except requests.RequestException as e:
    # Log and raise any download errors
    logger.error(f"Failed to download file: {e}")
    raise

In [None]:
# The download step writes the file to `local_file_path`, relative to the
# current working directory. Resolve that same location instead of assuming
# the working directory is '/content', so the load also works outside Colab
# (inside Colab with CWD '/content' the resulting path is unchanged).
colab_path = os.path.abspath(local_file_path)
df = pd.read_csv(colab_path)

In [None]:
# List the column names of the freshly loaded frame.
df.columns

Index(['ID', 'Stall', 'Quality', 'Time', 'CQI1', 'CQI2', 'CQI3', 'cSTD CQI',
       'cMajority', 'c25 P', 'c50 P', 'c75 P', 'RSRP1', 'RSRP2', 'RSRP3',
       'pMajority', 'p25 P', 'p50 P', 'p75 P', 'RSRQ1', 'RSRQ2', 'RSRQ3',
       'qMajority', 'q25 P', 'q50 P', 'q75 P', 'SNR1', 'SNR2', 'SNR3',
       'sMajority', 's25 P', 's50 P', 's75 P'],
      dtype='object')

In [None]:
# Print the raw frame as loaded (pandas abbreviates the middle rows and columns).
print(df)

          ID Stall   Quality      Time CQI1 CQI2 CQI3  cSTD CQI cMajority  \
0      4P7s2    No    hd2160  16:14:29   13   13   13  0.000000        13   
1      4P7s2    No    hd2160  16:14:30   13   13   13  0.000000        13   
2      4P7s2    No    hd2160  16:14:31   13   13   13  0.000000        13   
3      4P7s2    No    hd2160  16:14:32   13   13   12  0.471405        13   
4      4P7s2    No    hd2160  16:14:33   12   14   12  0.942809        12   
...      ...   ...       ...       ...  ...  ...  ...       ...       ...   
2688  5Po26s   Yes  unknown,  17:43:18   14   14   14  0.000000        14   
2689  4Po26s   Yes  unknown,  17:43:23  NaN  NaN  NaN  0.000000       NaN   
2690  4Po26s   Yes   hd1440,  17:43:33  NaN  NaN  NaN  0.000000       NaN   
2691   4I27s   Yes  unknown,  10:52:04    9    9    9  0.000000         9   
2692   4I27s   Yes   hd1440,  10:52:16    9    9    9  0.000000         9   

      c25 P  ...  q25 P  q50 P  q75 P  SNR1  SNR2  SNR3  sMajority  s25 P  

In [None]:
# Map the missing-value placeholders (a blank ' ' and '-') to NaN rather than 0,
# so that they are treated as missing values instead of real zero readings.
df = df.replace(to_replace=[' ', '-', np.nan], value=np.nan)

In [None]:
# Print the frame again after the placeholder values were replaced with NaN.
print(df)

          ID Stall   Quality      Time CQI1 CQI2 CQI3  cSTD CQI cMajority  \
0      4P7s2    No    hd2160  16:14:29   13   13   13  0.000000        13   
1      4P7s2    No    hd2160  16:14:30   13   13   13  0.000000        13   
2      4P7s2    No    hd2160  16:14:31   13   13   13  0.000000        13   
3      4P7s2    No    hd2160  16:14:32   13   13   12  0.471405        13   
4      4P7s2    No    hd2160  16:14:33   12   14   12  0.942809        12   
...      ...   ...       ...       ...  ...  ...  ...       ...       ...   
2688  5Po26s   Yes  unknown,  17:43:18   14   14   14  0.000000        14   
2689  4Po26s   Yes  unknown,  17:43:23  NaN  NaN  NaN  0.000000       NaN   
2690  4Po26s   Yes   hd1440,  17:43:33  NaN  NaN  NaN  0.000000       NaN   
2691   4I27s   Yes  unknown,  10:52:04    9    9    9  0.000000         9   
2692   4I27s   Yes   hd1440,  10:52:16    9    9    9  0.000000         9   

      c25 P  ...  q25 P  q50 P  q75 P  SNR1  SNR2  SNR3  sMajority  s25 P  

In [None]:
# Columns that are cast to float and then mean-imputed
columns_to_convert = [
    'CQI1', 'CQI2', 'CQI3', 'cSTD CQI',
    'cMajority', 'c25 P', 'c50 P', 'c75 P', 'RSRP1', 'RSRP2', 'RSRP3',
    'pMajority', 'p25 P', 'p50 P', 'p75 P', 'RSRQ1', 'RSRQ2', 'RSRQ3',
    'qMajority', 'q25 P', 'q50 P', 'q75 P', 'SNR1', 'SNR2', 'SNR3',
    'sMajority', 's25 P', 's50 P', 's75 P',
]

# One pass: convert to float, then fill each column's NaN with that column's mean
df[columns_to_convert] = (
    df[columns_to_convert]
    .astype(float)
    .pipe(lambda numeric: numeric.fillna(numeric.mean()))
)

# Display the modified DataFrame
print(df)

          ID Stall   Quality      Time       CQI1       CQI2       CQI3  \
0      4P7s2    No    hd2160  16:14:29  13.000000  13.000000  13.000000   
1      4P7s2    No    hd2160  16:14:30  13.000000  13.000000  13.000000   
2      4P7s2    No    hd2160  16:14:31  13.000000  13.000000  13.000000   
3      4P7s2    No    hd2160  16:14:32  13.000000  13.000000  12.000000   
4      4P7s2    No    hd2160  16:14:33  12.000000  14.000000  12.000000   
...      ...   ...       ...       ...        ...        ...        ...   
2688  5Po26s   Yes  unknown,  17:43:18  14.000000  14.000000  14.000000   
2689  4Po26s   Yes  unknown,  17:43:23   9.297063   9.271461   9.249437   
2690  4Po26s   Yes   hd1440,  17:43:33   9.297063   9.271461   9.249437   
2691   4I27s   Yes  unknown,  10:52:04   9.000000   9.000000   9.000000   
2692   4I27s   Yes   hd1440,  10:52:16   9.000000   9.000000   9.000000   

      cSTD CQI  cMajority  c25 P  ...  q25 P  q50 P  q75 P       SNR1  \
0     0.000000  13.000000 

In [None]:
# Per-column flag: True when the column still holds at least one missing value
columns_with_nan = df.isnull().any(axis=0)
# Show the flags for every column
print(columns_with_nan)

ID           False
Stall        False
Quality      False
Time         False
CQI1         False
CQI2         False
CQI3         False
cSTD CQI     False
cMajority    False
c25 P        False
c50 P        False
c75 P        False
RSRP1        False
RSRP2        False
RSRP3        False
pMajority    False
p25 P        False
p50 P        False
p75 P        False
RSRQ1        False
RSRQ2        False
RSRQ3        False
qMajority    False
q25 P        False
q50 P        False
q75 P        False
SNR1         False
SNR2         False
SNR3         False
sMajority    False
s25 P        False
s50 P        False
s75 P        False
dtype: bool


In [None]:
# Encode the target as integers: 'Yes' -> 1, 'No' -> 0.
# The result is assigned back to the column instead of calling
# replace(..., inplace=True) on the df['Stall'] selection: the chained in-place
# form is not guaranteed to modify `df` (it has no effect when pandas
# Copy-on-Write is active). Assigning back also keeps the cell safe to re-run.
# The explicit int64 cast fails fast if any label other than 'Yes'/'No' remains.
df['Stall'] = df['Stall'].replace({'Yes': 1, 'No': 0}).astype('int64')

In [None]:
# Print the frame after the 'Stall' column was encoded as 0/1.
print(df)

          ID  Stall   Quality      Time       CQI1       CQI2       CQI3  \
0      4P7s2      0    hd2160  16:14:29  13.000000  13.000000  13.000000   
1      4P7s2      0    hd2160  16:14:30  13.000000  13.000000  13.000000   
2      4P7s2      0    hd2160  16:14:31  13.000000  13.000000  13.000000   
3      4P7s2      0    hd2160  16:14:32  13.000000  13.000000  12.000000   
4      4P7s2      0    hd2160  16:14:33  12.000000  14.000000  12.000000   
...      ...    ...       ...       ...        ...        ...        ...   
2688  5Po26s      1  unknown,  17:43:18  14.000000  14.000000  14.000000   
2689  4Po26s      1  unknown,  17:43:23   9.297063   9.271461   9.249437   
2690  4Po26s      1   hd1440,  17:43:33   9.297063   9.271461   9.249437   
2691   4I27s      1  unknown,  10:52:04   9.000000   9.000000   9.000000   
2692   4I27s      1   hd1440,  10:52:16   9.000000   9.000000   9.000000   

      cSTD CQI  cMajority  c25 P  ...  q25 P  q50 P  q75 P       SNR1  \
0     0.000000

In [None]:
# List the column names again before selecting the model inputs.
df.columns

Index(['ID', 'Stall', 'Quality', 'Time', 'CQI1', 'CQI2', 'CQI3', 'cSTD CQI',
       'cMajority', 'c25 P', 'c50 P', 'c75 P', 'RSRP1', 'RSRP2', 'RSRP3',
       'pMajority', 'p25 P', 'p50 P', 'p75 P', 'RSRQ1', 'RSRQ2', 'RSRQ3',
       'qMajority', 'q25 P', 'q50 P', 'q75 P', 'SNR1', 'SNR2', 'SNR3',
       'sMajority', 's25 P', 's50 P', 's75 P'],
      dtype='object')

In [None]:
# Feature matrix: the 29 numeric measurement columns as a NumPy array
X = df.loc[:, [
    'CQI1', 'CQI2', 'CQI3', 'cSTD CQI',
    'cMajority', 'c25 P', 'c50 P', 'c75 P', 'RSRP1', 'RSRP2', 'RSRP3',
    'pMajority', 'p25 P', 'p50 P', 'p75 P', 'RSRQ1', 'RSRQ2', 'RSRQ3',
    'qMajority', 'q25 P', 'q50 P', 'q75 P', 'SNR1', 'SNR2', 'SNR3',
    'sMajority', 's25 P', 's50 P', 's75 P',
]].to_numpy()

In [None]:
# Target vector: the 'Stall' column as a NumPy array
y = df.loc[:, 'Stall'].to_numpy()

In [None]:
# Check that features and target have the same number of rows.
X.shape, y.shape

((2693, 29), (2693,))

In [None]:
import numpy as np

In [None]:
import sklearn
from sklearn.model_selection import train_test_split

In [None]:
# Display the installed scikit-learn version.
sklearn.__version__

'1.2.2'

In [None]:
import imblearn
from imblearn.over_sampling import SMOTE

In [None]:
# Display the installed imbalanced-learn version.
imblearn.__version__

'0.10.1'

In [None]:
# Oversample with SMOTE so the classes are balanced.
# A fixed random_state makes the synthetic samples (and therefore every shape
# and metric computed afterwards) identical from run to run.
# NOTE(review): resampling is applied to the full dataset, before the
# train/test split performed later in this notebook, so synthetic training
# samples can be interpolated from rows that end up in the test set. Consider
# splitting first and resampling only the training portion.
oversample = SMOTE(random_state=42)
X, y = oversample.fit_resample(X, y)

In [None]:
from sklearn.preprocessing import StandardScaler
# Standardise the features: learn the per-column statistics, then apply them
scaler = StandardScaler().fit(X)
X = scaler.transform(X)
X.shape

(3524, 29)

In [None]:
# Wrap the NumPy arrays as float32 tensors for the model
X = torch.from_numpy(X).to(torch.float32)
y = torch.from_numpy(y).to(torch.float32)

In [None]:
# Shape of the feature tensor after resampling and conversion.
X.shape

torch.Size([3524, 29])

In [None]:
# Inspect the first (standardised) feature row.
X[0]

tensor([ 1.3551,  1.3578,  1.3715, -0.8032,  1.3567,  1.5745,  1.3320,  1.2150,
         1.7027,  1.6979,  1.3029,  1.6994,  1.2636,  1.3922,  1.3648,  0.6057,
         0.6058, -0.5052,  0.6033,  0.0857,  0.5796,  0.5442,  0.4621,  0.4555,
        -0.1812,  0.4575,  0.1983,  0.4634,  0.4200])

In [None]:
# Shape of the target tensor; its length should match the first dimension of X.
y.shape

torch.Size([3524])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Show the first row of each split, plus the shape of that one-row slice
print(
    "X_train:", X_train[:1],
    "\nX_train_shape:", X_train[:1].shape,
    "\nX_test:", X_test[:1],
    "\nX_test_shape:", X_test[:1].shape,
    "\ny_train:", y_train[:1],
    "\ny_test:", y_test[:1],
)

X_train: tensor([[ 0.2905,  1.3578, -0.3996,  1.3912,  0.2987,  0.1456,  0.3388,  0.7077,
          0.9935,  0.9905,  0.9906,  0.9918,  0.8294,  0.7938,  0.7658,  0.3826,
          0.3838, -0.0594,  0.3814,  0.1942,  0.3612,  0.3194,  1.1020,  1.0953,
          0.9683,  1.0983,  1.0942,  1.1010,  1.0657]]) 
X_train_shape: torch.Size([1, 29]) 
X_test: tensor([[ 2.0649,  1.7108,  1.7257, -0.2998,  1.7094,  1.9318,  1.6630,  1.7222,
          1.6239,  1.7765,  1.6152,  1.6207,  1.3638,  1.3257,  1.3648,  1.4979,
          1.4938,  1.5006,  1.4907,  1.4969,  1.4533,  1.4435,  1.7418,  1.7350,
          1.7346,  1.7392,  1.7981,  1.7385,  1.7114]]) 
X_test_shape: torch.Size([1, 29]) 
y_train: tensor([0.]) 
y_test: tensor([0.])


In [None]:
# Print the first training row (reference value for the save/load round trip below).
print("X_train:", X_train[:1])

X_train: tensor([[ 0.2905,  1.3578, -0.3996,  1.3912,  0.2987,  0.1456,  0.3388,  0.7077,
          0.9935,  0.9905,  0.9906,  0.9918,  0.8294,  0.7938,  0.7658,  0.3826,
          0.3838, -0.0594,  0.3814,  0.1942,  0.3612,  0.3194,  1.1020,  1.0953,
          0.9683,  1.0983,  1.0942,  1.1010,  1.0657]])


In [None]:
# Serialise the training features tensor to a file under /tmp.
torch.save(X_train, "/tmp/X_train.pt")

In [None]:
# Read the tensor back from disk and print its first row to confirm the round trip.
# weights_only=True restricts unpickling to tensors and other plain data, which
# is all this file contains, and avoids executing arbitrary pickled code if the
# file under /tmp were ever replaced.
X_train_loaded = torch.load("/tmp/X_train.pt", weights_only=True)
print("X_train:", X_train_loaded[:1])

X_train: tensor([[ 0.2905,  1.3578, -0.3996,  1.3912,  0.2987,  0.1456,  0.3388,  0.7077,
          0.9935,  0.9905,  0.9906,  0.9918,  0.8294,  0.7938,  0.7658,  0.3826,
          0.3838, -0.0594,  0.3814,  0.1942,  0.3612,  0.3194,  1.1020,  1.0953,
          0.9683,  1.0983,  1.0942,  1.1010,  1.0657]])


In [None]:
# Confirm the training features are a torch.Tensor.
type(X_train)

torch.Tensor

In [None]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

In [None]:
# Build model with non-linear activation function
from torch import nn
class InteruptionModel(nn.Module):
    """Fully connected binary classifier that returns one raw logit per sample.

    The forward pass is Linear -> ReLU -> Linear -> ReLU -> Linear. No sigmoid
    is applied inside the model, so callers must apply it themselves (or use a
    logit-based loss such as nn.BCEWithLogitsLoss).

    Args:
        in_features: Number of input features per sample. Defaults to 29.
        hidden_1: Width of the first hidden layer. Defaults to 200.
        hidden_2: Width of the second hidden layer. Defaults to 100.
    """

    def __init__(self, in_features=29, hidden_1=200, hidden_2=100):
        super().__init__()
        # Attribute names (and their creation order) are unchanged so that
        # state_dict keys and the initial weights for a given seed stay the same.
        self.layer_1 = nn.Linear(in_features=in_features, out_features=hidden_1)
        self.layer_2 = nn.Linear(in_features=hidden_1, out_features=hidden_2)
        self.layer_3 = nn.Linear(in_features=hidden_2, out_features=1)
        self.relu = nn.ReLU()  # non-linearity applied between the linear layers
        # A sigmoid could also live inside the model, in which case it would
        # not have to be applied to the predictions afterwards:
        # self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return logits with a trailing dimension of size 1 for input `x`.

        `x` must have `in_features` as its last dimension.
        """
        # ReLU is interspersed between the layers; the final layer stays linear
        hidden = self.relu(self.layer_1(x))
        hidden = self.relu(self.layer_2(hidden))
        return self.layer_3(hidden)

# Instantiate the network, move its parameters to the selected device, and print its layers.
model_3 = InteruptionModel().to(device)
print(model_3)

InteruptionModel(
  (layer_1): Linear(in_features=29, out_features=200, bias=True)
  (layer_2): Linear(in_features=200, out_features=100, bias=True)
  (layer_3): Linear(in_features=100, out_features=1, bias=True)
  (relu): ReLU()
)


In [None]:
# Inspect the model parameters as initialised, before any training step.
model_3.state_dict()

OrderedDict([('layer_1.weight',
              tensor([[-0.1478, -0.1038,  0.1117,  ...,  0.0241,  0.0003, -0.0470],
                      [ 0.1259, -0.0350, -0.0410,  ..., -0.0713,  0.1540,  0.0846],
                      [-0.1577,  0.1240, -0.0738,  ...,  0.0188,  0.0973, -0.1541],
                      ...,
                      [ 0.1070,  0.1628, -0.0788,  ..., -0.1855, -0.0840, -0.1221],
                      [ 0.1574,  0.1169, -0.1040,  ..., -0.0832, -0.1809,  0.0447],
                      [-0.1300,  0.0310,  0.0321,  ..., -0.1258,  0.0133,  0.1340]],
                     device='cuda:0')),
             ('layer_1.bias',
              tensor([-0.1675, -0.1779,  0.0857,  0.0352,  0.0936,  0.0356,  0.0516,  0.0427,
                       0.0423, -0.1282, -0.1097, -0.0665, -0.0674,  0.1301, -0.0953,  0.0152,
                       0.1556,  0.1099, -0.0860, -0.0002, -0.0114, -0.1834,  0.1564, -0.1129,
                       0.0770,  0.0483,  0.1712,  0.0623, -0.1242, -0.1688, -0.1318,

In [None]:
# Loss works directly on raw logits; Adam updates all model parameters
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(params=model_3.parameters(), lr=1e-4)

In [None]:
def accuracy_fn(y_true, y_pred):
    """Return the percentage (0-100) of positions where `y_pred` equals `y_true`.

    Both arguments are tensors compared element-wise; the count of matches is
    divided by `len(y_pred)`.
    """
    matches = torch.eq(y_true, y_pred)  # boolean tensor, True where labels agree
    n_correct = matches.sum().item()
    return (n_correct / len(y_pred)) * 100

In [None]:
# Fit the model
# NOTE(review): model_3 is created in an earlier cell, so this seed does not
# influence its initial weights — confirm whether seeding before model creation
# was intended.
torch.manual_seed(42)
epochs = 3500

# Put all data on target device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)


for epoch in range(epochs):
    ### Training
    # Switch back to training mode at the start of every epoch. The evaluation
    # step below calls model_3.eval(); without this call the model would stay
    # in eval mode from the second epoch onwards.
    model_3.train()

    # 1. Forward pass (full batch); squeeze drops the trailing size-1 dimension
    y_logits = model_3(X_train).squeeze()

    y_pred = torch.round(torch.sigmoid(y_logits)) # logits -> prediction probabilities -> prediction labels

    # 2. Calculate loss and accuracy
    loss = loss_fn(y_logits, y_train) # BCEWithLogitsLoss calculates loss using logits
    acc = accuracy_fn(y_true=y_train,
                      y_pred=y_pred)

    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Loss backward
    loss.backward()

    # 5. Optimizer step
    optimizer.step()

    ### Testing
    model_3.eval()
    with torch.no_grad():
        # 1. Forward pass
        test_logits = model_3(X_test).squeeze()
        test_pred = torch.round(torch.sigmoid(test_logits)) # logits -> prediction probabilities -> prediction labels
        # 2. Calculate loss and accuracy
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_true=y_test,
                               y_pred=test_pred)

    # Print out what's happening
    if epoch % 500 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test Loss: {test_loss:.5f}, Test Accuracy: {test_acc:.2f}%")

Epoch: 0 | Loss: 0.68889, Accuracy: 51.15% | Test Loss: 0.68856, Test Accuracy: 50.92%
Epoch: 500 | Loss: 0.47335, Accuracy: 77.72% | Test Loss: 0.49773, Test Accuracy: 77.16%
Epoch: 1000 | Loss: 0.35664, Accuracy: 84.25% | Test Loss: 0.44451, Test Accuracy: 80.00%
Epoch: 1500 | Loss: 0.24264, Accuracy: 90.74% | Test Loss: 0.42692, Test Accuracy: 82.55%
Epoch: 2000 | Loss: 0.15991, Accuracy: 95.10% | Test Loss: 0.44582, Test Accuracy: 83.97%
Epoch: 2500 | Loss: 0.10862, Accuracy: 97.06% | Test Loss: 0.48712, Test Accuracy: 84.54%
Epoch: 3000 | Loss: 0.07627, Accuracy: 98.01% | Test Loss: 0.53836, Test Accuracy: 83.83%


In [None]:
# Final predictions on the held-out set: logits -> probabilities -> 0/1 labels
model_3.eval()
with torch.no_grad():
    y_preds = model_3(X_test).sigmoid().round().squeeze()

In [None]:
# Check that predictions and true labels have matching shapes.
y_preds.shape,y_test.shape

(torch.Size([705]), torch.Size([705]))

In [None]:
# Convert predictions and labels to NumPy arrays for scikit-learn.
# .cpu() returns the tensor unchanged when it already lives on the CPU, so no
# branch on `device` is needed. Calling it unconditionally also covers device
# strings such as "cuda:0", which the previous `device == "cuda"` check sent
# down the CPU-only path.
predictions = y_preds.cpu().numpy()
true_labels = y_test.cpu().numpy()

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score,fbeta_score

# Confusion matrix of true labels against predictions
print("=== Confusion Matrix ===")
print(confusion_matrix(y_true=true_labels, y_pred=predictions))
print('\n')


# Aggregate scores: compute them all first, then print them in a fixed order
print("=== Score ===")
accuracy = accuracy_score(y_true=true_labels, y_pred=predictions)
precision = precision_score(y_true=true_labels, y_pred=predictions, average='weighted')
recall = recall_score(y_true=true_labels, y_pred=predictions, average='weighted')
microf1 = f1_score(y_true=true_labels, y_pred=predictions, average='micro')
macrof1 = f1_score(y_true=true_labels, y_pred=predictions, average='macro')

print('Accuracy: %f' % accuracy)
print('Precision: %f' % precision)
print('Recall: %f' % recall)
print('Micro F1 score: %f' % microf1)
print('Macro F1 score: %f' % macrof1)

=== Confusion Matrix ===
[[313  57]
 [ 51 284]]


=== Score ===
Accuracy: 0.846809
Precision: 0.847039
Recall: 0.846809
Micro F1 score: 0.846809
Macro F1 score: 0.846549


In [None]:
# Display names for the two label values, in label order (0, 1)
target_names = ['No-Stall', 'Stall']
# Per-class precision / recall / F1 table
print(
    classification_report(
        y_true=true_labels,
        y_pred=predictions,
        target_names=target_names,
    )
)

              precision    recall  f1-score   support

    No-Stall       0.86      0.85      0.85       370
       Stall       0.83      0.85      0.84       335

    accuracy                           0.85       705
   macro avg       0.85      0.85      0.85       705
weighted avg       0.85      0.85      0.85       705



In [None]:
#Done

In [None]:
# Inspect the model parameters again, now after training.
model_3.state_dict()

OrderedDict([('layer_1.weight',
              tensor([[-0.1181, -0.2897,  0.2402,  ...,  0.0786, -0.0192, -0.0708],
                      [ 0.3033,  0.1498,  0.0530,  ..., -0.0489,  0.1703,  0.0055],
                      [-0.3527,  0.1504, -0.2244,  ..., -0.0177,  0.0182, -0.1590],
                      ...,
                      [ 0.2363,  0.3492, -0.0924,  ..., -0.2666, -0.1514, -0.1557],
                      [ 0.2733,  0.2658,  0.0071,  ..., -0.1247, -0.3160, -0.1239],
                      [-0.2570,  0.1280,  0.1380,  ..., -0.0841,  0.0064,  0.0559]],
                     device='cuda:0')),
             ('layer_1.bias',
              tensor([-3.1601e-01, -5.1487e-01, -1.7978e-02,  1.9502e-01,  2.6011e-01,
                       2.5126e-01,  2.2115e-01, -1.0488e-01,  5.4405e-02,  4.6935e-02,
                      -4.7367e-01, -3.3694e-01,  8.0895e-02,  2.2344e-01,  8.2236e-02,
                      -5.8523e-02,  3.2312e-01,  1.6754e-01, -2.3926e-01,  1.4068e-02,
                  

In [None]:
!pip install torchserve

Collecting torchserve
  Downloading torchserve-0.11.1-py3-none-any.whl.metadata (1.4 kB)
Downloading torchserve-0.11.1-py3-none-any.whl (24.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.4/24.4 MB[0m [31m71.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torchserve
Successfully installed torchserve-0.11.1


In [None]:
import ts

In [None]:
from ts.handler_utils.timer import timed

In [None]:
# An import alias must be a plain identifier; the previous dotted alias
# (`as ts.torch_handler.handler`) is a SyntaxError.
# NOTE(review): confirm which module/alias was actually intended here.
import ts.torch_handler as torch_handler

SyntaxError: invalid syntax (<ipython-input-10-6e346fda6a1c>, line 1)