In [3]:
import torch
import numpy as np

In [4]:
np.__version__

'1.26.4'

In [5]:
from torch import nn

In [6]:
torch.__version__

'2.3.0+cu121'

In [7]:
import pandas as pd

In [8]:
pd.__version__

'2.2.3'

In [9]:
import os
import requests
import logging
# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Configuration variables
config = {
    "REPO_URL": os.environ.get('REPO_URL', 'https://github.com/danilonicioka/mlops-workflow.git'),
    "CLONED_DIR": os.environ.get('CLONED_DIR', 'mlops-workflow'),
    "FILE_URL": os.environ.get('FILE_URL', 'https://raw.githubusercontent.com/razaulmustafa852/youtubegoes5g/main/Models/Stall-Windows%20-%20Stall-3s.csv'),
    "DVC_FILE_DIR": os.environ.get('DVC_FILE_DIR', 'data/external'),
    "DVC_FILE_NAME": os.environ.get('DVC_FILE_NAME', 'init_dataset.csv'),
    "BRANCH_NAME": os.environ.get('BRANCH_NAME', 'tests'),
    "BUCKET_NAME": os.environ.get('BUCKET_NAME', 'dvc-data'),
    "MINIO_URL": os.environ.get('MINIO_URL', 'localhost:9000'),
    "ACCESS_KEY": os.environ.get('ACCESS_KEY'),
    "SECRET_KEY": os.environ.get('SECRET_KEY'),
    "REMOTE_NAME": os.environ.get('REMOTE_NAME', 'minio_remote'),
    "GITHUB_USERNAME": os.environ.get('GITHUB_USERNAME'),
    "GITHUB_TOKEN": os.environ.get('GITHUB_TOKEN')
}

file_url = config["FILE_URL"]
local_file_path = config["DVC_FILE_NAME"]

try:
    # Request the file content
    response = requests.get(file_url)
    response.raise_for_status()

    # Save the file content locally
    with open(local_file_path, 'wb') as local_file:
        local_file.write(response.content)
    logger.info(f"Successfully downloaded file from {file_url} to {local_file_path}")
except requests.RequestException as e:
    # Log and raise any download errors
    logger.error(f"Failed to download file: {e}")
    raise

INFO:__main__:Successfully downloaded file from https://raw.githubusercontent.com/razaulmustafa852/youtubegoes5g/main/Models/Stall-Windows%20-%20Stall-3s.csv to init_dataset.csv


In [10]:
#colab_path = os.path.join('/content', local_file_path)
df = pd.read_csv(local_file_path)

In [11]:
df.columns

Index(['ID', 'Stall', 'Quality', 'Time', 'CQI1', 'CQI2', 'CQI3', 'cSTD CQI',
       'cMajority', 'c25 P', 'c50 P', 'c75 P', 'RSRP1', 'RSRP2', 'RSRP3',
       'pMajority', 'p25 P', 'p50 P', 'p75 P', 'RSRQ1', 'RSRQ2', 'RSRQ3',
       'qMajority', 'q25 P', 'q50 P', 'q75 P', 'SNR1', 'SNR2', 'SNR3',
       'sMajority', 's25 P', 's50 P', 's75 P'],
      dtype='object')

In [12]:
df = df.replace([' ', '-',np.nan], 0) # There are null values
#df = df.replace([' ', '-',np.nan], np.nan)

In [13]:
# Selective columns for mean calculation
columns_to_convert = ['CQI1', 'CQI2', 'CQI3', 'cSTD CQI',
       'cMajority', 'c25 P', 'c50 P', 'c75 P', 'RSRP1', 'RSRP2', 'RSRP3',
       'pMajority', 'p25 P', 'p50 P', 'p75 P', 'RSRQ1', 'RSRQ2', 'RSRQ3',
       'qMajority', 'q25 P', 'q50 P', 'q75 P', 'SNR1', 'SNR2', 'SNR3',
       'sMajority', 's25 P', 's50 P', 's75 P']
df[columns_to_convert] = df[columns_to_convert].astype(float)

# Replace np.nan with mean values for selective columns
df[columns_to_convert] = df[columns_to_convert].fillna(df[columns_to_convert].mean())

# Display the modified DataFrame
#print(df)

In [14]:
# Check which columns contain np.nan values
columns_with_nan = df.isna().any()
# Display the columns with np.nan values
print(columns_with_nan)

ID           False
Stall        False
Quality      False
Time         False
CQI1         False
CQI2         False
CQI3         False
cSTD CQI     False
cMajority    False
c25 P        False
c50 P        False
c75 P        False
RSRP1        False
RSRP2        False
RSRP3        False
pMajority    False
p25 P        False
p50 P        False
p75 P        False
RSRQ1        False
RSRQ2        False
RSRQ3        False
qMajority    False
q25 P        False
q50 P        False
q75 P        False
SNR1         False
SNR2         False
SNR3         False
sMajority    False
s25 P        False
s50 P        False
s75 P        False
dtype: bool


In [15]:
df['Stall'].replace('Yes', 1, inplace=True)
df['Stall'].replace('No', 0, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Stall'].replace('Yes', 1, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Stall'].replace('No', 0, inplace=True)
  df['Stall'].replace('No', 0, inplace=True)


In [16]:
print(df)

          ID  Stall   Quality      Time  CQI1  CQI2  CQI3  cSTD CQI  \
0      4P7s2      0    hd2160  16:14:29  13.0  13.0  13.0  0.000000   
1      4P7s2      0    hd2160  16:14:30  13.0  13.0  13.0  0.000000   
2      4P7s2      0    hd2160  16:14:31  13.0  13.0  13.0  0.000000   
3      4P7s2      0    hd2160  16:14:32  13.0  13.0  12.0  0.471405   
4      4P7s2      0    hd2160  16:14:33  12.0  14.0  12.0  0.942809   
...      ...    ...       ...       ...   ...   ...   ...       ...   
2688  5Po26s      1  unknown,  17:43:18  14.0  14.0  14.0  0.000000   
2689  4Po26s      1  unknown,  17:43:23   0.0   0.0   0.0  0.000000   
2690  4Po26s      1   hd1440,  17:43:33   0.0   0.0   0.0  0.000000   
2691   4I27s      1  unknown,  10:52:04   9.0   9.0   9.0  0.000000   
2692   4I27s      1   hd1440,  10:52:16   9.0   9.0   9.0  0.000000   

      cMajority  c25 P  ...  q25 P  q50 P  q75 P  SNR1  SNR2  SNR3  sMajority  \
0          13.0   13.0  ...   -9.5   -7.0   -7.0  12.0  12.0   7.0

In [17]:
df.columns

Index(['ID', 'Stall', 'Quality', 'Time', 'CQI1', 'CQI2', 'CQI3', 'cSTD CQI',
       'cMajority', 'c25 P', 'c50 P', 'c75 P', 'RSRP1', 'RSRP2', 'RSRP3',
       'pMajority', 'p25 P', 'p50 P', 'p75 P', 'RSRQ1', 'RSRQ2', 'RSRQ3',
       'qMajority', 'q25 P', 'q50 P', 'q75 P', 'SNR1', 'SNR2', 'SNR3',
       'sMajority', 's25 P', 's50 P', 's75 P'],
      dtype='object')

In [18]:
X = df[['CQI1', 'CQI2', 'CQI3', 'cSTD CQI',
       'cMajority', 'c25 P', 'c50 P', 'c75 P', 'RSRP1', 'RSRP2', 'RSRP3',
       'pMajority', 'p25 P', 'p50 P', 'p75 P', 'RSRQ1', 'RSRQ2', 'RSRQ3',
       'qMajority', 'q25 P', 'q50 P', 'q75 P', 'SNR1', 'SNR2', 'SNR3',
       'sMajority', 's25 P', 's50 P', 's75 P']].values

In [19]:
y = df['Stall'].values

In [20]:
X.shape, y.shape

((2693, 29), (2693,))

In [21]:
import numpy as np

In [22]:
import sklearn
from sklearn.model_selection import train_test_split

In [23]:
sklearn.__version__

'1.2.2'

In [24]:
import imblearn
from imblearn.over_sampling import SMOTE

In [25]:
imblearn.__version__

'0.11.0'

In [26]:
oversample = SMOTE()
X, y = oversample.fit_resample(X, y)

In [27]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X = scaler.fit_transform(X)
X.shape

(3524, 29)

In [28]:
X = torch.from_numpy(X).type(torch.float32)
y = torch.from_numpy(y).type(torch.float32)

In [29]:
X.shape

torch.Size([3524, 29])

In [30]:
X[0]

tensor([ 1.2682,  1.2712,  1.3031, -0.8145,  1.2706,  1.5302,  1.2952,  1.1792,
         1.3001,  1.2992,  0.9865,  1.3005,  1.1750,  1.2996,  1.2750,  0.5609,
         0.5592, -0.5341,  0.5571,  0.0629,  0.5577,  0.5226,  0.4459,  0.4397,
        -0.2018,  0.4411,  0.1716,  0.4403,  0.3964])

In [31]:
y.shape

torch.Size([3524])

In [32]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.2,
                                                    random_state=42
)

print("X_train:", X_train[:1],"\nX_train_shape:", X_train[:1].shape,"\nX_test:", X_test[:1],"\nX_test_shape:",X_test[:1].shape, "\ny_train:", y_train[:1],"\ny_test:", y_test[:1])

X_train: tensor([[ 0.3015,  1.2712, -0.3301,  1.4125,  0.3087,  0.1318,  0.3203,  0.6827,
          0.7315,  0.7306,  0.7347,  0.7320,  0.7631,  0.7310,  0.7056,  0.3415,
          0.3407, -0.0961,  0.3386,  0.1715,  0.3391,  0.2977,  1.0863,  1.0794,
          0.9490,  1.0817,  1.0713,  1.0807,  1.0455]]) 
X_train_shape: torch.Size([1, 29]) 
X_test: tensor([[ 1.9127,  1.5914,  1.6297, -0.3036,  1.5912,  1.8798,  1.6201,  1.6756,
          1.2369,  1.3623,  1.2382,  1.2373,  1.2700,  1.2364,  1.2750,  1.4385,
          1.4336,  1.4369,  1.4311,  1.4739,  1.4321,  1.4221,  1.7268,  1.7191,
          1.7163,  1.7222,  1.7782,  1.7212,  1.6945]]) 
X_test_shape: torch.Size([1, 29]) 
y_train: tensor([0.]) 
y_test: tensor([0.])


In [33]:
print("X_train:", X_train[:1])

X_train: tensor([[ 0.3015,  1.2712, -0.3301,  1.4125,  0.3087,  0.1318,  0.3203,  0.6827,
          0.7315,  0.7306,  0.7347,  0.7320,  0.7631,  0.7310,  0.7056,  0.3415,
          0.3407, -0.0961,  0.3386,  0.1715,  0.3391,  0.2977,  1.0863,  1.0794,
          0.9490,  1.0817,  1.0713,  1.0807,  1.0455]])


In [34]:
torch.save(X_train, "/tmp/X_train.pt")

In [35]:
X_train_loaded = torch.load("/tmp/X_train.pt")
print("X_train:", X_train_loaded[:1])

X_train: tensor([[ 0.3015,  1.2712, -0.3301,  1.4125,  0.3087,  0.1318,  0.3203,  0.6827,
          0.7315,  0.7306,  0.7347,  0.7320,  0.7631,  0.7310,  0.7056,  0.3415,
          0.3407, -0.0961,  0.3386,  0.1715,  0.3391,  0.2977,  1.0863,  1.0794,
          0.9490,  1.0817,  1.0713,  1.0807,  1.0455]])


In [36]:
type(X_train)

torch.Tensor

In [37]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [38]:
# Build model with non-linear activation function
from torch import nn
class InteruptionModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.layer_1 = nn.Linear(in_features=29, out_features=200)
        self.layer_2 = nn.Linear(in_features=200, out_features=100)
        self.layer_3 = nn.Linear(in_features=100, out_features=1)
        self.relu = nn.ReLU() # <- add in ReLU activation function
        # Can also put sigmoid in the model
        # This would mean you don't need to use it on the predictions
        # self.sigmoid = nn.Sigmoid()

    def forward(self, x):
      # Intersperse the ReLU activation function between layers
       return self.layer_3(self.relu(self.layer_2(self.relu(self.layer_1(x)))))

model_3 = InteruptionModel().to(device)
print(model_3)

InteruptionModel(
  (layer_1): Linear(in_features=29, out_features=200, bias=True)
  (layer_2): Linear(in_features=200, out_features=100, bias=True)
  (layer_3): Linear(in_features=100, out_features=1, bias=True)
  (relu): ReLU()
)


In [39]:
#model_3.state_dict()

In [None]:
# Setup loss and optimizer
loss_fn = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model_3.parameters(), lr=0.0001)

In [None]:
def accuracy_fn(y_true, y_pred):
    correct = torch.eq(y_true, y_pred).sum().item() # torch.eq() calculates where two tensors are equal
    acc = (correct / len(y_pred)) * 100
    return acc

In [None]:
# Fit the model
torch.manual_seed(42)
epochs = 3500

# Put all data on target device
X_train, y_train = X_train.to(device), y_train.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)


for epoch in range(epochs):
    # 1. Forward pass
    y_logits = model_3(X_train).squeeze()

    y_pred = torch.round(torch.sigmoid(y_logits)) # logits -> prediction probabilities -> prediction labels

    # 2. Calculate loss and accuracy
    loss = loss_fn(y_logits, y_train) # BCEWithLogitsLoss calculates loss using logits
    acc = accuracy_fn(y_true=y_train,
                      y_pred=y_pred)

    # 3. Optimizer zero grad
    optimizer.zero_grad()

    # 4. Loss backward
    loss.backward()

    # 5. Optimizer step
    optimizer.step()

    ### Testing
    model_3.eval()
    with torch.no_grad():
      # 1. Forward pass
        test_logits = model_3(X_test).squeeze()
        #print(test_logits.shape)
        test_pred = torch.round(torch.sigmoid(test_logits)) # logits -> prediction probabilities -> prediction labels
        # 2. Calcuate loss and accuracy
        test_loss = loss_fn(test_logits, y_test)
        test_acc = accuracy_fn(y_true=y_test,
                             y_pred=test_pred)


    # Print out what's happening
    if epoch % 500 == 0:
        print(f"Epoch: {epoch} | Loss: {loss:.5f}, Accuracy: {acc:.2f}% | Test Loss: {test_loss:.5f}, Test Accuracy: {test_acc:.2f}%")

In [None]:
model_3.eval()
with torch.no_grad():
     y_preds = torch.round(torch.sigmoid(model_3(X_test))).squeeze()

In [None]:
y_preds.shape,y_test.shape

In [None]:
if device == "cuda":
  predictions = y_preds.cpu().numpy() #if it is cuda, then this, otherwise y_pred.numpy()
  true_labels = y_test.cpu().numpy()
else:
  predictions = y_preds.numpy()
  true_labels = y_test.numpy()

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score,fbeta_score

print("=== Confusion Matrix ===")
print(confusion_matrix(true_labels, predictions))
print('\n')


print("=== Score ===")
accuracy = accuracy_score(true_labels, predictions)
print('Accuracy: %f' % accuracy)

precision = precision_score(true_labels,  predictions, average='weighted')
print('Precision: %f' % precision)
recall = recall_score(true_labels, predictions, average='weighted')
print('Recall: %f' % recall)

microf1 = f1_score(true_labels, predictions, average='micro')
print('Micro F1 score: %f' % microf1)
macrof1 = f1_score(true_labels, predictions, average='macro')
print('Macro F1 score: %f' % macrof1)

In [None]:
target_names = ['No-Stall', 'Stall']
# Print precision-recall report
print(classification_report(true_labels, predictions, target_names=target_names))

In [None]:
#Done