In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn_pandas
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from torch.autograd import Variable

In [None]:
# Choose the compute device once: CUDA when PyTorch reports it as available,
# otherwise the CPU. Later cells move the model and tensors onto `device`.
USE_CUDA = torch.cuda.is_available()
device = torch.device("cuda") if USE_CUDA else torch.device("cpu")

In [None]:
# Read customized-mutants.csv into a DataFrame. The path is relative, so the
# file must be present in the notebook's working directory.
df = pd.read_csv("customized-mutants.csv")

In [None]:
# List the column labels of the loaded frame.
df.columns

Index(['projectId', 'bugId', 'methodName', 'mutantId', 'compositeId',
       'className', 'lineNumber', 'testSignature', 'mutationOperatorGroup',
       'mutationOperator', 'nodeTypeBasic', 'nodeTypeDetailed',
       'nodeContextBasic', 'astContextBasic', 'astContextDetailed',
       'astStmtContextBasic', 'astStmtContextDetailed', 'parentContextBasic',
       'parentContextDetailed', 'parentStmtContextBasic',
       'parentStmtContextDetailed', 'hasLiteralChild', 'hasVariableChild',
       'hasOperatorChild', 'isCovered', 'coveringTests', 'isKilled',
       'killingTests', 'isTrivial', 'trivialityScore', 'trivialityTests',
       'isDominator', 'dominatorStrength', 'isUnproductive', 'isFaultCoupled',
       'pKillsDom', 'expKilledDomNodes', 'nestingTotal', 'nestingLoop',
       'nestingIf', 'maxNestingInSameMethod', 'nestingRatioTotal',
       'nestingRatioLoop', 'nestingRatioIf', 'numMutantsInSameMethod',
       'maxLineNumberInSameMethod', 'minLineNumberInSameMethod', 'lineRatio'],


In [None]:
# Display the loaded frame as the cell output.
df

Unnamed: 0,projectId,bugId,methodName,mutantId,compositeId,className,lineNumber,testSignature,mutationOperatorGroup,mutationOperator,...,nestingLoop,nestingIf,maxNestingInSameMethod,nestingRatioTotal,nestingRatioLoop,nestingRatioIf,numMutantsInSameMethod,maxLineNumberInSameMethod,minLineNumberInSameMethod,lineRatio
0,Chart,1,,33990,Chart-1/33990,org.jfree.chart.renderer.AbstractRenderer,2760,CIcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA...,LVR,DOUBLE_LITERAL:DOUBLE_LITERAL,...,1,1,2,0.5,0.5,0.5,1427,2763,51,0.998894
1,Chart,1,,94090,Chart-1/94090,org.jfree.chart.urls.TimeSeriesURLGenerator,71,CIcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA...,EVR,METHOD_INVOCATION:NULL_LITERAL,...,1,1,2,0.5,0.5,0.5,1427,2763,51,0.007375
2,Chart,1,,15000,Chart-1/15000,org.jfree.chart.axis.Axis,157,CIcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA...,LVR,FLOAT_LITERAL:FLOAT_LITERAL,...,1,1,2,0.5,0.5,0.5,1427,2763,51,0.039086
3,Chart,1,,17827,Chart-1/17827,org.jfree.chart.axis.CategoryLabelPositions,91,CIcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA...,AOR,DIVIDE:PLUS,...,1,1,2,0.5,0.5,0.5,1427,2763,51,0.014749
4,Chart,1,,28762,Chart-1/28762,org.jfree.chart.plot.ThermometerPlot,231,CIcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA...,EVR,MULTIPLY:INT_LITERAL,...,1,1,2,0.5,0.5,0.5,1427,2763,51,0.066372
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
121443,Chart,1,zoomRangeAxes(double;org.jfree.chart.plot.Plot...,26470,Chart-1/26470,org.jfree.chart.plot.PolarPlot,1179,CIcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA...,STD,<CALL>:<NO-OP>,...,1,1,1,1.0,1.0,1.0,11,5259,271,0.182037
121444,Chart,1,zoomRangeAxes(double;org.jfree.chart.plot.Plot...,59742,Chart-1/59742,org.jfree.chart.plot.CombinedDomainCategoryPlot,271,CIcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA...,LVR,BOOLEAN_LITERAL:TRUE,...,1,1,1,1.0,1.0,1.0,11,5259,271,0.000000
121445,Chart,1,zoomRangeAxes(double;org.jfree.chart.plot.Plot...,60351,Chart-1/60351,org.jfree.chart.plot.CombinedDomainXYPlot,507,CIcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA...,LVR,BOOLEAN_LITERAL:TRUE,...,1,1,1,1.0,1.0,1.0,11,5259,271,0.047314
121446,Chart,1,zoomRangeAxes(double;org.jfree.chart.plot.Plot...,10229,Chart-1/10229,org.jfree.chart.plot.XYPlot,5259,CIcAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA...,STD,<CALL>:<NO-OP>,...,1,1,1,1.0,1.0,1.0,11,5259,271,1.000000


In [None]:
# Select the pKillsDom column as a Series; the same column is used as the
# prediction target when y_all is built further down.
y_df = df["pKillsDom"]

In [None]:
# Display the selected pKillsDom Series.
y_df

0         0.00
1         0.75
2         0.00
3         0.00
4         0.00
          ... 
121443    0.00
121444    0.00
121445    0.00
121446    0.00
121447    0.00
Name: pKillsDom, Length: 121448, dtype: float64

In [None]:
# For all data features
# Column groups, named after the preprocessing each group receives below.
_scaled_nesting_cols = ["nestingIf", "nestingLoop", "nestingTotal", "maxNestingInSameMethod"]
_passthrough_cols = [
    "nestingRatioLoop",
    "nestingRatioIf",
    "nestingRatioTotal",
    "hasOperatorChild",
    "hasVariableChild",
    "hasLiteralChild",
]
_node_type_cols = ["nodeTypeBasic", "nodeTypeDetailed"]
_context_cols = [
    "mutationOperator",
    "mutationOperatorGroup",
    "nodeContextBasic",
    "astContextBasic",
    "astContextDetailed",
    "astStmtContextBasic",
    "astStmtContextDetailed",
    "parentContextBasic",
    "parentContextDetailed",
    "parentStmtContextBasic",
    "parentStmtContextDetailed",
]

mapper = sklearn_pandas.DataFrameMapper(
    [
        # lineRatio: fill gaps with the column mean, then standardize.
        (["lineRatio"], [SimpleImputer(strategy="mean"), StandardScaler()]),
        # Nesting counts: standardize only.
        (_scaled_nesting_cols, StandardScaler()),
        # Ratios and has*Child flags: passed through with no transformer.
        (_passthrough_cols, None),
        # Node types: fill gaps with the literal "Unknown", then one-hot encode.
        (
            _node_type_cols,
            [
                SimpleImputer(strategy="constant", fill_value="Unknown"),
                OneHotEncoder(handle_unknown="ignore"),
            ],
        ),
        # Operator and context columns: one-hot encode.
        (_context_cols, OneHotEncoder(handle_unknown="ignore")),
    ]
)

# Fit on a copy of the frame and store the resulting matrix as float32.
x_all = mapper.fit_transform(df.copy()).astype(np.float32)

In [None]:
# For small data
# Alternative, much narrower encoding that one-hot encodes only two columns.
# It is stored under its own names (`mapper_small`, `x_small`) so that running
# the notebook top to bottom does not overwrite `mapper` / `x_all` from the
# all-features cell; the network's first layer is sized for that 9617-column
# matrix. To experiment with the small feature set, pass `x_small` to the
# train/test split and size the network's input layer to `x_small.shape[1]`.
mapper_small = sklearn_pandas.DataFrameMapper(
    [
        (
            ["mutationOperator", "parentStmtContextDetailed"],
            OneHotEncoder(handle_unknown="ignore"),
        )
    ]
)
x_small = mapper_small.fit_transform(df.copy()).astype(np.float32)

In [None]:
# Display the encoded feature matrix.
x_all

array([[ 1.7901825 , -0.6623826 , -0.42590162, ...,  1.        ,
         0.        ,  0.        ],
       [-1.1310368 , -0.6623826 , -0.42590162, ...,  1.        ,
         0.        ,  0.        ],
       [-1.0376099 , -0.6623826 , -0.42590162, ...,  1.        ,
         0.        ,  0.        ],
       ...,
       [-1.0133685 , -0.6623826 , -0.42590162, ...,  0.        ,
         0.        ,  0.        ],
       [ 1.7934415 , -0.6623826 , -0.42590162, ...,  0.        ,
         0.        ,  0.        ],
       [ 1.7934415 , -0.6623826 , -0.42590162, ...,  0.        ,
         0.        ,  0.        ]], dtype=float32)

In [None]:
# Target vector: an independent NumPy copy of the pKillsDom column.
y_all = df["pKillsDom"].values.copy()

In [None]:
# Display the target array.
y_all

array([0.  , 0.75, 0.  , ..., 0.  , 0.  , 0.  ])

In [None]:
# Hold out half of the rows for evaluation; the fixed random_state keeps the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    x_all, y_all, test_size=0.5, random_state=42
)

# Features become float32 tensors on the selected device.
X_train = torch.tensor(X_train, dtype=torch.float32, device=device)
X_test = torch.tensor(X_test, dtype=torch.float32, device=device)

# Targets stay floating point: pKillsDom holds fractional values (e.g. 0.75)
# that an integer (Long) tensor would truncate to 0. They are shaped (N, 1) to
# line up with the network's single-unit output, so an element-wise loss
# compares row against row instead of broadcasting the two shapes together.
y_train = torch.tensor(y_train, dtype=torch.float32, device=device).unsqueeze(1)
y_test = torch.tensor(y_test, dtype=torch.float32, device=device).unsqueeze(1)

In [None]:
class ANN(nn.Module):
    """Fully connected network that maps a feature vector to one value in [0, 1].

    Four hidden layers (100, 50, 25 and 10 units) with ReLU activations feed a
    single output unit that is squashed with a sigmoid.

    Args:
        in_features: Width of each input vector. Defaults to 9617, the value
            this class previously hard-coded, so ``ANN()`` behaves as before
            with respect to layer sizes.
    """

    def __init__(self, in_features: int = 9617):
        super().__init__()
        self.fc1 = nn.Linear(in_features=in_features, out_features=100)
        self.fc2 = nn.Linear(in_features=100, out_features=50)
        self.fc3 = nn.Linear(in_features=50, out_features=25)
        self.fc4 = nn.Linear(in_features=25, out_features=10)
        self.output = nn.Linear(in_features=10, out_features=1)

    def forward(self, x):
        """Run a batch of shape (N, in_features) through the network.

        Returns:
            Tensor of shape (N, 1) with every entry in [0, 1].
        """
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        # Softmax over a single output unit is always exactly 1.0, so the
        # prediction would never change and no gradient would reach the weights.
        # A sigmoid keeps the output in [0, 1] while still depending on the input.
        return torch.sigmoid(self.output(x))

In [None]:
# Instantiate the network and move it to the selected device, then pair it with
# an L1 (absolute-error) criterion and an Adam optimizer over its parameters.
model = ANN()
model = model.to(device)
criterion = nn.L1Loss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-2)  # 0.01, 0.05, 0.1

In [None]:
# Number of features per training row; ANN's first layer is built with this
# same width (9617).
len(X_train[0])

9617

In [None]:
# Display the held-out target tensor.
y_test

tensor([0, 0, 0,  ..., 0, 0, 0], device='cuda:0')

In [None]:
epochs = 100
loss_arr = []

# Full-batch training: every epoch runs the whole training set through the
# model once and takes a single optimizer step.
for epoch in range(epochs):
    # Call the module itself rather than .forward() so registered hooks run.
    y_hat = model(X_train)

    # Flatten predictions and targets to the same 1-D float shape before
    # comparing them. Predictions come out as (N, 1); if the targets are (N,),
    # the element-wise loss would broadcast the pair to an N x N tensor, which
    # triggers the size-mismatch warning and exhausts device memory.
    loss = criterion(y_hat.reshape(-1), y_train.reshape(-1).to(y_hat.dtype))

    # Keep a detached tensor: storing `loss` itself would hold every epoch's
    # autograd graph in memory. `.item()` still works on the stored values.
    loss_arr.append(loss.detach())

    if epoch % 10 == 0:
        print(f'Epoch: {epoch} Loss: {loss.item()}')

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

  x = F.softmax(self.output(x))
  return F.l1_loss(input, target, reduction=self.reduction)


Epoch: 0 Loss: 0.8287333250045776


OutOfMemoryError: ignored

In [None]:
# Display the per-epoch losses recorded by the training loop.
loss_arr

In [None]:
# Unwrap every recorded loss tensor into a plain Python number for plotting.
loss_list = [recorded.item() for recorded in loss_arr]

In [None]:
# Training curve: loss value (y-axis) against epoch index (x-axis).
plt.title('Loss VS Epoch')
# The y-axis label was previously set with a second xlabel call, which the
# following xlabel("Epoch") overwrote, leaving the y-axis unlabelled.
plt.ylabel("Loss")
plt.xlabel("Epoch")
plt.plot(loss_list)