In [1]:
!pip install lief
!pip install boto3

Collecting lief
  Downloading lief-0.13.2-cp310-cp310-manylinux_2_24_x86_64.whl.metadata (4.0 kB)
Downloading lief-0.13.2-cp310-cp310-manylinux_2_24_x86_64.whl (4.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.0/4.0 MB[0m [31m32.5 MB/s[0m eta [36m0:00:00[0m00:01[0m:00:01[0m
[?25hInstalling collected packages: lief
Successfully installed lief-0.13.2


In [2]:
import torch
import torch.nn as nn

class MalConv(nn.Module):
    """Byte-level 1-D convolutional classifier with a sigmoid output.

    Pipeline: byte embedding -> three strided Conv1d/ReLU/dropout stages ->
    global max pooling over the sequence axis -> two fully connected layers
    -> sigmoid.

    Args:
        input_length: Accepted for interface compatibility; no layer uses it.
        embedding_dim: Size of each embedding vector.
        window_size: Accepted for interface compatibility; no layer uses it
            (kernel size and stride are fixed at 8).
        output_dim: Number of output units of the final linear layer.
    """

    def __init__(self, input_length=2000000, embedding_dim=8, window_size=128, output_dim=1):
        super(MalConv, self).__init__()
        # 257-row table (indices 0..256); row 0 is the padding row.
        self.embed = nn.Embedding(257, embedding_dim, padding_idx=0)
        # Each stage shrinks the sequence axis by 8x (kernel 8, stride 8).
        self.conv1 = nn.Conv1d(in_channels=embedding_dim, out_channels=1024, kernel_size=8, stride=8)
        self.conv2 = nn.Conv1d(in_channels=1024, out_channels=512, kernel_size=8, stride=8)
        self.conv3 = nn.Conv1d(in_channels=512, out_channels=256, kernel_size=8, stride=8)
        self.dropout = nn.Dropout(p=0.25)
        self.fc1 = nn.Linear(256, 128)
        self.fc2 = nn.Linear(128, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Score a batch of byte sequences.

        Args:
            x: Integer tensor of shape (batch, length). The three stride-8
                convolutions need length >= 512 to yield at least one position.

        Returns:
            Tensor of shape (batch, output_dim) with values in [0, 1].
        """
        x = self.embed(x.clamp(min=0, max=256))  # Ensure indices are within the valid range
        x = x.transpose(1, 2)  # Conv1d expects (batch_size, channels, length)
        x = self.conv1(x)
        x = torch.relu(x)
        x = self.dropout(x)  # Apply dropout after the first convolutional layer
        x = self.conv2(x)
        x = torch.relu(x)
        x = self.dropout(x)  # Apply dropout after the second convolutional layer
        x = self.conv3(x)
        x = torch.relu(x)
        x = self.dropout(x)  # Apply dropout after the third convolutional layer
        # Global max pooling over the sequence axis -> (batch, 256).
        # No squeeze here: a dimension-less squeeze would also drop the batch
        # axis whenever batch == 1 and make the output shape batch-dependent.
        x = torch.max(x, dim=2)[0]
        x = self.fc1(x)
        x = self.dropout(x)  # Apply dropout after the first fully connected layer
        x = torch.relu(x)
        x = self.fc2(x)
        # NOTE(review): dropout on the output logit only has an effect in
        # training mode; kept as-is so training behavior is unchanged.
        x = self.dropout(x)  # Apply dropout after the second fully connected layer
        x = self.sigmoid(x)
        return x

# Instantiate the network with its default arguments and print the layer summary.
model = MalConv()
print(model)

MalConv(
  (embed): Embedding(257, 8, padding_idx=0)
  (conv1): Conv1d(8, 1024, kernel_size=(8,), stride=(8,))
  (conv2): Conv1d(1024, 512, kernel_size=(8,), stride=(8,))
  (conv3): Conv1d(512, 256, kernel_size=(8,), stride=(8,))
  (dropout): Dropout(p=0.25, inplace=False)
  (fc1): Linear(in_features=256, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=1, bias=True)
  (sigmoid): Sigmoid()
)


In [8]:
# Define a function to load the trained model
def load_model(model_path):
    """Build a MalConv network, restore its weights from disk and return it in eval mode.

    Args:
        model_path: Path to a state dict saved for a MalConv instance.

    Returns:
        The MalConv module with the stored weights loaded, switched to
        evaluation mode.
    """
    net = MalConv()
    # NOTE(review): torch.load unpickles the file; only point this at
    # checkpoints from a trusted source.
    cpu = torch.device('cpu')
    weights = torch.load(model_path, map_location=cpu)
    net.load_state_dict(weights)
    # nn.Module.eval() returns the module itself.
    return net.eval()

In [9]:
import lief
import numpy as np
import torch

def testPE(pe_path):
    """Classify the file at ``pe_path`` with the saved MalConv model.

    Args:
        pe_path: Path of the file to classify.

    Returns:
        "Invalid PE file" when LIEF cannot parse the file as a PE binary;
        otherwise "Malware" if the model score is above 0.5, else "Benign".
    """
    # Number of leading bytes fed to the model; shorter files are zero-padded.
    max_len = 2000000

    # Validate the input first so an unusable file never costs a model load.
    try:
        pe = lief.parse(pe_path)
    except lief.read_error:
        return "Invalid PE file"
    # lief.parse may return None instead of raising, and it also accepts
    # non-PE formats; reject everything that is not a parsed PE binary.
    if not isinstance(pe, lief.PE.Binary):
        return "Invalid PE file"

    # Load Model
    model = load_model('vMalConv/my_model.pth')

    # Extract Features: raw bytes, right-padded with zeros / truncated to max_len
    bytez = np.fromfile(pe_path, dtype=np.uint8)
    if len(bytez) < max_len:
        bytez = np.pad(bytez, (0, max_len - len(bytez)), mode='constant')
    # unsqueeze(0) adds the batch axis -> shape (1, max_len)
    features = torch.tensor(bytez[:max_len], dtype=torch.long).unsqueeze(0)

    # Run the model on the features
    with torch.no_grad():
        output = model(features)

    # Return the prediction
    return "Malware" if output.item() > 0.5 else "Benign"

In [10]:
# Classify the sample executable (path is relative to the working directory)
prediction = testPE('PUTTY.EXE')

# Print the label returned by testPE
print("Prediction:", prediction)

Prediction: Benign


In [11]:
import tarfile

# Bundle the vMalConv/ directory into a gzip-compressed archive, stored under
# the top-level name "model". The context manager guarantees the archive is
# finalized and closed even if adding the directory fails part-way.
with tarfile.open("model.tar.gz", "w:gz") as tar:
    tar.add("vMalConv/", arcname="model")

In [12]:
import boto3

# Resource-level S3 handle; credentials come from the ambient AWS configuration.
s3 = boto3.resource("s3")

# Push the packaged archive to the model bucket under the same object key.
s3.Bucket("jafar.vohra.malconv.model").upload_file(
    Filename='model.tar.gz',
    Key='model.tar.gz',
)

In [13]:
import sagemaker
from sagemaker.pytorch import PyTorchModel
from sagemaker import get_execution_role

# Execution role of the current notebook environment, handed to the model below.
role = get_execution_role()

# NOTE(review): this session object is not passed to anything in this cell.
sagemaker_session = sagemaker.Session()

# Describe the hosted model: the archive uploaded to S3 plus the inference script.
model = PyTorchModel(
    model_data='s3://jafar.vohra.malconv.model/model.tar.gz',
    role=role,
    entry_point='inference.py',
    #source_dir='model/code/',
    py_version='py3',
)

# Stand up a single-instance endpoint and block until the deployment finishes.
predictor = model.deploy(
    instance_type='ml.m5.xlarge',
    initial_instance_count=1,
    wait=True,
)

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml
------!