In [3]:
import pandas as pd
import requests
from mitrecve import crawler
import random
from pathlib import Path

In [9]:
# Load the labelled dataset from <parent of the working directory>/data/label_data.csv;
# the first CSV column is used as the row index.
data_path = (Path.cwd().parent / 'data' / 'label_data.csv').as_posix()
labelled_df = pd.read_csv(data_path, index_col=0)

# Rows whose Label equals 1 are the malicious packages
malicious_df = labelled_df.loc[labelled_df['Label'] == 1]
# Independent copy holding only the columns needed from here on
mal_df = malicious_df.loc[:, ['Name', 'Version', 'Label']].copy()

In [10]:
# Look up CVE details on MITRE for a package
def mitre_cve_api(package):
    """Fetch CVE details for `package` through the mitrecve crawler.

    The value returned by ``crawler.get_main_page(package)`` is handed
    straight to ``crawler.get_cve_detail``, and that call's result is
    returned unchanged.
    """
    return crawler.get_cve_detail(crawler.get_main_page(package))


def extract_random_desc(package_name, rng=None):
    """Return one randomly chosen CVE description for `package_name`.

    Calls ``mitre_cve_api(package_name)`` and collects the ``'DESC'`` value of
    every entry in ``result.values()`` that has one.
    NOTE(review): this assumes the lookup returns a mapping of mappings;
    confirm against the mitrecve version in use.

    Parameters
    ----------
    package_name : str
        Name passed to the MITRE lookup.
    rng : random.Random, optional
        Source of randomness exposing ``choice``. Pass a seeded
        ``random.Random`` to make the selection reproducible. When omitted,
        the module-level ``random`` generator is used, as before.

    Returns
    -------
    str
        A description picked at random, or ``''`` when no entry carries a
        ``'DESC'`` or when the lookup/parsing raises.
    """
    try:
        result = mitre_cve_api(package_name)
        descs = [entry['DESC'] for entry in result.values() if 'DESC' in entry]
    except Exception as e:
        # Best effort: one failing lookup must not abort a whole-column apply.
        print(f"Error querying {package_name}: {e}")
        return ''

    if not descs:
        return ''
    chooser = random if rng is None else rng
    return chooser.choice(descs)


In [12]:
# One lookup per row: store a randomly chosen CVE description ('' when none is found)
mal_df['Desc'] = mal_df['Name'].map(extract_random_desc)


In [None]:
# Write the enriched table to the working directory, leaving out the index column
mal_df.to_csv(path_or_buf='mal_packages_with_desc.csv', index=False)