## MLRAN Dataset

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Exploratory report for the MLRAN metadata CSV: prints size, schema, class
# label distributions and summary statistics, then saves a bar chart of the
# ransomware-type distribution to 'mlran_types.png'.
# NOTE(review): assumes the CSV has 'Year', 'sample_type', 'ransomware_family'
# and 'ransomware_type' columns -- a missing column raises KeyError.
df = pd.read_csv('mlran_dataset_metadata.csv')

print("MLRAN METADATA FILE ANALYSIS")

# Dataset properties: row/column counts and the range of the 'Year' column.
print("\n1. SIZE:")
print(f"   Total records: {df.shape[0]}")
print(f"   Total columns: {df.shape[1]}")
print(f"   Time span: {df['Year'].min()}-{df['Year'].max()}")

# Column names present in the metadata file.
print("\n2. COLUMNS IN METADATA:")
print(f"   Features: {list(df.columns)}")

# Preview of the first rows (DataFrame.head() defaults to 5 rows).
print("\n3. FIRST 5 ROWS:")
print(df.head())

# Per-column dtypes as inferred by pandas on load.
print("\n4. DATA TYPES:")
print(df.dtypes)

# Class label distributions.
print("\n5. CLASS LABELS:")
print("\n   Binary Classification:")
print(df['sample_type'].value_counts())
print("\n   Ransomware Families (Top 10):")
# value_counts() sorts by descending frequency, so head(10) yields the ten
# most common families. A bare head() would show only 5 and contradict the
# "Top 10" heading printed above.
print(df['ransomware_family'].value_counts().head(10))
print("\n   Ransomware Types:")
print(df['ransomware_type'].value_counts())

# Summary statistics; describe() covers numeric columns by default.
print("\n6. DATASET INFO:")
print(df.describe())

# Bar chart of ransomware-type frequencies, written to disk as a PNG.
plt.figure(figsize=(10, 5))
df['ransomware_type'].value_counts().plot(kind='bar', color='blue')
plt.title('MLRan: Ransomware Types Distribution')
plt.xlabel('Type')
plt.ylabel('Count')
plt.xticks(rotation=45)
# tight_layout keeps the rotated tick labels from being clipped in the file.
plt.tight_layout()
plt.savefig('mlran_types.png')
print("\n   Chart saved: mlran_types.png")


## JS Dataset

In [None]:
import pandas as pd
import numpy as np

# Exploratory report for the JavaScript vulnerability dataset: prints size,
# dimensionality, dtypes, label distribution and summary statistics.
# When no 'label' column exists, the report falls back to the value counts of
# the last column instead.
df = pd.read_csv('JSVulnerabilitydataSet-1.0.csv')
has_label = 'label' in df.columns
row_count, column_count = df.shape

print("JAVASCRIPT VULNERABILITY DATASET PROPERTIES")

# --- Property A: size ---
print("\n1. SIZE:")
print(f"   Total functions: {row_count:,}")
if not has_label:
    print("   Label column distribution:")
    print(f"   {df.iloc[:, -1].value_counts()}")
else:
    vulnerable_mask = df['label'] == 1
    non_vulnerable_mask = df['label'] == 0
    print(f"   Vulnerable functions: {vulnerable_mask.sum():,}")
    print(f"   Non-vulnerable functions: {non_vulnerable_mask.sum():,}")

# --- Property B: dimension ---
leading_columns = list(df.columns[:10])
print("\n2. DIMENSION:")
print(f"   Total features: {column_count}")
print(f"   Sample features: {leading_columns}")

# --- Property D: data types ---
print("\n4. DATA TYPES:")
print(df.dtypes)

# --- Property E: class labels ---
print("\n5. CLASS LABELS:")
print("\n   Binary Classification:")
if not has_label:
    print(df.iloc[:, -1].value_counts())
else:
    label_counts = df['label'].value_counts().sort_index()
    # .get(..., 0) reports zero for a class that never occurs in the data.
    non_vulnerable_total = label_counts.get(0, 0)
    vulnerable_total = label_counts.get(1, 0)
    print(f"   Non-vulnerable (0): {non_vulnerable_total:,}")
    print(f"   Vulnerable (1): {vulnerable_total:,}")

# --- Basic statistics ---
print("\n6. FEATURE STATISTICS:")
summary_stats = df.describe()
# Only the first five described columns are shown to keep the output narrow.
print(summary_stats.iloc[:, :5])



