# Datasets

## Imports

In [1]:
import datetime
import os
import subprocess
from collections import OrderedDict

import numpy as np
import pandas as pd

import yaml
import requests

import matplotlib.pyplot as plt
import matplotlib
import scienceplots

In [2]:
# Provenance banner: when the notebook was last executed and which versions
# of the core numerical/plotting libraries were loaded.
print(f"Last run: {datetime.datetime.now()}")

version_parts = [
    f"numpy: {np.__version__}",
    f"pandas: {pd.__version__}",
    f"matplotlib: {matplotlib.__version__}",
]
# Unpacking keeps print's default single-space separator between the entries.
print(*version_parts)

Last run: 2024-03-07 22:49:53.372800
numpy: 1.26.4 pandas: 2.2.1 matplotlib: 3.8.3


In [3]:
# Render matplotlib figures inline in the notebook output area.
%matplotlib inline

In [4]:
# Apply the "science" and "ieee" style sheets to all subsequent figures.
# NOTE(review): these style names are presumably registered by the
# `import scienceplots` line in the imports cell - keep that import.
plt.style.use(["science", "ieee"])

## Parse Data

In [5]:
# Download the project list of the best-of-ps repository and parse it.
#
# The GitHub "blob" URL is expected to answer with a JSON envelope whose
# `payload.blob.rawLines` entry holds the file content line by line; that
# content is the actual projects.yaml document.
yaml_url = 'https://github.com/jinningwang/best-of-ps/blob/master/projects.yaml'

# A timeout keeps "Run All" from hanging forever on a stalled connection.
response = requests.get(yaml_url, timeout=30)
if response.status_code != 200:
    # Fail fast: continuing would either hit a NameError on `tool_lists`
    # or silently reuse a stale value left in the kernel by an earlier run.
    raise RuntimeError(
        f"Failed to fetch YAML file from {yaml_url}. Status code: {response.status_code}"
    )

# The JSON envelope is parsed with the YAML loader, as before.
tool_lists = yaml.safe_load(response.text)

# Re-assemble the original file text from the envelope and parse it as YAML.
raw_text = '\n'.join(tool_lists['payload']['blob']['rawLines'])
parsed_data = yaml.safe_load(raw_text)
cats_data = parsed_data['categories']

In [6]:
# Initialize the dictionary for analysis
#
# Resulting layout: tools[category][tool name] -> OrderedDict with the keys
# 'description', 'github_id', 'labels' and 'license' (None when the project
# entry does not define the field).

tools = OrderedDict()

# Seed the mapping with the declared categories first so that their order
# follows the `categories` section of the YAML file.
for cat in cats_data:
    tools[cat['category']] = OrderedDict()

for tool_dict in parsed_data['projects']:
    cat = tool_dict['category']
    tool = tool_dict['name']
    # setdefault tolerates a project whose category is missing from the
    # `categories` section instead of aborting with a KeyError; dict.get
    # yields None for absent fields.
    tools.setdefault(cat, OrderedDict())[tool] = OrderedDict(
        (field, tool_dict.get(field))
        for field in ('description', 'github_id', 'labels', 'license')
    )

In [7]:
# Look up the tools registered under the 'phasor' category.
tools['phasor']

# Look up the tools registered under the 'steady-state' category.
tools['steady-state']

# List every category key present in the mapping.
tools.keys()

odict_keys(['phasor', 'emt', 'steady-state', 'intf', 'ops', 'opl', 'mrl', 'cosime', 'gasnet', 'vis', 'msg', 'data', 'pe'])

In [8]:
# AMS as an example to show static code analysis

github_id = tools['steady-state']['LTB AMS']['github_id']
proj_name = github_id.split('/')[-1]  # last path component of "<owner>/<repo>"
repo_path = f"./../repos/{proj_name}"  # Adjust the path as necessary

# Check if the repository directory already exists
if not os.path.exists(repo_path):
    git_url = f"https://github.com/{github_id}.git"
    # Argument list instead of a shell string: the previous "!git clone ..."
    # string carried the IPython-only "!" prefix into the shell (so the
    # command could not be found), and `github_id` comes from a downloaded
    # file, so it should not be interpolated into a shell line.
    clone_command = ["git", "clone", git_url, repo_path]
    print("Cloning repository...")
    # check=True raises CalledProcessError if git exits with a non-zero status.
    subprocess.run(clone_command, check=True)
else:
    print(f"Repository '{proj_name}' already exists at {repo_path}. Skipping clone.")

Repository 'ams' already exists at ./../repos/ams. Skipping clone.


In [9]:
!semgrep --config=p/python --json ./../repos/{proj_name} > ./../repos/results/{proj_name}.json

               
               
┌─────────────┐
│ Scan Status │
└─────────────┘
  Scanning 195 files tracked by git with 147 Code rules:
  Scanning 85 files with 147 python rules.
[2K  [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [35m100%[0m [33m0:00:01[0m                                                                                                                        [90m━[0m[90m━[0m[90m━[0m[90m━[0m[90m━[0m[90m━[0m[90m━[0m[90m━[0m[90m━[0m[90m━[0m[35m━[0m[91m━[0m[91m━[0m[91m━[0m[91m━[0m[91m━[0m[91m━[0m[91m━[0m[35m━[0m[90m━[0m [35m  0%[0m [33m-:--:--[0m
[?25h                
                
┌──────────────┐
│ Scan Summary │
└──────────────┘
Some files were skipped or only partially analyzed.
  Scan was limited to files tracked by git.
  Scan skipped: 16 files matching .semgrepignore patterns
  For a full list of skipped files, run semgrep with the --verbose flag.

Ran 147 rules on 85 files: 1 finding.
