<a href="https://colab.research.google.com/github/cd-public/D505/blob/master/ipynb/vcd2df_spark_iflow_demo.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Fetch the Isadora repository; the cells below read VCD files from Isadora/model/r5/vcds.
!git clone https://github.com/cd-public/Isadora.git

Cloning into 'Isadora'...
remote: Enumerating objects: 5272, done.[K
remote: Counting objects: 100% (12/12), done.[K
remote: Compressing objects: 100% (7/7), done.[K
remote: Total 5272 (delta 5), reused 9 (delta 5), pack-reused 5260 (from 1)[K
Receiving objects: 100% (5272/5272), 106.03 MiB | 7.26 MiB/s, done.
Resolving deltas: 100% (4388/4388), done.
Updating files: 100% (4176/4176), done.


In [2]:
# List the VCD files available under the r5 model directory of the clone.
ls Isadora/model/r5/vcds

alu_add_sub.vcd                   instr_sltu.vcd
alu_eq.vcd                        instr_slt.vcd
alu_lts.vcd                       instr_srai.vcd
alu_ltu.vcd                       instr_sra.vcd
alu_out_0_q.vcd                   instr_srli.vcd
alu_out_0.vcd                     instr_srl.vcd
alu_out_q.vcd                     instr_sub.vcd
alu_out.vcd                       instr_sw.vcd
alu_shl.vcd                       instr_timer.vcd
alu_shr.vcd                       instr_waitirq.vcd
alu_wait_2.vcd                    instr_xori.vcd
alu_wait.vcd                      instr_xor.vcd
cached_ascii_instr.vcd            irq_active.vcd
cached_insn_imm.vcd               irq_delay.vcd
cached_insn_opcode.vcd            irq_mask.vcd
cached_insn_rd.vcd                irq_pending.vcd
cached_insn_rs1.vcd               irq_state.vcd
cached_insn_rs2.vcd               is_alu_reg_imm.vcd
clear_prefetched_high_word_q.vcd  is_alu_reg_reg.vcd
clear_prefetched_high_word.vcd    is_beq_bne_blt_bge_bltu_bgeu.vcd


In [3]:
# adapted from https://github.com/vcd2df/py
# iterate a generator over the split text instead of a file pointer
# (note: str.splitlines() builds the whole list up front; only the iteration is lazy)

# on my device, /home/user/.local/bin/spark-submit script.py

import pandas as pd
import pickle

def get_vars(lines):
    """Read the VCD header and map identifier codes to signal names.

    Consumes `lines` up to and including the first line that contains
    ``$enddefinitions``, so the caller can keep reading from the same iterator.

    Args:
        lines: iterator of VCD text lines, positioned at the start of the header.

    Returns:
        dict mapping each identifier code (4th token of a ``$var`` line) to its
        reference name (5th token), in declaration order. A declaration whose
        name is already present among the mapped names is skipped.

    Raises:
        StopIteration: if `lines` is exhausted before ``$enddefinitions``.
    """
    names_by_id = {}  # insertion order >= 3.7
    line = next(lines)
    while "$enddefinitions" not in line:
        parts = line.split()
        # Only real declarations count. A bare substring test for "var" also
        # matches lines such as "$scope module var_unit $end", which have no
        # name field and would raise IndexError on parts[4].
        if len(parts) >= 5 and parts[0] == "$var":
            if parts[4] not in names_by_id.values():
                names_by_id[parts[3]] = parts[4]
        line = next(lines)
    return names_by_id

def str2df(str):
    """Convert the text of a VCD file into a DataFrame of signal values.

    Args:
        str: full contents of a VCD file as one string.

    Returns:
        pandas DataFrame with one row per declared signal (indexed by signal
        name, as returned by ``get_vars``) and one column per timestamp label
        (e.g. ``"#0"``, ``"#10000"``). Each cell is the signal's integer value
        during that timestamp; -1 marks a value that has not been set yet or
        is not a plain binary number (x, z, reals).

    Raises:
        StopIteration: if the text ends before ``$enddefinitions`` or
            ``$dumpvars`` is found.
        ValueError: if a vector value-change line does not consist of exactly
            a value and an identifier.
    """
    lines = iter(str.splitlines())
    names = get_vars(lines)
    state = {ident: -1 for ident in names}  # current value per identifier code
    columns = {}
    while "$dumpvars" not in next(lines):
        pass
    tick = "#0"
    for raw in lines:
        line = raw.strip()
        # Blank lines would crash on line[0]; "$..." directives ($end,
        # $comment, $dumpoff, ...) are not value changes.
        if not line or line.startswith("$"):
            continue
        if line.startswith("#"):  # new timestamp: store the one just finished
            columns[tick] = pd.Series(list(state.values()))
            tick = line
            continue
        if " " in line:  # word: "<radix><value> <id>"
            val, var = line[1:].split()
        else:  # bit: "<value><id>"
            val, var = line[0], line[1:]
        if var in state:
            state[var] = int(val, 2) if val.isdigit() else -1
    # Store the last timestamp too; no later "#" line will ever trigger it.
    columns[tick] = pd.Series(list(state.values()))
    df = pd.DataFrame(columns, dtype=int)
    df.index = list(names.values())
    return df

# "shadow" registers contain iflow status
# find changes from 0 to 1 at nonzero times
def iflow_times(local):
    """Report when each "shadow" register reaches its maximum value.

    Args:
        local: DataFrame indexed by signal name, with timestamp labels such as
            ``"#0"`` and ``"#10000"`` as columns and integer values as cells
            (-1 meaning unknown, as produced by ``str2df``).

    Returns:
        pandas Series indexed by shadow register name. Each value is the
        integer timestamp of the first column in which that register attains
        its row maximum. Registers that never hold a positive value, or whose
        maximum is already present at ``"#0"``, are omitted.
    """
    shadows = local[local.index.str.contains("shadow")]
    # -1 (unknown) is truthy, so a plain .any() would keep registers that are
    # only ever unknown or 0, and idxmax would then report the first 0 as a
    # flow. Require at least one strictly positive sample instead.
    flagged = shadows[(shadows > 0).any(axis=1)]
    first_max = flagged.idxmax(axis=1)
    first_max = first_max[first_max != "#0"]
    return first_max.apply(lambda s: int(s[1:]))

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, udf, input_file_name

spark = SparkSession.builder.appName("Python Spark SQL vcd2df iflow demo").getOrCreate()

# Always stop the session, even if reading or collecting fails.
try:
    # VCDs are read from the cloned Isadora repository (model/r5/vcds).
    # Another set lives at https://github.com/cd-public/Isadora/tree/master/model/single/vcds
    # Set path appropriately, perhaps
    # wholetext=True yields one row per file, with the whole file in "value".
    df = spark.read.text("Isadora/model/r5/vcds/*.vcd", wholetext=True).withColumn("filename", input_file_name())
    # "src" is the file's base name without directory or extension.
    df = df.select(col("value"), udf(lambda fn : fn.split("/")[-1].split(".")[0])(col("filename")).alias("src"))
    # switch from udf to rdd map to not wrangle schema
    # each element: {source name without "shadow_": Series of iflow times}
    mid = df.rdd.map(lambda x: {x[1].replace("shadow_",""):iflow_times(str2df(x[0]))})
    # reduce stage: keep only traces that produced at least one iflow time
    xs = [x for x in mid.collect() if not list(x.values())[0].empty]
finally:
    spark.stop()

In [4]:
# Display the collected results: one {source name: Series of iflow times} dict per trace.
xs

[{'decoder_trigger_q': shadow_q_insn_rd        10000
  shadow_q_insn_imm       10000
  shadow_q_ascii_instr    10000
  shadow_q_insn_rs2       10000
  shadow_q_insn_opcode    10000
  shadow_q_insn_rs1       10000
  dtype: int64},
 {'instr_sub': shadow_mem_wdata         1310000
  shadow_mem_addr          1130000
  shadow_alu_shr           1100000
  shadow_alu_shl           1100000
  shadow_alu_out_q           10000
  shadow_cpuregs_rs2       1190000
  shadow_cpuregs_rs1       1080000
  shadow_dbg_rs2val        1280000
  shadow_dbg_rs1val        1100000
  shadow_cpuregs_wrdata    1070000
  shadow_reg_op2           1280000
  shadow_reg_sh            1280000
  shadow_reg_op1           1100000
  shadow_mem_la_wdata      1280000
  shadow_alu_eq            1100000
  shadow_alu_ltu           1100000
  shadow_alu_lts           1100000
  dtype: int64},
 {'instr_lbu': shadow_mem_wdata         1310000
  shadow_alu_shl           1240000
  shadow_alu_add_sub       1240000
  shadow_alu_out_q         