## Validation of JTAG Jitter Measurement

In [1]:
import numpy as np
import chipwhisperer as cw
import os

In [2]:
%run "functions.ipynb"

In [3]:
data_dir = "data_store"

## ChipWhisperer Configuration (Shared)

In [4]:
# Set hardware settings
SCOPETYPE = 'OPENADC'
PLATFORM = 'CW308_SAM4S'
CRYPTO_TARGET='TINYAES128C' # 'TINYAES128C' or 'MBEDTLS'
SS_VER='SS_VER_2_1'

In [8]:
# Connect to ChipWhisperer
#%run "../../../Setup_Scripts/Setup_Generic.ipynb"

# The FPGA bitstream location is machine-specific. Set the CWHUSKY_BITSTREAM
# environment variable to point at your own copy; when it is unset, the path
# originally used for these measurements is kept as the default.
BITSTREAM_PATH = os.environ.get(
    "CWHUSKY_BITSTREAM",
    r"C:\dev\cwhusky-fpga-jtag\cwhusky_top_jtagclknoglitch.bit",
)
scope = cw.scope(bitstream=BITSTREAM_PATH)
target = cw.target(scope, cw.targets.SimpleSerial2)

In [9]:
scope.default_setup()

scope.gain.mode                          changed from low                       to high                     
scope.gain.gain                          changed from 0                         to 22                       
scope.gain.db                            changed from 15.0                      to 25.091743119266056       
scope.adc.samples                        changed from 131124                    to 5000                     
scope.clock.clkgen_freq                  changed from 0                         to 7370129.87012987         
scope.clock.adc_freq                     changed from 0                         to 29480519.48051948        
scope.clock.extclk_monitor_enabled       changed from True                      to False                    
scope.clock.extclk_tolerance             changed from 1144409.1796875           to 13096723.705530167       
scope.io.tio1                            changed from serial_tx                 to serial_rx                
scope.io.tio2      

In [10]:
def jtag_clkout(enabled):
    """Switch the TDI/TCK clock output on or off.

    Writes a single byte to the ``USERIO_DEBUG_DRIVEN`` register through the
    scope's low-level message interface: ``0x08`` when ``enabled`` is truthy,
    ``0x00`` otherwise.
    """
    CODE_READ = 0x80
    CODE_WRITE = 0xC0

    payload = [0x08 if enabled else 0x00]
    scope.userio.oa.sendMessage(CODE_WRITE, "USERIO_DEBUG_DRIVEN", payload)

    # To confirm the write took effect, read the register back:
    #scope.userio.oa.sendMessage(CODE_READ, "USERIO_DEBUG_DRIVEN")
    

In [11]:
scope.clock.clkgen_freq = 20E6
scope.clock.adc_mul = 1

In [12]:
scope.clock

clkgen_src             = system
clkgen_freq            = 20000000.0
adc_mul                = 1
adc_freq               = 20000000.0
freq_ctr               = 0
freq_ctr_src           = extclk
clkgen_locked          = True
adc_phase              = 0
extclk_monitor_enabled = False
extclk_error           = False
extclk_tolerance       = 13096723.705530167

## Build Firmware - 2 MHz Internal Clock, no External Output

In [None]:
C_EXTRA_DEFS="-DUSE_PLL -DUSE_EMBEDDED_CLOCK -DPLL_15MHZ"

In [None]:
%%bash -s "$PLATFORM" "$CRYPTO_TARGET" "$SS_VER" "$C_EXTRA_DEFS"
# compile firmware
cd ../colin-hacktest1/firmware/simpleserial-aes-spitest
make PLATFORM=$1 CRYPTO_TARGET=$2 SS_VER=$3 C_EXTRA_DEFS="$4" -j

In [None]:
# program firmware onto target
prog = cw.programmers.SAM4SProgrammer
cw.program_target(scope, prog, "../colin-hacktest1/firmware/simpleserial-aes-spitest/simpleserial-aes-{}.hex".format(PLATFORM))

In [None]:
# Baud is lower so it works from internal oscillator which isn't as precise
target.baud = 38400

In [None]:
# When device is running at 2 MHz baud calculation is off - measuring actual baud shows this is correct:
# Uncomment this when compiling for 2 MHz firmware
#target.baud = 62750

## JTAG Setup

In [None]:
scope.io.hs2 = None

#### Setting up JTAG into Bypass Mode

In [None]:
def read_tdo_status():
    """Return True when bit 3 of ``scope.userio.status`` is set, else False.

    Bit 3 is TDO in this notebook's USERIO pin map (d[3] = tdo).
    """
    TDO_MASK = 1 << 3
    return bool(scope.userio.status & TDO_MASK)
    
def write(tms, tdi):
    """Present TMS/TDI on the USERIO pins and clock them with one TCK pulse.

    Args:
        tms: Truthy to drive TMS (bit 6) high, falsy for low.
        tdi: Truthy to drive TDI (bit 7) high, falsy for low.

    The other bits of ``scope.userio.drive_data`` are read once and preserved.
    """
    TCK_BIT = 1 << 5
    TMS_BIT = 1 << 6
    TDI_BIT = 1 << 7

    # Start from the current output word with TMS and TDI cleared.
    level = scope.userio.drive_data & ~(TMS_BIT | TDI_BIT)
    level |= TMS_BIT if tms else 0
    level |= TDI_BIT if tdi else 0

    # Three writes: data bits first (TCK untouched), then TCK high, then TCK low.
    for word in (level, level | TCK_BIT, level & ~TCK_BIT):
        scope.userio.drive_data = word

Normally `JTAGSEL` being low works fine. Sometimes it's helpful to set it high (call the following function with `True`) for testing. Note when `JTAGSEL` is high code won't run on the microcontroller. But the bypass mode worked fine with this set `False`.

This assumes you've modified the SAM4S2AA board to route TIO3 to JTAGSEL, see this photo:


Note this should NOT be required, so you can recreate the results with a stock ChipWhisperer-Husky kit.

In [None]:
def change_jtag_mode(boundary_scan=False):
    """Set the JTAGSEL level via TIO3 and reset the target so it takes effect.

    Args:
        boundary_scan: Value written to ``scope.io.tio3`` (routed to JTAGSEL on
            the modified board). ``True`` drives it high, ``False`` low.

    The target's nRST line is held low for 50 ms and then released.
    """
    # Imported locally: the notebook's own `import time` only appears in a later
    # cell, so on a fresh kernel `time` is not guaranteed to be in scope here.
    import time

    scope.io.tio3 = boundary_scan
    scope.io.nrst = False
    time.sleep(0.05)
    scope.io.nrst = True

The following requires you to put the 20-pin connector from the USERIO pins on the CW-HUSKY to the JTAG header on the CW313 target board. With that connected, you have the following:

* d[2] = nrst
* d[3] = tdo
* d[4] = rclk
* d[5] = tck
* d[6] = tms
* d[7] = tdi

Running the following code will enable bypass mode and then tri-state the TCK & TDI pins. You can then feed a 40 MHz clock into TCK & a 20 MHz clock into TDI, be sure there is a 90 degree phase offset so the rising edge of the 40 MHz clock correctly clocks the 20 MHz clock into TDI.

If it worked, you should see a 20 MHz clock coming out of TDO. The 20 MHz clock is what we use to measure the delay in the target device.

In [None]:
def setup_bypass(verbose=True):
    """Bit-bang the JTAG TAP into bypass and verify a test bit comes back on TDO.

    Args:
        verbose: When True, print whether the setup succeeded (and the captured
            TDO samples on failure).

    Returns:
        True if the ten TDO samples match the expected pattern (a single '1'
        delayed by one clock), False otherwise.
    """
    #Take control of TDI, TMS, TCK
    scope.userio.direction = 0b11100000

    # TMS values clocked out with TDI held high, in order. The state names
    # follow the standard IEEE 1149.1 TAP state machine.
    tms_sequence = (
        [1, 1, 1, 1, 1]      # five clocks with TMS high: TAP reset
        + [0, 1, 1, 0, 0]    # walk to Shift-IR
        + [0] * 10           # send a bunch of 1's to force bypass mode
        + [1]                # exit shift-IR state
        + [1, 1, 0, 0]       # walk to Shift-DR
    )
    for tms in tms_sequence:
        write(tms, 1)

    # Flush the data path with zeros before sending the test bit.
    for step in range(0, 10):
        write(0, 0)

    # Sample TDO before each clock; only the first clock shifts in a '1'.
    captured = []
    for step in range(0, 10):
        captured.append(read_tdo_status())
        write(0, 1 if step == 0 else 0)

    expected = [False, True] + [False] * 8
    success = captured[0:10] == expected

    if verbose:
        if success:
            print("JTAG Setup successful - bypass mode enabled, saw '1' sequence successfuly")
        else:
            print("JTAG Setup not successful")
            print(captured)
    return success

In [None]:
jtag_clkout(False)
setup_bypass()

In [None]:
scope.clock.clkgen_freq = 5E6

The following will turn on the TDI/TCK clock. If you externally jumper TDO to HS1 you can confirm the clock is coming back OK.

In [None]:
jtag_clkout(True)

In [None]:
scope.clock

In [None]:
 scope.clock.pll._warning_freq = 400E6

In [None]:
import time

# Sweep the clock generator from 15 MHz to 299 MHz in 1 MHz steps. At each step
# bypass mode is re-established, the clock output is turned back on, and the
# frequency counter reading is compared against half the generated frequency.
lasterror = 0
errorlist = []
for i in range(15, 300, 1):

    jtag_clkout(False)
    if not setup_bypass(False):
        # Deliberately non-fatal so the sweep can continue past the frequency
        # where the target stops responding, but report it instead of silently
        # ignoring the failure.
        #raise IOError("JTAG Setup error!")
        print("WARNING: JTAG bypass setup failed before the {} MHz step".format(i))
    scope.clock.clkgen_freq = i*1E6
    jtag_clkout(True)
    time.sleep(0.1)  # give the frequency counter time to update

    expected = i*1E6 / 2
    actual = scope.clock.freq_ctr

    error = (actual - expected) / expected * 100

    print("{:.1f} MHz TCK: {:.1f} MHz input (error = {:.2f}%)".format(i, actual/1E6, error))

    errorlist.append({"tck":i, "actual":actual})

In [None]:
#np.save("sam4s2aa_errorlist.npy", errorlist)
#np.save("mpc5676r_errorlist.npy", errorlist)
#np.save("k24f_errorlist.npy", errorlist)
#np.save("stm32f303_errorlist.npy", errorlist)
#np.save("stm32g474_errorlist.npy", errorlist)

## ChipWhisperer Measurement Setup

In [None]:
jtag_clkout(False)
setup_bypass()

In [None]:
# Default to 200 MHz TCK - OK for SAM4S2AA, not all devices will support this high of a frequency however.
scope.clock.clkgen_freq = 200E6

In [None]:
jtag_clkout(True)

In [None]:
scope.gain.mode = "low"
scope.gain.gain = 25

In [None]:
scope.clock.clkgen_freq = 200E6

### TVLA for Jitter Measurement

In [None]:
scope.adc.samples = 20000

In [None]:
scope.adc.offset

In [None]:
splot = cw.StreamPlot()
splot.plot()

In [None]:
N = 1000
group1, group2 = capture_ttest(N, picoscope=False, splot=splot)

In [None]:
# Mean of the first sample across all group-1 traces; used as the threshold
# for the one-sample resynchronisation that follows.
first = [g[0] for g in group1]
avg = np.mean(first)

In [None]:
# One-sample resync: traces whose first sample is above the threshold drop their
# last sample, all others drop their first, so every trace ends up one sample
# shorter. Presumably this aligns captures that started on alternating
# clock phases — TODO confirm.
group1resync = [g[:-1] if g[0] > avg else g[1:] for g in group1]
group2resync = [g[:-1] if g[0] > avg else g[1:] for g in group2]

In [None]:
#nptsave("cwhusky_jtag_vdic_2mhzcpu_20mhzadc_15kpts_10ktraces_async", group1, group2)

In [None]:
#nptsave("cwhusky_jtag_mixeronport_2mhzcpu_200mhzadc_50kpts_1ktraces_async", group1, group2)

In [None]:
#group1, group2, N = nptload("cwhusky_jtag_vdic_2mhzcpu_20mhzadc_15kpts_10ktraces_async")

In [None]:
from scipy import signal
sos = signal.butter(5, 0.01, 'highpass', output='sos')

group11 = signal.sosfilt(sos, group1resync)
group12 = signal.sosfilt(sos, group2resync)

In [None]:
import numpy as np
mean1 = np.mean(group11, axis=0)#[2000:]
mean2 = np.mean(group12, axis=0)#[2000:]
cw.plot(mean2) * cw.plot(mean1)

In [None]:
from scipy.stats import ttest_ind
t_val = ttest_ind(group11, group12, axis=0, equal_var=False)[0]

In [None]:
plot_t(t_val, N, "JTAG Delay Measurement, 15MHz CPU, 100 MHz TDI, Bypass Enabled")

## CPA Measurement

In [None]:
from tqdm.notebook import trange
import numpy as np
import time

ktp = cw.ktp.Basic() # default - fixed key, random plaintext

# Per-trace results, appended in capture order; failed captures are skipped.
waves, textins, textouts, keys = [], [], [], []

N = 10000
for i in trange(N, desc='Capturing traces'):
    key, text = ktp.next() # new plaintext, same key
    # set key, send plaintext, receive ciphertext, capture power trace
    trace = cw.capture_trace(scope, target, text, key)
    if trace:
        wave = trace.wave # wave is the name for the data for our power trace

        waves.append(wave)
        textins.append(trace.textin)
        textouts.append(trace.textout)
        keys.append(trace.key)

        # Update our plot with a new trace on every fifth iteration
        if i % 5 == 0:
            splot.update(wave)

In [None]:
# Mean of the first sample across all captured CPA traces; used as the
# threshold for the one-sample resynchronisation that follows.
first = [w[0] for w in waves]
avg = np.mean(first)

In [None]:
# One-sample resync: traces whose first sample is below the threshold drop their
# last sample, all others drop their first.
# NOTE(review): the comparison direction here (`<`) is the opposite of the one
# used for the TVLA groups earlier in this notebook (`>`) — confirm this is intended.
tracesresync = [w[:-1] if w[0] < avg else w[1:] for w in waves]

In [None]:
cw.plot(tracesresync[0]) * cw.plot(tracesresync[1]) * cw.plot(tracesresync[10])

In [None]:
%run "functions.ipynb"

In [None]:
data_dir = "data_store"

In [None]:
import numpy as np
import zarr
from numcodecs import lz4
from numcodecs import Blosc

def zarr_store(filename, waves, textins, textouts, keys):
    """Write traces and their AES metadata into a zarr directory store.

    The data is placed under the group path ``0/0`` as four datasets:
    ``traces``, ``plaintext``, ``ciphertext`` and ``key``.

    Args:
        filename: Directory path of the zarr store to create or update.
        waves: Sequence of equal-length traces; ``waves[0].shape[0]`` sets the
            sample count.
        textins: Sequence of 16-byte plaintexts, one per trace.
        textouts: Sequence of 16-byte ciphertexts, one per trace.
        keys: Sequence of 16-byte keys, one per trace.

    Prints the tree of the ``0/0`` group when done.
    """
    trace_compressor = None
    #trace_compressor = Blosc(cname='lz4hc', clevel=9, shuffle=Blosc.SHUFFLE)
    metadata_compressor = None

    store = zarr.DirectoryStore(filename)
    root = zarr.hierarchy.group(store=store)
    tile = root.require_group("0/0")

    n_samples = waves[0].shape[0]
    n_rows = len(textins)

    # The three 16-byte-per-row datasets share one layout: a single chunk
    # holding every row.
    for dataset in ("plaintext", "key", "ciphertext"):
        tile.zeros(
            name=dataset,
            shape=(n_rows,16),
            chunks=(n_rows,16),
            dtype=np.uint8,
            compressor=metadata_compressor
        )

    tile.zeros(
        name="traces",
        shape=(len(waves),n_samples),
        chunks=(len(waves),n_samples),
        dtype=np.double, # int8 or int16
        compressor=trace_compressor
    )

    # NOTE(review): in zarr v2, assigning to a group item may create a new array
    # that replaces the one pre-created above rather than filling it, in which
    # case the dtype/chunks/compressor chosen above would not apply — confirm
    # whether `tile[name][:] = data` was intended.
    tile['traces'] = waves
    tile['plaintext'] = textins
    tile['ciphertext'] = textouts
    tile['key'] = keys

    store.close()

    print(tile.tree())

In [None]:
#import shutil
#shutil.rmtree("jtag_200msps_10k_cpa_resync")

In [None]:
zarr_store("jtag_200msps_10k_cpa_resync", tracesresync, textins, textouts, keys)

In [None]:
from scarr.engines.cpa import CPA as cpa
from scarr.file_handling.trace_handler import TraceHandler as th
from scarr.models.subBytes_weight import SubBytes_weight
from scarr.container.container import Container, ContainerOptions

In [None]:
dataset_name = "jtag_200msps_10k_cpa_resync"
dataset = zarr.open(dataset_name, "r")

In [None]:
handler2 = th(fileName=dataset_name)
model = SubBytes_weight()
engine2 = cpa(model)
container2 = Container(options=ContainerOptions(engine=engine2, handler=handler2))

In [None]:
# this should be quite fast on most modern computers (<2 min)
container2.run()

In [None]:
results2 = container2.engine.get_result()

In [None]:
container2.engine.get_candidate()

In [None]:
import matplotlib.pylab as plt
fig, ax = plt.subplots(figsize=(32, 4))
ax.plot(results2[0,0,0,:,:].T, color='gray')
ax.plot(results2[0,0,0,dataset["0/0/key"][0,0],:].T, color='red')

ax.set_xlabel('Samples')
ax.set_ylabel('CPA')
plt.show()

### TVLA for Baseline (JTAG Bypass Disabled)

Turn OFF the 40 MHz TCK & 20 MHz TDI clocks (e.g., turn off the signal generator output). Then run the following block, which performs a JTAG TAP reset to return the TAP to its normal state (where bypass is disabled).

In [None]:
#Take control of TDI, TMS, TCK
scope.userio.direction = 0b11100000
# Five TCK pulses with TMS held high perform the TAP reset.
for pulse in range(5):
    write(1, 1)
# Keep driving only bit 6 (TMS in the pin map); TCK and TDI are released.
scope.userio.direction = 0b01000000

Turn the 40 MHz TCK & 20 MHz TDI clocks back on. Confirm that no clock is coming out of TDO.

In [None]:
scope.adc.samples = 15000

In [None]:
splot = cw.StreamPlot()
splot.plot()

In [None]:
N = 10000
group1, group2 = capture_ttest(N, picoscope=False, splot=splot)

In [None]:
#nptsave("cwhusky_jtag_notenabled_vdic_2mhzcpu_20mhzadc_15kpts_10ktraces_async", group1, group2)

In [None]:
group1, group2, N = nptload("cwhusky_jtag_notenabled_vdic_2mhzcpu_20mhzadc_15kpts_10ktraces_async")

In [None]:
from scipy import signal
sos = signal.butter(5, 0.015, 'highpass', output='sos')

group11 = signal.sosfilt(sos, group1)
group12 = signal.sosfilt(sos, group2)

In [None]:
import numpy as np
mean1 = np.mean(group11, axis=0)[2000:]
mean2 = np.mean(group12, axis=0)[2000:]
cw.plot(mean2) * cw.plot(mean1)

In [None]:
from scipy.stats import ttest_ind
t_val = ttest_ind(group11, group12, axis=0, equal_var=False)[0]

In [None]:
plot_t(t_val, N, "JTAG Delay Measurement, 2MHz CPU, 20 MHz TDI, Bypass Disabled")

## Countermeasure Testing

### Countermeasure Description

The countermeasure is run on a TinyFPGA BX. It uses a 148 MHz clock to "reclock" the TDO line. This doesn't seem to impact usage of JTAG tools at normal speeds (250kHz - 10 MHz).


```
module top (
   input  CLK,   // 16 MHz on-board clock
   output LED,   // on-board LED
   output USBPU,  // USB pull-up enable, set low to disable

   input PIN_2,
   output PIN_3,
   output PIN_13


   );

    /**
    * PLL configuration
    *
    * This Verilog module was generated automatically
    * using the icepll tool from the IceStorm project.
    * Use at your own risk.
    *
    * Given input frequency:        16.000 MHz
    * Requested output frequency:  150.000 MHz
    * Achieved output frequency:   148.000 MHz
    */
    wire clock_in;
    wire clock_out;
    wire locked;
    SB_PLL40_CORE #(
                    .FEEDBACK_PATH("SIMPLE"),
                    .DIVR(4'b0000),         // DIVR =  0
                    .DIVF(7'b0100100),      // DIVF = 36
                    .DIVQ(3'b010),          // DIVQ =  2
                    .FILTER_RANGE(3'b001)   // FILTER_RANGE = 1
            ) uut (
                    .LOCK(locked),
                    .RESETB(1'b1),
                    .BYPASS(1'b0),
                    .REFERENCECLK(clock_in),
                    .PLLOUTCORE(clock_out)
                    );


  reg relatched;
  wire clock;

  //PLL connection
  assign clock_in = CLK;
  assign clock = clock_out;

  //Countermeasure OFF - uncomment this line, comment out the below
  //assign relatched = PIN_2;


  //Countermeasure ON - uncomment this line, comment out the above
  always @(posedge clock) relatched <= PIN_2;

   assign PIN_3 = relatched;
   assign PIN_13 = relatched;


   assign LED = 1'b1;  // on-board LED held on (this design has no blink logic)
   assign USBPU = 1'b0;   // disable USB
endmodule  // top
```

To program the board, `tinyprog` is used:

```
!pip install tinyprog
```

Note the board is powered from 3.3V from the target device. The USB is only connected during programming.

### CW Setup

We again use 15K samples:

In [None]:
scope.adc.samples = 15000

### Countermeasure - Baseline Measurement

Initially, the countermeasure ISN'T in the path, but is connected to power. This is to give a baseline with the same setup.

In [None]:
scope.gain.gain = 35
scope.gain.mode = "low"
scope.gain

In [None]:
scope

In [None]:
splot = cw.StreamPlot()
splot.plot()

In [None]:
N = 10000
group1, group2 = capture_ttest(N, picoscope=False, splot=splot)

In [None]:
#nptsave("cwhusky_jtag_vdic_2mhzcpu_20mhzadc_15kpts_10ktraces_async_fpgapowereddisconnected", group1, group2)

In [None]:
group1, group2, N = nptload("cwhusky_jtag_vdic_2mhzcpu_20mhzadc_15kpts_10ktraces_async_fpgapowereddisconnected")

In [None]:
from scipy import signal
sos = signal.butter(5, 0.03, 'highpass', output='sos')

group11 = signal.sosfilt(sos, group1)
group12 = signal.sosfilt(sos, group2)

In [None]:
import numpy as np
mean1 = np.mean(group11, axis=0)[2000:]
mean2 = np.mean(group12, axis=0)[2000:]
cw.plot(mean2) * cw.plot(mean1)

In [None]:
from scipy.stats import ttest_ind
t_val = ttest_ind(group11, group12, axis=0, equal_var=False)[0]

In [None]:
plot_t(t_val, N, "JTAG Delay Measurement, 2MHz CPU, 20 MHz TDI, TinyFPGA powered but disconnected")

### Countermeasure - FPGA inserted, but direct connection (no reclocking)

The second baseline is where the TinyFPGA is inserted into the TDO pin, but the countermeasure is "disabled" by just doing this in the FPGA design:

```
assign relatched = PIN_2;
```

This means no reclocking is happening. This is to check how much jitter is added just by the additional digital path.

In [None]:
# NOTE(review): this section measures the countermeasure-*disabled* design, yet
# the image programmed here is `countermeasure_ice40_on.bin`, the same file used
# in the "Countermeasure Enabled" section — confirm the intended bitstream.
!tinyprog -p countermeasure_ice40_on.bin -b

In [None]:
scope.gain.gain = 35
scope.gain.mode = "low"
scope.gain

In [None]:
splot = cw.StreamPlot()
splot.plot()

In [None]:
N = 1000
group1, group2 = capture_ttest(N, picoscope=False, splot=splot)

In [None]:
#nptsave("cwhusky_jtag_vdic_2mhzcpu_20mhzadc_15kpts_10ktraces_async_fpgacountermeasuredisabled", group1, group2)

In [None]:
group1, group2, N = nptload("cwhusky_jtag_vdic_2mhzcpu_20mhzadc_15kpts_10ktraces_async_fpgacountermeasuredisabled")

In [None]:
from scipy import signal
sos = signal.butter(5, 0.03, 'highpass', output='sos')
group11 = signal.sosfilt(sos, group1)
group12 = signal.sosfilt(sos, group2)

In [None]:
import numpy as np
mean1 = np.mean(group11, axis=0)[500:]
mean2 = np.mean(group12, axis=0)[500:]
cw.plot(mean2) * cw.plot(mean1)

In [None]:
from scipy.stats import ttest_ind
t_val = ttest_ind(group11, group12, axis=0, equal_var=False)[0]

In [None]:
plot_t(t_val, N, "JTAG Delay Measurement, 2MHz CPU, 20 MHz TDI, Countermeasure Disabled")

### Countermeasure Enabled

The same setup as previously, but the FPGA is reprogrammed with the countermeasure.

In [None]:
!tinyprog -p countermeasure_ice40_on.bin -b

In [None]:
splot = cw.StreamPlot()
splot.plot()

In [None]:
N = 10000
group1, group2 = capture_ttest(N, picoscope=False, splot=splot)

In [None]:
#nptsave("cwhusky_jtag_vdic_2mhzcpu_20mhzadc_15kpts_10ktraces_async_fpgacountermeasureenabled", group1, group2)

In [None]:
group1, group2, N = nptload("cwhusky_jtag_vdic_2mhzcpu_20mhzadc_15kpts_10ktraces_async_fpgacountermeasureenabled")

In [None]:
from scipy import signal
sos = signal.butter(5, 0.03, 'highpass', output='sos')

group11 = signal.sosfilt(sos, group1)
group12 = signal.sosfilt(sos, group2)

In [None]:
import numpy as np
mean1 = np.mean(group11, axis=0)[2000:]
mean2 = np.mean(group12, axis=0)[2000:]
cw.plot(mean2) * cw.plot(mean1)

In [None]:
from scipy.stats import ttest_ind
t_val = ttest_ind(group11, group12, axis=0, equal_var=False)[0]

In [None]:
plot_t(t_val, N, "JTAG Delay Measurement, 2MHz CPU, 20 MHz TDI, Countermeasure Enabled")

In [None]:
target.baud = 38400

In [None]:
N = 10000
group1, group2 = capture_ttest(N, picoscope=False, splot=None)