# Binary Segmentation - converts a frequency histogram CSV into time segments

In [1]:
### Import necessary modules
import numpy as np
import matplotlib.pylab as plt
import ruptures as rpt
import csv

In [2]:
### Set parameters
PID = 'p8'      # participant id; used to build the input and output file names
dataset = '4'   # dataset number (kept as a string because it is concatenated into paths)

# Settings for a synthetic piecewise-constant test signal.
n = 500      # number of samples
dim = 3      # number of dimensions per sample
n_bkps = 3   # number of change points in the synthetic signal
sigma = 5    # noise standard deviation
# NOTE(review): `signal` is reassigned from the CSV in the "Open file" cell and
# `bkps` is not read again in this notebook, so this call looks like a leftover
# from the ruptures example - confirm before removing.
signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma)

In [3]:
### Open file
# Load the CSV for the selected dataset / participant into a NumPy array.
# The `with` block closes the file handle as soon as loading finishes, also
# when np.loadtxt raises (previously the handle was never closed).
with open("newCSV/Dataset_" + dataset + "/" + PID + ".csv") as file:
    signal = np.loadtxt(file, delimiter=",")

In [4]:
### Generate segments
# Change point detection with binary segmentation.
# Cost model passed to ruptures; alternatives: "l1", "rbf", "linear", "normal", "ar".
model = "l2"
algo = rpt.Binseg(model=model, jump=1)
algo.fit(signal)
# NOTE(review): the number of change points is fixed at 10 here and is
# independent of the `n_bkps` value defined in the parameter cell.
my_bkps = algo.predict(n_bkps=10)

# Show the detected breakpoint indices.
print(my_bkps)

[11, 16, 39, 44, 55, 59, 79, 94, 98, 100, 124]


In [5]:
### Convert breakpoint indices into times:
# For every breakpoint, take column 0 of the row just before it (bkp - 1) and
# divide by 10.
# NOTE(review): presumably column 0 holds a timestamp and /10 rescales its
# unit - confirm against the CSV layout. The output recorded in this notebook
# is not increasing, which is unexpected for times taken at increasing indices.
timeArr = [signal[bkp - 1][0] / 10 for bkp in my_bkps]
print(timeArr)

[28000.0, 82000.0, 49000.0, 75000.0, 49000.0, 10000.0, 29000.0, 86000.0, 50000.0, 92000.0, 81000.0]


In [6]:
### Export segments to CSV
# Writes one row per segment: ID, start, end, length (end - start).
# The first segment starts at 0; each later segment starts where the previous
# one ended.

# File-name prefix per dataset, indexed by (dataset number - 1).
startText = ['Arms', 'Terrorist', 'Disappearance', 'Panda']
# PID[1:] keeps the full participant number; PID[1] would truncate a
# multi-digit id such as 'p10' to '1'.
outFilename = ('../code/ProvSegments/Dataset_' + dataset + '/Segmentation/'
               + startText[int(dataset) - 1] + '_P' + PID[1:]
               + '_20_4_6_Prov_Segments.csv')
header = ['ID', 'start', 'end', 'length']

# The context manager flushes and closes the file even if a write fails.
with open(outFilename, 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)

    # Iterate over however many boundaries were detected rather than a
    # hard-coded count of 11, so a different n_bkps neither raises an
    # IndexError nor silently drops trailing segments.
    segmentStart = 0
    for i, segmentEnd in enumerate(timeArr):
        writer.writerow([i, segmentStart, segmentEnd, segmentEnd - segmentStart])
        segmentStart = segmentEnd
