# Binary Segmentation - converts a frequency histogram CSV into time segments

In [11]:
### Import necessary modules
import numpy as np
import matplotlib.pylab as plt
import ruptures as rpt
import csv

In [12]:
### Set parameters
# Participant and dataset identifiers; later cells build the input and
# output file paths from these two strings.
PID = 'p8'
dataset = '4'

# Synthetic piecewise-constant signal generated by ruptures.
# NOTE(review): `signal` is reassigned when the CSV is loaded in the
# "Open file" cell, so this synthetic data looks like a tutorial leftover --
# confirm before removing it.
n = 500  # number of samples
dim = 3  # signal dimension (features per sample)
n_bkps = 3  # number of change points
sigma = 5  # noise standard deviation
signal, bkps = rpt.pw_constant(n, dim, n_bkps, noise_std=sigma)

In [13]:
### Open file
# Load the participant's CSV (path built from `dataset` and `PID`) into a
# NumPy array. The context manager closes the file handle as soon as loading
# finishes, so it is not left open for the rest of the session.
with open("newCSV/Dataset_" + dataset + "/" + PID + ".csv") as file:
    signal = np.loadtxt(file, delimiter=",")
# Print (rows, columns) as a quick sanity check of what was loaded.
print(np.shape(signal))

(127, 49331)


In [18]:
# Pick how many change points to request from the number of rows in `signal`:
# at most 15 rows -> 3, 16 to 20 rows -> 6, more than 20 rows -> 11.
if len(signal) <= 15:
    number_of_breakpoints = 3
elif len(signal) <= 20:
    number_of_breakpoints = 6
else:
    number_of_breakpoints = 11

In [19]:
### Generate segments
# Change point detection with binary segmentation.
# Cost model to use; other options: "l1", "rbf", "linear", "normal", "ar".
model = "l2"

# Build the detector (jump=1), then fit it on the loaded signal.
algo = rpt.Binseg(model=model, jump=1)
algo = algo.fit(signal)

# Ask for the number of change points chosen in the previous cell.
my_bkps = algo.predict(n_bkps=number_of_breakpoints)

# show results
print(my_bkps)

[3, 18, 30, 46, 53, 66, 75, 77, 80, 106, 114, 127]


In [20]:
### Convert breakpoint indices into times:
# For every breakpoint index, take column 0 of the row just before it and
# divide that value by 10.
# NOTE(review): presumably column 0 holds a timestamp in tenths of the output
# unit -- confirm against whatever produces the CSV.
timeArr = [signal[bkp - 1][0] / 10 for bkp in my_bkps]
print(timeArr)

[13054.0, 59522.0, 148394.0, 200896.0, 232761.0, 292362.0, 359200.0, 389886.0, 427956.0, 488727.0, 573082.0, 632381.0]


In [21]:
### Export segments to CSV
# File-name prefix for each dataset, selected by dataset number - 1.
startText = ['Arms', 'Terrorist', 'Disappearance', 'Panda']
# PID[1:] keeps everything after the leading letter, so a multi-digit ID such
# as 'p10' is not truncated to its first digit.
outFilename = ('../code/ProvSegments/Dataset_' + dataset + '/Segmentation/'
               + startText[int(dataset) - 1] + '_P' + PID[1:]
               + '_20_4_6_Prov_Segments.csv')

header = ['ID', 'start', 'end', 'length']

# The context manager closes (and flushes) the file even if a write fails.
with open(outFilename, 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(header)

    # The first segment runs from time 0 to the first breakpoint time.
    row = [0, 0, timeArr[0], timeArr[0]]
    writer.writerow(row)

    # Walk every entry of timeArr instead of stopping at
    # number_of_breakpoints: the breakpoint list also contains the final
    # sample index, so timeArr has one more entry than the requested number
    # of breakpoints and the shorter range would drop the last segment.
    for i in range(1, len(timeArr)):
        row = [i, timeArr[i-1], timeArr[i], timeArr[i] - timeArr[i-1]]
        writer.writerow(row)
