# Usage example

In [1]:
import pandas as pd
from ast import literal_eval

## First, read a data file (in this example, in CSV format)

In [2]:
# Directory holding the example dataset, relative to the notebook.
path = './datasets'
path_res = '%s/pei_dataset_modif.csv' % path
# Run the "sequence" column through literal_eval so each cell is loaded as a
# Python object instead of raw text.
data = pd.read_csv(
    path_res,
    sep=",",
    header=0,
    converters={"sequence": literal_eval},
)

## Split the data to extract the sequences

In [3]:
# Pull the two columns of interest out of the DataFrame as plain Python lists.
sids, sequences = (list(data[column]) for column in ("sid", "sequence"))

## Set the configuration options

In [4]:
# Threshold forwarded to the reader and the miner through `options`
# (presumably the minimum relative support -- TODO confirm against copper).
threshold = 0.2
options = dict(threshold=threshold)

## Encode sequences 

In [5]:
import copper.dataprocessor as dp
# Flag handed to discretize_sequences as its second argument.
# NOTE(review): its exact meaning is defined inside copper.dataprocessor -- confirm there.
items_separated = False
# Encode the raw sequences; in the output displayed for this cell, single items
# appear as bare strings and multi-item itemsets as nested lists.
seq = dp.discretize_sequences(sequences, items_separated)
seq

[['1', ['1', '2', '3'], ['1', '3'], '4', ['3', '6']],
 [['1', '4'], '3', ['2', '3'], ['1', '5']],
 [['5', '6'], ['1', '2'], ['4', '6'], ['3', '2']],
 ['5', '7', ['1', '6'], '3', '2', '3']]

## Convert the encoded data to SPMF format

In [6]:
import copper.fileprocessor as fp
# Serialize the encoded sequences to SPMF-style text lines. In the output
# displayed for this cell, every itemset is closed by -1 and every sequence by -2.
u_db = fp.db_to_spmf(seq)
u_db

['1 -1 1 2 3 -1 1 3 -1 4 -1 3 6 -1 -2 \n',
 '1 4 -1 3 -1 2 3 -1 1 5 -1 -2 \n',
 '5 6 -1 1 2 -1 4 6 -1 3 2 -1 -2 \n',
 '5 -1 7 -1 1 6 -1 3 -1 2 -1 3 -1 -2 \n']

## Read data with options

In [7]:
# Load the SPMF-style lines using the configured options. In the output displayed
# for this cell, each entry starts with a sequence index, itemsets are separated
# by NUL characters (\x00), and items within an itemset are joined with '|'.
s_db = fp.readDB(u_db, options)
s_db

['0\x001\x001|2|3\x001|3\x004\x003|6',
 '1\x001|4\x003\x002|3\x001|5',
 '2\x005|6\x001|2\x004|6\x003|2',
 '3\x005\x007\x001|6\x003\x002\x003']

## Mine patterns in the data

In [8]:
import time

import copper.prefixspan as ps
import copper.profiling as pro

# Take the memory reading first, then the clock, so the two timestamps
# bracket only the mining call itself.
mem_before = pro.get_process_memory()
time_start = time.time()

result_mining = ps.prefixspan(s_db, options)

# Mirror the order above: stop the clock first, then read memory again.
time_end = time.time()
mem_after = pro.get_process_memory()

# Display the mined patterns (each entry pairs a pattern with a count).
result_mining

[[<1>, 4],
 [<1, 2>, 2],
 [<1, 2, 3>, 1],
 [<1, 2, 3><1>, 1],
 [<1, 2, 3><1, 3>, 1],
 [<1, 2, 3><1, 3><3>, 1],
 [<1, 2, 3><1, 3><3, 6>, 1],
 [<1, 2, 3><1, 3><4>, 1],
 [<1, 2, 3><1, 3><4><3>, 1],
 [<1, 2, 3><1, 3><4><3, 6>, 1],
 [<1, 2, 3><1, 3><4><6>, 1],
 [<1, 2, 3><1, 3><6>, 1],
 [<1, 2, 3><1><3>, 1],
 [<1, 2, 3><1><3, 6>, 1],
 [<1, 2, 3><1><4>, 1],
 [<1, 2, 3><1><4><3>, 1],
 [<1, 2, 3><1><4><3, 6>, 1],
 [<1, 2, 3><1><4><6>, 1],
 [<1, 2, 3><1><6>, 1],
 [<1, 2, 3><3>, 1],
 [<1, 2, 3><3, 6>, 1],
 [<1, 2, 3><3><3>, 1],
 [<1, 2, 3><3><3, 6>, 1],
 [<1, 2, 3><3><4>, 1],
 [<1, 2, 3><3><4><3>, 1],
 [<1, 2, 3><3><4><3, 6>, 1],
 [<1, 2, 3><3><4><6>, 1],
 [<1, 2, 3><3><6>, 1],
 [<1, 2, 3><4>, 1],
 [<1, 2, 3><4><3>, 1],
 [<1, 2, 3><4><3, 6>, 1],
 [<1, 2, 3><4><6>, 1],
 [<1, 2, 3><6>, 1],
 [<1, 2><1>, 1],
 [<1, 2><1, 3>, 1],
 [<1, 2><1, 3><3>, 1],
 [<1, 2><1, 3><3, 6>, 1],
 [<1, 2><1, 3><4>, 1],
 [<1, 2><1, 3><4><3>, 1],
 [<1, 2><1, 3><4><3, 6>, 1],
 [<1, 2><1, 3><4><6>, 1],
 [<1, 2><1, 3><6>, 1]

## Decode the mining results

In [9]:
# Decode the mined patterns back into lists of itemsets paired with their counts.
# NOTE(review): `data` (the full DataFrame) is passed as the first argument,
# presumably as the lookup for the original item labels -- confirm against
# copper.dataprocessor.undiscretize_sequences.
result_mining_undiscretize = dp.undiscretize_sequences(data, result_mining)

[[['1']], 4]
[[['1', '2']], 2]
[[['1', '2', '3']], 1]
[[['1', '2', '3'], ['1']], 1]
[[['1', '2', '3'], ['1', '3']], 1]
[[['1', '2', '3'], ['1', '3'], ['3']], 1]
[[['1', '2', '3'], ['1', '3'], ['3', '6']], 1]
[[['1', '2', '3'], ['1', '3'], ['4']], 1]
[[['1', '2', '3'], ['1', '3'], ['4'], ['3']], 1]
[[['1', '2', '3'], ['1', '3'], ['4'], ['3', '6']], 1]
[[['1', '2', '3'], ['1', '3'], ['4'], ['6']], 1]
[[['1', '2', '3'], ['1', '3'], ['6']], 1]
[[['1', '2', '3'], ['1'], ['3']], 1]
[[['1', '2', '3'], ['1'], ['3', '6']], 1]
[[['1', '2', '3'], ['1'], ['4']], 1]
[[['1', '2', '3'], ['1'], ['4'], ['3']], 1]
[[['1', '2', '3'], ['1'], ['4'], ['3', '6']], 1]
[[['1', '2', '3'], ['1'], ['4'], ['6']], 1]
[[['1', '2', '3'], ['1'], ['6']], 1]
[[['1', '2', '3'], ['3']], 1]
[[['1', '2', '3'], ['3', '6']], 1]
[[['1', '2', '3'], ['3'], ['3']], 1]
[[['1', '2', '3'], ['3'], ['3', '6']], 1]
[[['1', '2', '3'], ['3'], ['4']], 1]
[[['1', '2', '3'], ['3'], ['4'], ['3']], 1]
[[['1', '2', '3'], ['3'], ['4'], ['3', '6

## Generate the result file

In [11]:
# Pass the timestamps captured around the mining call (time_start / time_end).
# The names `start` and `end` are not defined in any visible cell, so the
# original call raised NameError on a fresh kernel (Restart & Run All).
fp.get_result_file(result_mining_undiscretize, options, time_start, time_end, mem_after, mem_before)