In [9]:
import os
import pandas as pd
import numpy as np
import pickle
from bz2 import BZ2File
from time import strftime, time

# Folder containing the dataset file; it is joined with DATASET_FILENAME to build the load path.
DATAFOLDER = './'
# File name of the pickled feature dataset. Its extension selects the loader:
# '.bz2' is read as a bz2-compressed pickle, '.pickle' as a regular pickle.
DATASET_FILENAME = 'paper-onlycorrectdetections_extraction-outputPROCESSED_FEATURES_20230119-163834_windowed2112-mainset-phase3PROCESSED_FEATURES.pickle'

In [10]:
print('Loading dataset from file:',DATASET_FILENAME)

# Pick the file opener and the message labels from the file extension, so that
# both supported formats share the single loading path below.
dataset_extension = os.path.splitext(DATASET_FILENAME)[1]
if dataset_extension == '.bz2':
    open_dataset, reading_label, timing_label = BZ2File, 'compressed pickle', 'compressed pickle'
elif dataset_extension == '.pickle':
    open_dataset, reading_label, timing_label = open, 'pickle', 'regular pickle'
else:
    # ValueError is still caught by any `except Exception` handler.
    raise ValueError("Extension %s not supported!" % dataset_extension)

print("Reading dataset from %s..." % reading_label)
DATASET_PATH = os.path.join(DATAFOLDER,DATASET_FILENAME)
startime = time()
# NOTE(security): pickle.load can execute arbitrary code; only load files from a trusted source.
# The `with` block guarantees the file is closed even when unpickling fails.
with open_dataset(DATASET_PATH,'rb') as dataset_file:
    featuredataset = pickle.load(dataset_file)
print('Successfully Loaded!\nIt took %.1fs to load from %s' % (time()-startime, timing_label))
print('Dataset loaded!')

# Columns whose names start with '<digits>_' are treated as windowed features;
# the integer before the first underscore is the window index.
DATA_IS_WINDOWED = featuredataset.columns.str.match('0_').any()
# Raw string: '\d' inside a plain string literal is an invalid escape sequence.
windowed_columns = featuredataset.columns[featuredataset.columns.str.match(r'\d+_')]
WINDOW_INDEXES = sorted({int(column.split('_')[0]) for column in windowed_columns})
print('Data is WINDOWED!' if DATA_IS_WINDOWED else '', '%d windows' % (len(WINDOW_INDEXES)))

Loading dataset from file: paper-onlycorrectdetections_extraction-outputPROCESSED_FEATURES_20230119-163834_windowed2112-mainset-phase3PROCESSED_FEATURES.pickle
Reading dataset from pickle...
Successfully Loaded!
It took 8.8s to load from regular pickle
Dataset loaded!
Data is WINDOWED! 17 windows


# To SQLite

In [11]:
# import sqlite3
# # Create your connection.
# cnx = sqlite3.connect(':memory:')
# featuredataset.to_sql(name='price2', con=cnx)

# To CSV

In [12]:
# Export the loaded feature table to a CSV file in the current working directory.
# The output name is the dataset file name without its extension, wrapped as 'CSV_<name>.csv'.
dataset_stem = os.path.splitext(os.path.basename(DATASET_FILENAME))[0]
csv_filename = 'CSV_' + dataset_stem + '.csv'
featuredataset.to_csv(csv_filename, index=False)