# E T **L**

### Version 1.0

---

### Imports

In [1]:
import pandas as pd
import numpy as np
import os
from sqlalchemy import create_engine
try:
    import _pickle as pickle # for serialization, _pickle == cPickle (faster than pickle)
except:
    import pickle # alternative

### Dev

In [2]:
class DataHandler:
    """Store and reload data sets keyed by a symbol name.

    Data frames are written to / read from tables of an SQLite database
    reached through a SQLAlchemy engine, and ``X``/``y`` array pairs are
    written to / read from ``.npz`` files. The handler keeps a list of the
    symbols it has saved, and that list together with the database name can
    be pickled and restored.

    Attributes:
        dbname: Name of the database file (without the ``.db`` extension).
        engine: SQLAlchemy engine for the database, or ``None`` when the
            engine could not be created in the constructor.
        symbols: Symbols saved through this handler so far.
    """

    # URL template of the SQLite database; ``{}`` is filled with ``dbname``.
    _DB_URL = 'sqlite:///..//data/cleaned/{}.db'
    # Default directory for the ``.npz`` files.
    _NPZ_DEFAULT = '../data/ml_format/{}.npz'

    def __init__(self, dbname='DataFrames'):
        """Create a handler bound to the database ``dbname``.

        Engine creation stays best effort: on failure ``engine`` is set to
        ``None`` (database methods then raise their usual error) while
        ``dbname`` and ``symbols`` are always initialised, so the
        file-based methods remain usable.

        Args:
            dbname: Name of the database file without extension.
        """
        self.dbname = dbname
        self.symbols = []
        try:
            self.engine = self._build_engine()
        except Exception:
            self.engine = None

    def _build_engine(self):
        """Return a new engine for the database named ``self.dbname``."""
        return create_engine(self._DB_URL.format(self.dbname))

    def _register(self, symbol):
        """Remember ``symbol`` as saved, without creating duplicates."""
        if symbol not in self.symbols:
            self.symbols.append(symbol)

    @classmethod
    def _npz_path(cls, symbol, directory):
        """Return the ``.npz`` path for ``symbol`` inside ``directory``.

        An empty ``directory`` selects the default location.
        """
        if not directory:
            return cls._NPZ_DEFAULT.format(symbol)
        return os.path.join(directory, '{}.npz'.format(symbol))

    # Database
    def save_to_db(self, df, symbol, index=False, if_exists='replace'):
        """Write ``df`` to the database table named ``symbol``.

        Args:
            df: Data frame to store.
            symbol: Table name; recorded in ``symbols`` once the write
                succeeded.
            index: Forwarded to ``DataFrame.to_sql``.
            if_exists: Forwarded to ``DataFrame.to_sql``.

        Raises:
            Exception: If the write fails; the original error is chained
                as ``__cause__``.
        """
        try:
            df.to_sql(symbol, self.engine, index=index, if_exists=if_exists)
        except Exception as err:
            raise Exception("No Connection to Database available.") from err
        self._register(symbol)

    def load_from_db(self, symbol):
        """Return the database table named ``symbol`` as a data frame.

        Raises:
            Exception: If the read fails; the original error is chained
                as ``__cause__``.
        """
        try:
            return pd.read_sql_table(symbol, con=self.engine)
        except Exception as err:
            raise Exception("No Connection to Database available.") from err

    # Machine Learning Format
    def save_to_npz(self, X, y, symbol, save_dir=''):
        """Save the arrays ``X`` and ``y`` to ``<symbol>.npz``.

        Args:
            X: Array stored under the key ``'X'``.
            y: Array stored under the key ``'y'``.
            symbol: File stem; recorded in ``symbols`` once the file is
                written.
            save_dir: Target directory; empty selects the default
                ``../data/ml_format``.
        """
        np.savez(self._npz_path(symbol, save_dir), X=X, y=y)
        self._register(symbol)

    def load_from_npz(self, symbol, path=''):
        """Load the ``X`` and ``y`` arrays stored in ``<symbol>.npz``.

        Args:
            symbol: File stem of the archive.
            path: Directory holding the archive; empty selects the default
                ``../data/ml_format``.

        Returns:
            Tuple ``(X, y)``.

        Raises:
            Exception: If the archive cannot be read; the message names the
                file path and the original error is chained as
                ``__cause__``.
        """
        path = self._npz_path(symbol, path)
        try:
            with np.load(path) as data:
                return data['X'], data['y']
        except Exception as err:
            raise Exception("Can't load from path {}.".format(path)) from err

    def serialize(self, path='serialized_tool_objects/datahandler.p'):
        """Pickle ``[dbname, symbols]`` to the file at ``path``."""
        with open(path, 'wb') as file:
            pickle.dump([self.dbname, self.symbols], file)

    def initialize(self, path='serialized_tool_objects/datahandler.p'):
        """Restore ``dbname`` and ``symbols`` from ``path`` and rebuild the engine.

        NOTE(security): unpickling can execute arbitrary code; only load
        files written by ``serialize`` from a trusted location.
        """
        with open(path, 'rb') as file:
            self.dbname, self.symbols = pickle.load(file)
        self.engine = self._build_engine()

    def get_symbols(self, data_dir='../data/raw'):
        """Return the known symbols.

        When ``symbols`` is a list it is returned as is. When it is
        ``None`` the symbols are derived from the ``.csv`` file names in
        ``data_dir`` (extension removed, sorted by name).

        Args:
            data_dir: Directory scanned for ``.csv`` files when no symbol
                list is available.
        """
        if self.symbols is not None:
            return self.symbols
        suffix = '.csv'
        return sorted(
            name[:-len(suffix)]
            for name in os.listdir(data_dir)
            if name.endswith(suffix)
        )

    def __repr__(self):
        return "DataHandler('{}')".format(self.dbname)

### Test

In [3]:
# Create a handler using the default database name ('DataFrames').
data_handler = DataHandler()

In [6]:
# Read the database table stored under the symbol 'A' and preview its first rows.
df = data_handler.load_from_db('A')
df.head()

Exception: No Connection to Database available.

In [16]:
# Load the X and y arrays saved for 'AAPL' from the default .npz location and print them.
X, y = data_handler.load_from_npz('AAPL')
print(X, y)

[[-0.536193   -0.53993819 -0.53157229 ... -1.77172831  1.59228433
  -0.428005  ]
 [-0.54632859 -0.5508706  -0.54278073 ... -1.77172831  1.59228433
  -0.08489768]
 [-0.56119906 -0.56561836 -0.55772531 ... -1.77172831  1.59228433
   0.02947143]
 ...
 [ 0.50880872  0.51522323  0.51342778 ...  1.7591928  -1.02087964
   0.71568608]
 [ 0.49689755  0.49497258  0.48607919 ...  1.7591928  -1.02087964
   0.83005519]
 [ 0.49453011  0.51830485  0.49728763 ...  1.7591928  -1.02087964
   1.17316252]] [  0.40065169   0.37124607   0.38036181 ... 164.94       172.77
 168.34      ]
