# E T **L**

### Version 1.0

---

### Imports

In [1]:
import pandas as pd
import numpy as np
import os
from sqlalchemy import create_engine
# Prefer the C-accelerated implementation when the interpreter exposes it;
# fall back to the pure-Python front end otherwise. Only ImportError is
# caught so that unrelated failures are not silently hidden.
try:
    import _pickle as pickle # for serialization, _pickle == cPickle (faster than pickle)
except ImportError:
    import pickle # alternative

### Dev

In [8]:
class DataHandler:
    """Persist and restore data sets for the "Load" step of the pipeline.

    Three storage back ends are offered:

    * a SQLite database (one table per symbol) via SQLAlchemy / pandas,
    * ``.npz`` archives holding the ``X`` / ``y`` arrays,
    * a pickle file holding the handler's ``scalers`` attribute.

    Attributes:
        dbname: Name of the database file (without the ``.db`` suffix).
        engine: SQLAlchemy engine, or ``None`` when it could not be created.
        scalers: Object written by :meth:`serialize` and restored by
            :meth:`initialize`; ``None`` until one of them sets it.
    """

    def __init__(self, dbname='DataFrames'):
        """Create the handler and try to set up the database engine.

        Args:
            dbname: Database file name without extension; it is inserted
                into the SQLite URL under ``../data/cleaned/``.
        """
        # Stored so that __repr__ has something to report.
        self.dbname = dbname
        self.scalers = None
        try:
            self.engine = create_engine('sqlite:///..//data/cleaned/{}.db'.format(dbname))
        except Exception:
            # Best effort: the npz / pickle helpers remain usable without a
            # database. The DB methods report the missing engine explicitly.
            self.engine = None

    # Database
    def save_to_db(self, df, symbol, engine=None, index=False, if_exists='replace'):
        """Write ``df`` as table ``symbol`` into the database.

        Args:
            df: DataFrame to store.
            symbol: Table name.
            engine: Optional engine to write to; when ``None`` the handler's
                own engine is used.
            index: Forwarded to ``DataFrame.to_sql``.
            if_exists: Forwarded to ``DataFrame.to_sql``.

        Raises:
            RuntimeError: If no engine is available or the write fails; the
                underlying error is chained as ``__cause__``.
        """
        target = engine if engine is not None else self.engine
        if target is None:
            raise RuntimeError("No Connection to Database available.")
        try:
            # save the dataframe as a table in the database
            df.to_sql(symbol, target, index=index, if_exists=if_exists)
        except Exception as err:
            raise RuntimeError(
                "Could not save table '{}' to database: {}".format(symbol, err)
            ) from err

    def load_from_db(self, symbol):
        """Read table ``symbol`` from the database and return it as a DataFrame.

        Raises:
            RuntimeError: If no engine is available or the read fails; the
                underlying error is chained as ``__cause__``.
        """
        if self.engine is None:
            raise RuntimeError("No Connection to Database available.")
        try:
            return pd.read_sql_table(symbol, con=self.engine)
        except Exception as err:
            raise RuntimeError(
                "Could not load table '{}' from database: {}".format(symbol, err)
            ) from err

    # Machine Learning Format
    @staticmethod
    def _npz_path(symbol, path=''):
        """Return the ``.npz`` file location for ``symbol``.

        An empty ``path`` selects the default ``../data/ml_format/`` folder;
        otherwise ``path`` is treated as the directory holding the archive.
        """
        if path == '':
            return '../data/ml_format/{}.npz'.format(symbol)
        return os.path.join(path, '{}.npz'.format(symbol))

    def save_to_npz(self, X, y, symbol, path=''):
        """Store ``X`` and ``y`` in ``<symbol>.npz`` under keys ``'X'`` and ``'y'``.

        Args:
            X: Array saved under key ``'X'``.
            y: Array saved under key ``'y'``.
            symbol: Base name of the archive.
            path: Target directory; empty string selects the default folder.
        """
        np.savez(self._npz_path(symbol, path), X=X, y=y)

    def load_from_npz(self, symbol, path=''):
        """Load the ``X`` and ``y`` arrays from ``<symbol>.npz``.

        Args:
            symbol: Base name of the archive.
            path: Directory containing the archive; empty string selects the
                default folder.

        Returns:
            Tuple ``(X, y)``.

        Raises:
            RuntimeError: If the archive cannot be opened or lacks a key; the
                underlying error is chained as ``__cause__``.
        """
        path = self._npz_path(symbol, path)
        try:
            # The context manager closes the archive once both arrays are read.
            with np.load(path) as data:
                X = data['X']
                y = data['y']
            return X, y
        except Exception as err:
            raise RuntimeError("Can't load from path {}.".format(path)) from err

    def serialize(self, path='serialized_tool_objects/datahandler.p'):
        """Pickle ``self.scalers`` to ``path``."""
        with open(path, 'wb') as file:
            pickle.dump(self.scalers, file)

    def initialize(self, path='serialized_tool_objects/datahandler.p'):
        """Restore ``self.scalers`` from the pickle file at ``path``.

        NOTE: unpickling can execute arbitrary code; only load files that
        were produced by a trusted source.
        """
        with open(path, 'rb') as file:
            self.scalers = pickle.load(file)

    def __repr__(self):
        return 'DataHandler({})'.format(self.dbname)

### Test

In [9]:
# Build a handler with the default database name ('DataFrames').
data_handler = DataHandler()

In [13]:
# Read the 'AAPL' table from the database and preview its first rows.
df = data_handler.load_from_db('AAPL')
df.head()

Unnamed: 0,Open,High,Low,Close,Volume,Ex-Dividend,Split_Ratio,Adj._Open,Adj._High,Adj._Low,Adj._Close,Adj._Volume,Year,Month,Day,Adj._Close_next
0,28.75,28.87,28.75,28.75,2093900.0,0.0,1.0,0.422706,0.42447,0.422706,0.422706,117258400.0,1980,12,12,0.400652
1,27.38,27.38,27.25,27.25,785200.0,0.0,1.0,0.402563,0.402563,0.400652,0.400652,43971200.0,1980,12,15,0.371246
2,25.37,25.37,25.25,25.25,472000.0,0.0,1.0,0.37301,0.37301,0.371246,0.371246,26432000.0,1980,12,16,0.380362
3,25.87,26.0,25.87,25.87,385900.0,0.0,1.0,0.380362,0.382273,0.380362,0.380362,21610400.0,1980,12,17,0.391536
4,26.63,26.75,26.63,26.63,327900.0,0.0,1.0,0.391536,0.3933,0.391536,0.391536,18362400.0,1980,12,18,0.415355


In [16]:
# Load the X / y arrays stored for 'AAPL' from the default .npz location
# (empty `path` argument) and print both arrays.
X, y = data_handler.load_from_npz('AAPL')
print(X, y)

[[-0.536193   -0.53993819 -0.53157229 ... -1.77172831  1.59228433
  -0.428005  ]
 [-0.54632859 -0.5508706  -0.54278073 ... -1.77172831  1.59228433
  -0.08489768]
 [-0.56119906 -0.56561836 -0.55772531 ... -1.77172831  1.59228433
   0.02947143]
 ...
 [ 0.50880872  0.51522323  0.51342778 ...  1.7591928  -1.02087964
   0.71568608]
 [ 0.49689755  0.49497258  0.48607919 ...  1.7591928  -1.02087964
   0.83005519]
 [ 0.49453011  0.51830485  0.49728763 ...  1.7591928  -1.02087964
   1.17316252]] [  0.40065169   0.37124607   0.38036181 ... 164.94       172.77
 168.34      ]
