# Libraries for this research notebook

In [1]:
import pandas as pd
from dotenv import load_dotenv
import os

# to overcome path issue for src
%reload_ext autoreload
%autoreload 2

from pathlib import Path
import sys

# Path().resolve() is the current working directory (a notebook has no
# __file__), i.e. the folder this notebook is being run from.
current_file_path = Path().resolve()

# Parent of the working directory. The `src` package imported below is
# expected to sit directly under it, so this is the directory that must be
# importable, not the `src` folder itself.
src_folder_path = current_file_path.parent

# Make `import src...` resolvable. Guarded so that re-running this cell does
# not keep appending the same entry to sys.path.
if str(src_folder_path) not in sys.path:
    sys.path.append(str(src_folder_path))

import src.data_loader as DB

# Query data from MySQL

In [2]:
from abc import ABC, abstractmethod
from dotenv import dotenv_values
import pandas as pd
from sqlalchemy import create_engine


class MyDataLoader(ABC):
    """
    Abstract base describing the DataLoader interface.

    It exists mainly as documentation of the contract shared by the
    concrete loaders (CSVDataLoader and DBDataLoader): each one must
    provide a ``load`` method.
    """

    @abstractmethod
    def load(self):
        """Load and return the dataset; concrete loaders must override this."""

class DBDataLoader(MyDataLoader):
    '''
    Load query results from a MySQL database into pandas.

    Connection settings are taken from the mapping returned by
    ``dotenv_values()`` using the keys ``DB_HOST``, ``DB_USER``, ``DB_PWD``
    and ``DB_NAME``. The engine is built with the ``mysql+pymysql`` driver.
    '''

    # Evaluated once, when the class body runs, and shared by every instance.
    config = dotenv_values()

    def __init__(self):
        # Engine used by every load() call on this instance.
        self.database = self._get_database_engine()
        # Fallback query used when load() is called without one.
        self.query = 'SELECT * FROM oil'

    def load(self, query=None):
        '''
        Execute a SQL query and return its result.

        Args:
            query: SQL text to run. When ``None``, ``self.query`` is used.

        Returns:
            The object returned by ``pd.read_sql`` for the query.
        '''
        # If custom SQL query not provided, use default query
        if query is None:
            query = self.query
        print('Loading dataset from database...')
        return pd.read_sql(query, self.database)

    def _get_database_engine(self):
        '''
        Build the SQLAlchemy engine from the class-level ``config``.

        Returns:
            The engine returned by ``create_engine``.

        Raises:
            ValueError: if ``DB_HOST``, ``DB_USER`` or ``DB_NAME`` is missing
                or empty, instead of silently building a URL that contains
                the literal text ``None``.
        '''
        # Local import keeps this fix self-contained within the class.
        from urllib.parse import quote_plus

        missing = [
            key for key in ('DB_HOST', 'DB_USER', 'DB_NAME')
            if not self.config.get(key)
        ]
        if missing:
            raise ValueError(
                'Missing database setting(s): ' + ', '.join(missing)
            )

        host = self.config.get('DB_HOST')
        user = self.config.get('DB_USER')
        password = self.config.get('DB_PWD')
        db = self.config.get('DB_NAME')

        # Credentials may contain characters that are special in a URL
        # ('@', ':', '/', ...); escape them so the URL parses correctly.
        credentials = quote_plus(user)
        if password is not None:
            credentials += ':' + quote_plus(password)

        return create_engine(f'mysql+pymysql://{credentials}@{host}/{db}')

In [3]:
# SQL passed to the loader as a plain string: every column of time_series.oil
query = '''
    SELECT *
    FROM time_series.oil
'''

# build a loader and run the query in a single step
results = DBDataLoader().load(query)

# preview the first rows
results.head()

Loading dataset from database...


Unnamed: 0,date,dcoilwtico
0,2013-01-01,
1,2013-01-02,93.14
2,2013-01-03,92.97
3,2013-01-04,93.12
4,2013-01-07,93.2


In [4]:
# (rows, columns) of the frame loaded from the oil query above
results.shape

(1218, 2)

In [6]:
# Same loader, but this time the SQL comes from a .sql script on disk
query_file_path = '../src/scripts/query_joined_table.sql'

# read the whole script; the file is closed again before the query is sent
with open(query_file_path) as f_sql:
    sql_raw = f_sql.read()

results2 = DBDataLoader().load(query=sql_raw)

# preview the first rows
results2.head()

Loading dataset from database...


Unnamed: 0,id,date,store_nbr,family,sales,onpromotion,city,state,type,cluster,transactions,locale
0,108471,2013-03-02,52,AUTOMOTIVE,0.0,0,Manta,Manabi,A,11,,Local
1,108472,2013-03-02,52,BABY CARE,0.0,0,Manta,Manabi,A,11,,Local
2,108473,2013-03-02,52,BEAUTY,0.0,0,Manta,Manabi,A,11,,Local
3,108474,2013-03-02,52,BEVERAGES,0.0,0,Manta,Manabi,A,11,,Local
4,108475,2013-03-02,52,BOOKS,0.0,0,Manta,Manabi,A,11,,Local


In [7]:
# (rows, columns) of the frame loaded from the .sql file query above
results2.shape

(66, 12)

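Notes: still need to establish and document
- Database tables: column names, data types, and whether each column is required
- DOTENV data: the `.env` keys read by `DBDataLoader` (`DB_HOST`, `DB_USER`, `DB_PWD`, `DB_NAME`)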
- 