In [14]:
import numpy as np

from os.path import expanduser

import ConfigParser

import psycopg2
import pandas as pd

#### Helper functions that read the database username and password from a local config file, so that these queries can be shared without exposing credentials

In [15]:
# The configuration file is looked up under the current user's home directory,
# at <home>/Documents/config.ini
HOME_FOLDER = expanduser("~")
DEFAULT_CONFIG_PATH = HOME_FOLDER + '/Documents/config.ini'


def get_config(path=DEFAULT_CONFIG_PATH, db='redshift'):
    """ Load one section of the user's INI-style configuration file

    Args:
        path (str): Location of the configuration file on disk
        db (str): Name of the section to read (one section per DB type)

    Returns:
        list((key, value)): The (option, value) pairs found in that section

    """

    parser = ConfigParser.ConfigParser()

    # The file handle is closed as soon as the parser has consumed it
    with open(path) as handle:
        parser.readfp(handle)

    section_items = parser.items(db)
    return section_items


def connect_to_redshift():
    """ Connect to Redshift with the locally stored settings and return a cursor

    The settings (dbname, username, password, hostname, port) are obtained
    through get_config() using its default arguments.

    Returns:
        Cursor: Cursor created on the newly opened connection
    """

    settings = dict(get_config())

    connection_kwargs = dict(
        database=settings["dbname"],
        user=settings["username"],
        password=settings["password"],
        host=settings["hostname"],
        port=int(settings["port"]),  # the port is handed to psycopg2 as an integer
    )
    connection = psycopg2.connect(**connection_kwargs)

    return connection.cursor()


def execute_and_fetch_results(cursor, query, as_pandas=True):
    """ Runs a query against the database and returns the result.

    The records are returned either as a Pandas DataFrame
    OR as a pair (list of rows, list of column names)

    Args:
        cursor (Cursor): Cursor to the Database
        query (str): SQL statement to run against the database
        as_pandas(bool): True if the results should be returned as a Pandas DataFrame

    Returns:
        DataFrame | (list(tuples), list(str)): A DataFrame whose columns are named
        after the result set when ``as_pandas`` is True; otherwise the tuple
        ``(rows, column_names)``
    """

    cursor.execute(query)

    results = cursor.fetchall()
    # The first field of every entry in cursor.description is the column name
    column_names = [desc[0] for desc in cursor.description]

    if not as_pandas:
        return results, column_names

    # fetchall() already returns a list of rows, so it can be passed on directly
    return pd.DataFrame(results, columns=column_names)

In [16]:
# Open the Redshift connection once; this cursor is reused by the query cells below
cursor = connect_to_redshift()

#### Template cells (scratch work for the loop further down)

In [1]:
# Weekly boundaries: freq='W' yields one timestamp per week inside each range.
# rng_beg holds the window starts and rng_end the matching window ends.
# This cell needs the import cell to have been run first (`pd` must be defined).
rng_beg = pd.date_range('1/1/2015', '6/1/2018', freq='W')
rng_end = pd.date_range('1/7/2015', '6/7/2018', freq='W')

rng_beg

NameError: name 'pd' is not defined

In [None]:
#query = """
#SELECT sum(order_amount_gbp) AS order_amount,sum(publisher_amount_gbp) AS publisher_amount
#FROM mugic_comms_gbp
#WHERE transaction_date > '{}' AND transaction_date < '{}'
#LIMIT 10
#""".format(a,b)


In [None]:
#print query

In [None]:
#data = execute_and_fetch_results(cursor,query)
#data['week_end'] = b

In [None]:
#df = pd.DataFrame()

In [None]:
#df = df.append(data)
#df

#### Actual Loop

In [None]:
#cursor = connect_to_redshift()

# Weekly window boundaries: rng_beg[i] is the start and rng_end[i] the end of window i
rng_beg = pd.date_range(start='1/1/2015',end='6/1/2018', freq='W')
rng_end = pd.date_range(start='1/7/2015',end='6/7/2018', freq='W')

# Number of weekly windows to query.
# NOTE(review): both ranges hold 178 timestamps, so with 177 the last window
# (ending 2018/06/03) is not queried -- confirm this is intended.
N_WEEKS = 177

# Each window is the half-open interval [beg, end): the start boundary is included
# and the end boundary excluded, so consecutive windows neither overlap nor leave
# a gap at the shared boundary.
QUERY_TEMPLATE = """
    SELECT sum(order_amount_gbp) AS order_amount,sum(publisher_amount_gbp) AS publisher_amount
    FROM mugic_comms_gbp
    WHERE transaction_date >= '{}' AND transaction_date < '{}' AND skimlinks_status IN (1,2)
    LIMIT 10
    """

# Collect one result frame per week and concatenate once at the end,
# instead of re-copying a growing DataFrame on every iteration.
weekly_frames = []

for week_start, week_stop in zip(rng_beg[:N_WEEKS], rng_end[:N_WEEKS]):
    beg = week_start.strftime('%Y/%m/%d')
    end = week_stop.strftime('%Y/%m/%d')

    query = QUERY_TEMPLATE.format(beg, end)

    data = execute_and_fetch_results(cursor, query)
    data['week_end'] = end
    data['currency'] = 'GBP'

    weekly_frames.append(data)

# reset_index() keeps the old per-query row labels in an 'index' column
df = pd.concat(weekly_frames).reset_index()
df.head()
    
    
    

In [None]:
# Optional: uncomment to remove the 'index' column that reset_index() added to df
#df.drop(['index'],axis=1,inplace=True)

# Write the weekly totals to disk; index=False leaves the DataFrame's row labels out of the file
df.to_csv('weekly_commissions_since2015.csv',index=False)