## First and Repeat Purchases Revenue - Upload to Google Sheet

In [16]:
# Libraries needed
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine

import json
from datetime import datetime, date, timedelta

In [17]:
from urllib.parse import quote_plus

import creds_panoply

# Connection settings are read from the creds_panoply module rather than
# being hard-coded in the notebook.
POSTGRES_ADDRESS = creds_panoply.address
POSTGRES_PORT = creds_panoply.port
POSTGRES_USERNAME = creds_panoply.username
POSTGRES_PASSWORD = creds_panoply.pw
POSTGRES_DBNAME = creds_panoply.dbname

# The username and password are percent-encoded before being placed in the
# URL: characters such as '@', ':', '/' or '#' inside a credential would
# otherwise be parsed as URL delimiters and produce a wrong or invalid
# connection string.
# NOTE(review): assumes creds_panoply stores the raw (not already
# percent-encoded) credentials - confirm.
postgres_str = ('postgresql://{username}:{password}@{ipaddress}:{port}/{dbname}'.format(
    username=quote_plus(str(POSTGRES_USERNAME)),
    password=quote_plus(str(POSTGRES_PASSWORD)),
    ipaddress=POSTGRES_ADDRESS,
    port=POSTGRES_PORT,
    dbname=POSTGRES_DBNAME))

# SQLAlchemy engine handed to pandas for running queries.
cnx = create_engine(postgres_str)

In [18]:
# Importing selected fields
# Source relation: customers inner-joined to their orders on the customer id.
tbl_name = (
    "cirkul_database_import.business_intelligence_customers AS CUSTOMERS"
    " INNER JOIN cirkul_database_import.business_intelligence_orders as ORDERS"
    " ON ORDERS.customer_id = CUSTOMERS.shopify_customer_id"
)
# Columns pulled from the join, in the order they appear in the result.
sel_fields = ", ".join([
    "shopify_customer_id",
    "first_purchase_at",
    "order_processed_at_date",
    "order_revenue",
])
query = "SELECT {} FROM {}".format(sel_fields, tbl_name)
# Run the query through the engine and load the rows into a DataFrame.
df_orders = pd.read_sql_query(sql=query, con=cnx)


In [19]:
# Preview the loaded frame (the rendered output elides the middle rows).
df_orders

Unnamed: 0,shopify_customer_id,first_purchase_at,order_processed_at_date,order_revenue
0,2911096635482,2020-01-05 02:14:10,2021-02-04 07:43:40,102.35
1,3835229732954,2020-11-06 20:51:10,2021-02-04 15:03:10,40.00
2,1331687260250,2019-05-16 15:13:19,2021-02-04 13:33:46,25.00
3,3861229502554,2020-11-25 19:39:04,2021-02-04 14:18:34,48.00
4,2994864980058,2020-01-24 20:47:33,2020-02-07 10:25:34,27.00
...,...,...,...,...
3068555,5121559691354,2021-04-06 14:02:31,2021-04-06 14:02:31,20.00
3068556,5121641087066,2021-04-06 14:32:26,2021-04-06 14:32:26,42.00
3068557,5118006886490,2021-04-06 14:19:42,2021-04-06 14:19:42,11.00
3068558,5121520107610,2021-04-06 14:00:45,2021-04-06 14:00:45,42.00


In [21]:
# Converting the dates into proper dates
# Each timestamp column is parsed once and the parsed values are reused below.
# No origin/unit arguments are passed: those only apply to numeric epoch
# values, whereas these columns already hold timestamps. Leaving them out also
# keeps the cell re-runnable after the columns have become 'YYYY-MM-DD' strings.
first_purchase_ts = pd.to_datetime(df_orders['first_purchase_at'])
order_processed_ts = pd.to_datetime(df_orders['order_processed_at_date'])

# Keep only the calendar date (as a 'YYYY-MM-DD' string) so that the first
# purchase and the order can later be compared at day granularity.
df_orders['first_purchase_at'] = first_purchase_ts.dt.strftime('%Y-%m-%d')
df_orders['order_processed_at_date'] = order_processed_ts.dt.strftime('%Y-%m-%d')

# Adding a column for month ('YYYY-MM') of the order
df_orders['Month'] = order_processed_ts.dt.strftime('%Y-%m')

df_orders.head()

Unnamed: 0,shopify_customer_id,first_purchase_at,order_processed_at_date,order_revenue,Month
0,2911096635482,2020-01-05,2021-02-04,102.35,2021-02
1,3835229732954,2020-11-06,2021-02-04,40.0,2021-02
2,1331687260250,2019-05-16,2021-02-04,25.0,2021-02
3,3861229502554,2020-11-25,2021-02-04,48.0,2021-02
4,2994864980058,2020-01-24,2020-02-07,27.0,2020-02


In [22]:
# Creating another field for identifying the first and repeat purchase orders

def label_OrderType(row):
    """Classify one order row as a first or a repeat purchase.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            'first_purchase_at' and 'order_processed_at_date'.

    Returns:
        'FIRST' when the two values compare equal, otherwise 'REPEAT'.
    """
    is_first = row['first_purchase_at'] == row['order_processed_at_date']
    return 'FIRST' if is_first else 'REPEAT'

# Vectorised form of calling label_OrderType on every row: compare the two
# date columns element-wise, then translate the booleans into labels. This
# avoids a Python-level function call per row, which is slow on millions of rows.
is_first_order = df_orders['first_purchase_at'] == df_orders['order_processed_at_date']
df_orders['ORDER_TYPE'] = is_first_order.map({True: 'FIRST', False: 'REPEAT'})
df_orders

Unnamed: 0,shopify_customer_id,first_purchase_at,order_processed_at_date,order_revenue,Month,ORDER_TYPE
0,2911096635482,2020-01-05,2021-02-04,102.35,2021-02,REPEAT
1,3835229732954,2020-11-06,2021-02-04,40.00,2021-02,REPEAT
2,1331687260250,2019-05-16,2021-02-04,25.00,2021-02,REPEAT
3,3861229502554,2020-11-25,2021-02-04,48.00,2021-02,REPEAT
4,2994864980058,2020-01-24,2020-02-07,27.00,2020-02,REPEAT
...,...,...,...,...,...,...
3068555,5121559691354,2021-04-06,2021-04-06,20.00,2021-04,FIRST
3068556,5121641087066,2021-04-06,2021-04-06,42.00,2021-04,FIRST
3068557,5118006886490,2021-04-06,2021-04-06,11.00,2021-04,FIRST
3068558,5121520107610,2021-04-06,2021-04-06,42.00,2021-04,FIRST


In [25]:
# Grouping
# Total revenue per (Month, ORDER_TYPE). The revenue column is selected
# before aggregating so that only it is summed; summing the whole frame first
# would also aggregate the customer ids and the date strings, which is wasted
# work and behaves differently across pandas versions for string columns.
df_output = (
    df_orders
    .groupby(['Month', 'ORDER_TYPE'])['order_revenue']
    .sum()
    .reset_index()  # turn the group keys back into ordinary columns
)
df_output.head()

Unnamed: 0,Month,ORDER_TYPE,order_revenue
0,2017-10,FIRST,674.0
1,2017-11,FIRST,4366.0
2,2017-11,REPEAT,540.0
3,2017-12,FIRST,6486.92
4,2017-12,REPEAT,8324.87


### Sending it to Google Sheets

In [27]:
import pygsheets
import pandas as pd

# Authorise against Google Sheets using the service-account key file in the
# working directory.
gc = pygsheets.authorize(service_file='cred_gsheets_analytics_snowflake.json')
# Open the target spreadsheet by its title.
# NOTE(review): the recorded run of this cell failed here with
# SpreadsheetNotFound; presumably the spreadsheet is not shared with the
# service account or the title differs - confirm before relying on this cell.
file_dataForChartio = gc.open("Data for Chartio Plots")
# Index 1 selects the worksheet at position 1 of the spreadsheet
# (presumably zero-based, i.e. the second tab - TODO confirm).
sht_First_n_Repeat_Orders = file_dataForChartio[1]

# Write the aggregated table into the worksheet, starting at cell (1, 1).
sht_First_n_Repeat_Orders.set_dataframe(df_output,(1,1))

SpreadsheetNotFound: Could not find a spreadsheet with title Data for Chartio Plots.

In [29]:
# Write the aggregated revenue table to data.csv in the working directory.
# The index option is left at its default, so the row index is written too.
df_output.to_csv('data.csv')