In [1]:
import pandas as pd
import numpy as np
import datetime
from sqlalchemy import create_engine,VARCHAR, DATE

## IMPORT DATA SETS

In [2]:
df_branch_service = pd.read_parquet('parquets/branch_service_formatted_values.parquet')
df_customer_transaction = pd.read_parquet('parquets/finalcustomerinfo.parquet')

## MERGE

In [3]:
df_merged = pd.merge(left = df_customer_transaction, right = df_branch_service, left_on = 'txn_id', right_on = 'txn_id')

# DATA PROFILE

## Yearly Sales per Branch View

In [4]:
df_merged['avail_date'] = pd.to_datetime(df_merged['avail_date'])
df_merged.groupby([df_merged.avail_date.dt.year, 'branch_name'])['price'].sum().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,price
avail_date,branch_name,Unnamed: 2_level_1
2005,FrankMall,19671.16
2005,MallOfAsia,19480.69
2005,MayMall,22354.24
2005,MegaMall,21749.51
2005,RobinsonsMall,15190.46
...,...,...
2023,MayMall,27102.23
2023,MegaMall,20619.59
2023,RobinsonsMall,20214.06
2023,SmallMall,26595.70


## Weekly Sales per Service View

In [5]:
df_merged['week'] = df_merged['avail_date'].apply(
    lambda date: datetime.date.isocalendar(date)[1])
df_merged.groupby([df_merged['week'], 'service'])['price'].sum().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,price
week,service,Unnamed: 2_level_1
1,FootSpa,7208.64
1,HairColor,6694.84
1,Haircut,6149.16
1,Manicure,4584.09
1,NailColor,2590.32
...,...,...
53,Haircut,528.96
53,Manicure,994.14
53,NailColor,602.40
53,Pedicure,935.88


## Yearly Sales per Service per Branch View

In [6]:
df_merged.groupby([df_merged.avail_date.dt.year, 'branch_name', 'service'])['price'].sum().to_frame() 

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,price
avail_date,branch_name,service,Unnamed: 3_level_1
2005,FrankMall,FootSpa,2202.64
2005,FrankMall,HairColor,2290.34
2005,FrankMall,Haircut,1586.88
2005,FrankMall,Manicure,1325.52
2005,FrankMall,NailColor,632.52
...,...,...,...
2023,StarMall,Haircut,2248.08
2023,StarMall,Manicure,1933.05
2023,StarMall,NailColor,903.60
2023,StarMall,Pedicure,1793.77


## Highest Earning Branch Annually

In [7]:
df_grouped = df_merged.groupby([df_merged.avail_date.dt.year, 'branch_name'])['price'].sum().reset_index()
max_prices = df_grouped.groupby('avail_date')['price'].idxmax()
df_highest_prices = df_grouped.loc[max_prices]

df_highest_prices

Unnamed: 0,avail_date,branch_name,price
2,2005,MayMall,22354.24
7,2006,FrankMall,23044.94
16,2007,MayMall,25013.53
27,2008,StarMall,26296.69
31,2009,MegaMall,27389.42
37,2010,MayMall,28180.78
47,2011,SmallMall,30795.09
53,2012,RobinsonsMall,29413.65
57,2013,MallOfAsia,29830.39
64,2014,MallOfAsia,27693.4


In [8]:
df_merged.sort_values(by=['avail_date'], inplace=True, ascending=True)   
df_merged.head(30)

Unnamed: 0,txn_id,avail_date,last_name,first_name,birthday,branch_name,service,price,week
24048,TXN-01774,2005-01-01,Conroy,Vicente,1993-10-02,MegaMall,Manicure,55.23,53
16066,TXN-59909,2005-01-01,Rau,Justus,1998-03-18,RobinsonsMall,HairColor,88.09,53
4699,TXN-24982,2005-01-01,Goldner,Arlene,2003-05-01,MallOfAsia,NailColor,30.12,53
6569,TXN-08018,2005-01-02,Feeney,Corbin,2004-10-05,StarMall,Pedicure,0.0,53
8418,TXN-41893,2005-01-02,Beier,Rickey,1997-07-10,MegaMall,FootSpa,0.0,53
22788,TXN-14571,2005-01-02,Abbott,Josue,2000-10-03,RobinsonsMall,NailColor,30.12,53
5031,TXN-35560,2005-01-02,Stiedemann,Mary,2001-03-16,MallOfAsia,Rebond,400.23,53
9152,TXN-27027,2005-01-03,Langosh,Cecelia,1993-09-16,RobinsonsMall,Pedicure,0.0,1
11504,TXN-57588,2005-01-03,Wisozk,Eduardo,1996-08-14,RobinsonsMall,HairColor,0.0,1
30857,TXN-50243,2005-01-04,Schaden,Jamel,1990-06-21,FrankMall,FootSpa,100.12,1


## Export to SQLite DB

In [9]:
engine = create_engine('sqlite:///german_tips.db', echo=True)
df_merged.to_sql('transactions', con=engine, if_exists='replace', index=False,
                               dtype={
                                   "txn_id": VARCHAR(10),
                                   "avail_date": DATE,
                                   "last_name": VARCHAR(50),
                                   "first_name": VARCHAR(50),
                                   "birthday": DATE,
                                   "branch_name" : VARCHAR(20),
                                   "service" : VARCHAR(20),
                                   "price" : VARCHAR(20),
                               })

2023-11-24 15:48:26,343 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-11-24 15:48:26,372 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("german_tips")
2023-11-24 15:48:26,374 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-11-24 15:48:26,383 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("german_tips")
2023-11-24 15:48:26,385 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-11-24 15:48:26,387 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite~_%' ESCAPE '~' ORDER BY name
2023-11-24 15:48:26,389 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-11-24 15:48:26,391 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='view' AND name NOT LIKE 'sqlite~_%' ESCAPE '~' ORDER BY name
2023-11-24 15:48:26,394 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-11-24 15:48:26,397 INFO sqlalchemy.engine.Engine PRAGMA main.table_xinfo("german_tips")
2023-11-24 15:48:26,399 INFO sqlalchemy.engine.Engine [raw sql] ()


34455