In [1]:
import datetime

import numpy as np
import pandas as pd
from sqlalchemy import DATE, FLOAT, VARCHAR, create_engine

## IMPORT DATA SETS

In [2]:
# Read the two parquet inputs; both frames are joined on txn_id in the
# merge step of this notebook.
df_branch_service = pd.read_parquet(
    'branch_service_formatted_values.parquet'
)
df_customer_transaction = pd.read_parquet(
    'finalcustomerinfo.parquet'
)

## MERGE

In [10]:
# Inner join (the pandas default) of df_customer_transaction (left) with
# df_branch_service (right) on their shared txn_id column.
df_merged = df_customer_transaction.merge(df_branch_service, on='txn_id')

# DATA PROFILE

## Yearly Sales per Branch View

In [4]:
# Convert avail_date to datetime so the .dt accessor can be used below
# (this overwrites the column on df_merged).
df_merged['avail_date'] = pd.to_datetime(df_merged['avail_date'])

# Total price for every (calendar year, branch) pair, shown as a
# one-column frame.
(
    df_merged
    .groupby([df_merged['avail_date'].dt.year, 'branch_name'])['price']
    .sum()
    .to_frame()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,price
avail_date,branch_name,Unnamed: 2_level_1
2005,FrankMall,24364.27
2005,MallOfAsia,21842.46
2005,MayMall,27806.39
2005,MegaMall,25472.43
2005,RobinsonsMall,19155.32
...,...,...
2023,MayMall,31097.52
2023,MegaMall,23454.71
2023,RobinsonsMall,24511.77
2023,SmallMall,31839.79


## Weekly Sales per Service View (ISO week number, all years combined)

In [5]:
# ISO-8601 week number of each avail_date, computed with the vectorised
# .dt accessor rather than a per-row Python lambda. Requires avail_date to
# already be a datetime column.
# .dt.isocalendar() yields a nullable UInt32 column; the cast restores the
# plain int64 dtype that the per-row version produced.
df_merged['week'] = (
    df_merged['avail_date'].dt.isocalendar().week.astype('int64')
)

# Total price per (ISO week number, service).
# NOTE: the grouping key is the week number only, so the same week of
# different years is summed together, and early-January dates can fall in
# week 52/53 of the previous ISO year.
df_merged.groupby(['week', 'service'])['price'].sum().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,price
week,service,Unnamed: 2_level_1
1,FootSpa,8610.32
1,HairColor,8368.55
1,Haircut,7603.80
1,Manicure,5578.23
1,NailColor,2951.76
...,...,...
53,Haircut,661.20
53,Manicure,1104.60
53,NailColor,632.52
53,Pedicure,1169.85


## Yearly Sales per Service per Branch View

In [6]:
# Total price broken down by calendar year, then branch, then service,
# shown as a one-column frame.
(
    df_merged
    .groupby([df_merged['avail_date'].dt.year, 'branch_name', 'service'])['price']
    .sum()
    .to_frame()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,price
avail_date,branch_name,service,Unnamed: 3_level_1
2005,FrankMall,FootSpa,2903.48
2005,FrankMall,HairColor,2642.70
2005,FrankMall,Haircut,1983.60
2005,FrankMall,Manicure,1656.90
2005,FrankMall,NailColor,753.00
...,...,...,...
2023,StarMall,Haircut,2446.44
2023,StarMall,Manicure,2374.89
2023,StarMall,NailColor,1054.20
2023,StarMall,Pedicure,2183.72


## Highest Earning Branch Annually

In [7]:
# Total price per (calendar year, branch), flattened back to ordinary
# columns: avail_date (the year), branch_name, price.
df_grouped = (
    df_merged
    .groupby([df_merged['avail_date'].dt.year, 'branch_name'])['price']
    .sum()
    .reset_index()
)

# For each year, the row label in df_grouped holding the largest total.
max_prices = df_grouped.groupby('avail_date')['price'].idxmax()

# One row per year: the branch with the highest total price.
df_highest_prices = df_grouped.loc[max_prices]

df_highest_prices

Unnamed: 0,avail_date,branch_name,price
2,2005,MayMall,27806.39
9,2006,MayMall,27620.82
16,2007,MayMall,29436.42
22,2008,MallOfAsia,31048.12
29,2009,MallOfAsia,31361.25
37,2010,MayMall,31555.04
48,2011,StarMall,34091.67
51,2012,MayMall,35344.79
57,2013,MallOfAsia,33330.16
64,2014,MallOfAsia,33763.04


In [8]:
# Order the merged rows by avail_date, earliest first, then preview the
# first 30 rows of the sorted frame.
df_merged = df_merged.sort_values('avail_date')
df_merged.head(30)

Unnamed: 0,txn_id,avail_date,last_name,first_name,birthday,branch_name,service,price,week
27950,TXN-01774,2005-01-01,Conroy,Vicente,1993-10-02,MegaMall,Manicure,55.23,53
19968,TXN-59909,2005-01-01,Rau,Justus,1998-03-18,RobinsonsMall,HairColor,88.09,53
7979,TXN-24982,2005-01-01,Goldner,Arlene,2003-05-01,MallOfAsia,NailColor,30.12,53
7601,TXN-13098,2005-01-01,Mraz,Doris,1994-08-06,MegaMall,Pedicure,77.99,53
26690,TXN-14571,2005-01-02,Abbott,Josue,2000-10-03,RobinsonsMall,NailColor,30.12,53
12320,TXN-41893,2005-01-02,Beier,Rickey,1997-07-10,MegaMall,FootSpa,0.0,53
8548,TXN-35560,2005-01-02,Stiedemann,Mary,2001-03-16,MallOfAsia,Rebond,400.23,53
10471,TXN-08018,2005-01-02,Feeney,Corbin,2004-10-05,StarMall,Pedicure,0.0,53
15406,TXN-57588,2005-01-03,Wisozk,Eduardo,1996-08-14,RobinsonsMall,HairColor,0.0,1
13054,TXN-27027,2005-01-03,Langosh,Cecelia,1993-09-16,RobinsonsMall,Pedicure,0.0,1


## Export to SQLite DB

In [12]:
# Write the merged frame to the local SQLite file german_tips.db,
# replacing the german_tips table if it already exists. The DataFrame
# index is not exported. echo=True makes SQLAlchemy log every statement
# it issues.
engine = create_engine('sqlite:///german_tips.db', echo=True)

# Explicit column types for the exported table. Columns not listed here
# (e.g. week) get the type pandas infers for them.
# price is declared FLOAT rather than VARCHAR: a VARCHAR column has TEXT
# affinity in SQLite, so the numeric prices would be stored as strings and
# compare/sort lexically in SQL.
sql_column_types = {
    "txn_id": VARCHAR(10),
    "avail_date": DATE,
    "last_name": VARCHAR(50),
    "first_name": VARCHAR(50),
    "birthday": DATE,
    "branch_name": VARCHAR(20),
    "service": VARCHAR(20),
    "price": FLOAT,
}

# The value returned by to_sql is left as the cell's displayed output.
df_merged.to_sql(
    'german_tips',
    con=engine,
    if_exists='replace',
    index=False,
    dtype=sql_column_types,
)

2023-11-24 15:08:11,006 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-11-24 15:08:11,008 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("german_tips")
2023-11-24 15:08:11,009 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-11-24 15:08:11,014 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("german_tips")
2023-11-24 15:08:11,017 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-11-24 15:08:11,019 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='table' AND name NOT LIKE 'sqlite~_%' ESCAPE '~' ORDER BY name
2023-11-24 15:08:11,020 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-11-24 15:08:11,022 INFO sqlalchemy.engine.Engine SELECT name FROM sqlite_master WHERE type='view' AND name NOT LIKE 'sqlite~_%' ESCAPE '~' ORDER BY name
2023-11-24 15:08:11,023 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-11-24 15:08:11,025 INFO sqlalchemy.engine.Engine PRAGMA main.table_xinfo("german_tips")
2023-11-24 15:08:11,026 INFO sqlalchemy.engine.Engine [raw sql] ()


40040