# Customer transactions

In [1]:
import pandas as pd
import numpy as np

## Import SQLAlchemy classes for the SQL export

In [2]:
from sqlalchemy import create_engine,VARCHAR, DATE

## Import the `date` class for date comparisons

In [3]:
from datetime import date

## Read the JSON file

In [4]:
# Load the raw transaction records; the relative path is resolved against the
# notebook's current working directory.
df_customer_transaction = pd.read_json(path_or_buf="customer_transaction_info.json")

In [5]:
# `head(n)` with n equal to the row count requests every row of the frame for display.
length = df_customer_transaction.shape[0]
df_customer_transaction.head(n=length)

Unnamed: 0,txn_id,avail_date,last_name,first_name,birthday
0,TXN-24546,2030-09-08,ORTIZ,EDUARDO,1990-07-08
1,TXN-14642,2026-05-26,NIENOW,LEA,2000-11-26
2,TXN-60295,2006-09-25,LESCH,FLETA,1993-05-22
3,TXN-60295,2006-09-25,LESCH,FLETA,1993-05-22
4,TXN-60295,2006-09-25,LESCH,FLETA,1993-05-22
...,...,...,...,...,...
130648,TXN-65468,2012-06-16,Cummings,Henry,2005-08-14
130649,TXN-60822,2020-09-28,Feil,Jermey,2010-06-15
130650,TXN-60822,2020-09-28,Feil,Jermey,2010-06-15
130651,TXN-01784,2015-01-11,Schmidt,Emilie,1996-05-21


# Cleaning the Data

## Drop Rows with Inconsistent avail_date and birthday

In [6]:
# Midnight of the current day, used as the upper bound for both date columns.
today = pd.Timestamp(date.today())

# Parse both date columns in place; values that cannot be parsed become NaT
# (errors='coerce') and fail every comparison below, so those rows are dropped.
for date_col in ('avail_date', 'birthday'):
    df_customer_transaction[date_col] = pd.to_datetime(df_customer_transaction[date_col], errors='coerce')

# Rows where neither the availment date nor the birthday lies in the future.
not_in_future = (df_customer_transaction['avail_date'] <= today) & (df_customer_transaction['birthday'] <= today)
df_filt = df_customer_transaction[not_in_future]

# The birthday must come strictly before the availment date.
born_before_avail = df_filt['birthday'] < df_filt['avail_date']
df_bday_avail = df_filt[born_before_avail]

## Drop Duplicates

In [7]:
# Remove rows that exactly repeat an earlier row: all columns are compared and the
# first occurrence is kept.
df_DropDuplicated_Customers = df_bday_avail.drop_duplicates(subset=None, keep='first')

## Drop Rows with Non-Alphabetic First or Last Names

In [8]:
# Keep only rows whose first AND last names consist solely of alphabetic characters.
# NOTE: despite its name, `nonAlphaNames` holds the rows that PASSED the alphabetic
# check; the name is kept because the export cell reads it.
#
# `.str.isalpha()` yields NaN for missing or non-string names, and boolean indexing
# with a mask that contains NaN raises ValueError. `.eq(True)` maps those entries to
# False, so such rows are dropped together with the other non-alphabetic names.
first_is_alpha = df_DropDuplicated_Customers['first_name'].str.isalpha().eq(True)
last_is_alpha = df_DropDuplicated_Customers['last_name'].str.isalpha().eq(True)
nonAlphaNames = df_DropDuplicated_Customers[first_is_alpha & last_is_alpha]

## Export the Clean Data to SQL 

In [9]:
# Build the export frame from the cleaned rows themselves. The previous version wrote
# columns taken from the unfiltered `df_customer_transaction` into the filtered
# `nonAlphaNames` slice, which relied on index alignment and can raise
# SettingWithCopyWarning. `assign` returns a new frame and leaves `nonAlphaNames` as is.
customers_to_export = nonAlphaNames.assign(
    avail_date=pd.to_datetime(nonAlphaNames['avail_date'], errors='coerce'),
    birthday=pd.to_datetime(nonAlphaNames['birthday'], errors='coerce'),
)

# echo=True makes SQLAlchemy log every emitted SQL statement to the cell output.
engine = create_engine('sqlite:///customer_transactions.db', echo=True)

# if_exists='replace' drops and recreates the table on every run; index=False keeps the
# DataFrame index out of the table. The call is the cell's last expression, so its
# return value (the row count reported by pandas) is displayed as the cell output.
customers_to_export.to_sql('customer_transactions', con=engine, if_exists='replace', index=False,
                              dtype={
                                  "txn_id": VARCHAR(10),
                                  "avail_date" : DATE,
                                  "last_name" : VARCHAR(20),
                                  "first_name" : VARCHAR(20),
                                  "birthday" : DATE,
                              })

2023-11-23 00:29:24,752 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-11-23 00:29:24,758 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("customer_transactions")
2023-11-23 00:29:24,758 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-11-23 00:29:24,758 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("customer_transactions")
2023-11-23 00:29:24,758 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-11-23 00:29:24,761 INFO sqlalchemy.engine.Engine 
CREATE TABLE customer_transactions (
	txn_id VARCHAR(10), 
	avail_date DATE, 
	last_name VARCHAR(20), 
	first_name VARCHAR(20), 
	birthday DATE
)


2023-11-23 00:29:24,761 INFO sqlalchemy.engine.Engine [no key 0.00054s] ()
2023-11-23 00:29:24,986 INFO sqlalchemy.engine.Engine INSERT INTO customer_transactions (txn_id, avail_date, last_name, first_name, birthday) VALUES (?, ?, ?, ?, ?)
2023-11-23 00:29:24,988 INFO sqlalchemy.engine.Engine [generated in 0.17221s] [('TXN-60295', '2006-09-25', 'LESCH', 'FLETA', '1993-05-22'), ('TXN-40

34888