# SQL Analysis - Pink Slip Data
Loads the alteration slip data exported from the web app (`pink_slips.csv`) into an in-memory SQLite database and analyzes it with raw SQL queries.

In [None]:
import sqlite3
import pandas as pd

# Load the raw export. Each row carries both the slip-level columns and the
# item-level columns that are split into two tables below.
df = pd.read_csv('pink_slips.csv')

# Fresh in-memory SQLite database; the query cells further down read from it.
conn = sqlite3.connect(':memory:')

# Slip-level table: keep only the slip columns and a single row per
# slip_number (the first occurrence in the export wins).
slip_columns = [
    'slip_number', 'first_initial', 'last_name', 'phone',
    'date_received', 'due_date', 'due_time', 'total_amount',
]
slips = df.loc[:, slip_columns].drop_duplicates(subset='slip_number')
slips.to_sql(name='slips', con=conn, index=False)

# Item-level table: every row of the export, restricted to the item columns.
item_columns = ['slip_number', 'item_type', 'work_description', 'price']
items = df.loc[:, item_columns]
items.to_sql(name='items', con=conn, index=False)

# Sanity check: show the first five rows of each table as stored in SQLite.
print('slips:')
slips_preview = pd.read_sql_query('SELECT * FROM slips LIMIT 5', conn)
display(slips_preview)

print('items:')
items_preview = pd.read_sql_query('SELECT * FROM items LIMIT 5', conn)
display(items_preview)

slips:


Unnamed: 0,slip_number,first_initial,last_name,phone,date_received,due_date,due_time,total_amount
0,100001,D,Barrett,(404) 555-9933,01/01/2024,01/15/2024,,24.0
1,100002,M,Turner,(980) 555-8679,01/01/2024,01/15/2024,,13.0
2,100003,J,Bryant,(980) 555-3255,01/01/2024,01/15/2024,,25.0
3,100004,K,Kim,(843) 555-8654,01/01/2024,01/15/2024,,75.0
4,100005,Y,Jacobs,(828) 555-2115,01/01/2024,01/15/2024,,26.0


items:


Unnamed: 0,slip_number,item_type,work_description,price
0,100001,Jacket,Shorten sleeves,11.0
1,100001,Jacket,Shorten sleeves,13.0
2,100002,Shirt,Hem,13.0
3,100003,Pants,Take in waist,25.0
4,100004,Shirt,Resize,55.0


## Revenue by Month

In [2]:
# Monthly roll-up of the slips table: slip count, total revenue and average
# slip value. The month key is built by slicing the year (chars 7-10) and the
# month (chars 1-2) out of date_received, which yields a sortable 'YYYY-MM'
# string; this assumes zero-padded MM/DD/YYYY text, as in the preview above.
pd.read_sql_query(
    sql='''
    SELECT
        substr(date_received, 7, 4) || '-' || substr(date_received, 1, 2) AS month,
        COUNT(*) AS total_slips,
        ROUND(SUM(total_amount), 2) AS revenue,
        ROUND(AVG(total_amount), 2) AS avg_slip_value
    FROM slips
    GROUP BY month
    ORDER BY month
''',
    con=conn,
)

Unnamed: 0,month,total_slips,revenue,avg_slip_value
0,2024-01,461,34273.0,74.34
1,2024-02,456,32109.0,70.41
2,2024-03,471,31820.0,67.56
3,2024-04,508,34219.0,67.36
4,2024-05,463,31435.0,67.89
5,2024-06,460,32214.0,70.03
6,2024-07,473,30000.0,63.42
7,2024-08,489,32364.0,66.18
8,2024-09,455,30694.0,67.46
9,2024-10,491,33158.0,67.53


## Top 10 Customers by Total Spend

In [3]:
# Top 10 customers ranked by total spend across all of their slips.
# A customer is identified by (first_initial, last_name, phone); every row in
# `slips` is one slip, so COUNT(*) is the number of slips ("visits").
# The extra ORDER BY keys break ties on total_spent so that both the ordering
# and the LIMIT 10 cut-off are deterministic across re-runs.
pd.read_sql_query('''
    SELECT
        first_initial || '. ' || last_name AS customer,
        phone,
        COUNT(*) AS visits,
        ROUND(SUM(total_amount), 2) AS total_spent,
        ROUND(AVG(total_amount), 2) AS avg_per_visit
    FROM slips
    GROUP BY first_initial, last_name, phone
    ORDER BY total_spent DESC, last_name, first_initial, phone
    LIMIT 10
''', conn)

Unnamed: 0,customer,phone,visits,total_spent,avg_per_visit
0,K. Garza,(704) 555-6753,36,3236.0,89.89
1,L. Jacobs,(704) 555-9822,36,3016.0,83.78
2,Q. Franklin,(704) 555-6581,36,2959.0,82.19
3,C. Walsh,(980) 555-6399,36,2827.0,78.53
4,X. Edwards,(980) 555-2018,36,2810.0,78.06
5,P. Hernandez,(704) 555-7525,36,2795.0,77.64
6,R. Fox,(704) 555-4737,36,2748.0,76.33
7,G. Cook,(678) 555-8938,36,2731.0,75.86
8,U. Gardner,(704) 555-6943,36,2728.0,75.78
9,M. James,(704) 555-7571,36,2711.0,75.31
