In [1]:
import pandas as pd

The data frames `Customers`, `Employees`, `Offices`, `OrderDetails`, `Orders`, `Payments`, `ProductLines`, and `Products` contain data of the corresponding tables in the [ClassicModels database](https://www.richardtwatson.com/dm6e/Reader/ClassicModels.html).

The entity relationship diagram is shown here ![ERD](figures/ClassicModels.png)

- Report the total payments by date
- Report the total payments by year
- Report the total payments by month
- Report the total payments by year-month
- Report the total payments by year-quarter
- Report the total payments on a semi-annual basis (for each half of each year)

*HINT* Convert `paymentDate` to a timestamp. Then you can extract year, month, and quarter using its properties. e.g.,

In [10]:
# Parse a free-form date string into a pandas Timestamp.
text = "6 October 2019"
ts = pd.to_datetime(arg=text)
ts

Timestamp('2019-10-06 00:00:00')

In [3]:
def _read_classicmodels(table):
    """Read one semicolon-delimited ClassicModels CSV export from ``data/``."""
    return pd.read_csv(f'data/ClassicModels_{table}.csv', sep=';')


# One data frame per ClassicModels table.
Customers = _read_classicmodels('Customers')
Employees = _read_classicmodels('Employees')
Offices = _read_classicmodels('Offices')
OrderDetails = _read_classicmodels('OrderDetails')
Orders = _read_classicmodels('Orders')
Payments = _read_classicmodels('Payments')
ProductLines = _read_classicmodels('ProductLines')
Products = _read_classicmodels('Products')

In [17]:
# Payment dates parsed to timestamps; the `paymentDate` column itself is left as loaded.
pdate = Payments['paymentDate'].pipe(pd.to_datetime)

In [21]:
# Total payments by date: sum `amount` over each distinct `paymentDate` value.
Payments['amount'].groupby(Payments['paymentDate']).sum()

paymentDate
2003-01-16 00:00:00    10223.83
2003-01-28 00:00:00    10549.01
2003-01-30 00:00:00     5494.78
2003-02-16 00:00:00    50218.95
2003-02-20 00:00:00    53959.21
                         ...   
2005-05-20 00:00:00    29070.38
2005-05-23 00:00:00    75020.13
2005-05-25 00:00:00    30253.75
2005-06-03 00:00:00    12432.32
2005-06-09 00:00:00    46656.94
Name: amount, Length: 232, dtype: float64

In [24]:
# Total payments by year, keyed on the first four characters of `paymentDate`.
Payments['amount'].groupby(Payments['paymentDate'].str.slice(0, 4)).sum()

paymentDate
2003    3250217.70
2004    4313328.25
2005    1290293.28
Name: amount, dtype: float64

In [32]:
# Calendar year of each payment, taken from the parsed timestamps with the
# vectorized `.dt` accessor instead of a per-row Python lambda.
Payments['year'] = pdate.dt.year
Payments.head()

Unnamed: 0,checkNumber,paymentDate,amount,customerNumber,year
0,AB661578,2004-07-28 00:00:00,9415.13,471,2004
1,AD304085,2003-10-24 00:00:00,36798.88,299,2003
2,AD832091,2004-09-09 00:00:00,1960.8,172,2004
3,AE192287,2005-03-10 00:00:00,23602.9,186,2005
4,AE215433,2005-03-05 00:00:00,101244.59,124,2005


In [33]:
# Total payments by year, using the derived `year` column.
Payments['amount'].groupby(Payments['year']).sum()

year
2003    3250217.70
2004    4313328.25
2005    1290293.28
Name: amount, dtype: float64

In [35]:
# Copy of Payments indexed by the parsed payment timestamps, used for the
# period-based and resample-based aggregations.
pdf = Payments.set_index(keys=pdate)

In [39]:
# Total payments by year via annual periods. `pdf` is `Payments` re-indexed by
# payment timestamp, so its index lines up row-for-row with `Payments`.
# 'Y' is the supported annual period alias; the older 'A' spelling is
# deprecated in recent pandas releases.
Payments.groupby(pdf.index.to_period('Y'))['amount'].sum()

paymentDate
2003    3250217.70
2004    4313328.25
2005    1290293.28
Freq: A-DEC, Name: amount, dtype: float64

In [42]:
# Calendar month (1-12) of each payment, taken from the parsed timestamps with
# the vectorized `.dt` accessor instead of a per-row Python lambda.
Payments['month'] = pdate.dt.month
Payments.head()

Unnamed: 0,checkNumber,paymentDate,amount,customerNumber,year,month
0,AB661578,2004-07-28 00:00:00,9415.13,471,2004,7
1,AD304085,2003-10-24 00:00:00,36798.88,299,2003,10
2,AD832091,2004-09-09 00:00:00,1960.8,172,2004,9
3,AE192287,2005-03-10 00:00:00,23602.9,186,2005,3
4,AE215433,2005-03-05 00:00:00,101244.59,124,2005,3


In [43]:
# Total payments by calendar month, pooled across all years.
Payments['amount'].groupby(Payments['month']).sum()

month
1      397887.81
2      503357.59
3      989575.78
4      493457.60
5      640655.32
6      425151.10
7      442438.48
8      624299.16
9      637651.76
10     501961.39
11    1551479.98
12    1645923.26
Name: amount, dtype: float64

In [48]:
def _year_month_key(stamp):
    """Return the ``(year, month)`` pair of a timestamp."""
    return (stamp.year, stamp.month)


# Tag every payment with its (year, month) tuple.
Payments['year-month'] = pdate.apply(_year_month_key)
Payments.head()

Unnamed: 0,checkNumber,paymentDate,amount,customerNumber,year,month,year-month
0,AB661578,2004-07-28 00:00:00,9415.13,471,2004,7,"(2004, 7)"
1,AD304085,2003-10-24 00:00:00,36798.88,299,2003,10,"(2003, 10)"
2,AD832091,2004-09-09 00:00:00,1960.8,172,2004,9,"(2004, 9)"
3,AE192287,2005-03-10 00:00:00,23602.9,186,2005,3,"(2005, 3)"
4,AE215433,2005-03-05 00:00:00,101244.59,124,2005,3,"(2005, 3)"


In [49]:
# Total payments for each (year, month) tuple.
Payments.groupby(by='year-month')['amount'].agg('sum')

year-month
(2003, 1)      26267.62
(2003, 2)     144384.36
(2003, 3)     199704.48
(2003, 4)     136313.92
(2003, 5)     159881.97
(2003, 6)     180218.98
(2003, 7)     158247.00
(2003, 8)     246204.86
(2003, 9)     161206.23
(2003, 10)    316857.96
(2003, 11)    694292.68
(2003, 12)    826637.64
(2004, 1)     234152.13
(2004, 2)     106652.01
(2004, 3)     404603.21
(2004, 4)     173245.96
(2004, 5)     208524.42
(2004, 6)     185842.86
(2004, 7)     284191.48
(2004, 8)     378094.30
(2004, 9)     476445.53
(2004, 10)    185103.43
(2004, 11)    857187.30
(2004, 12)    819285.62
(2005, 1)     137468.06
(2005, 2)     252321.22
(2005, 3)     385268.09
(2005, 4)     183897.72
(2005, 5)     272248.93
(2005, 6)      59089.26
Name: amount, dtype: float64

In [52]:
# Monthly period of each payment timestamp.
pdf.index.to_period(freq='M')

PeriodIndex(['2004-07', '2003-10', '2004-09', '2005-03', '2005-03', '2003-11',
             '2003-11', '2003-12', '2003-09', '2005-02',
             ...
             '2004-11', '2004-11', '2004-05', '2004-07', '2003-04', '2004-04',
             '2003-12', '2004-01', '2004-12', '2004-02'],
            dtype='period[M]', name='paymentDate', length=273, freq='M')

In [53]:
# Total payments by year-month, grouped on monthly periods of the payment timestamps.
Payments['amount'].groupby(pdf.index.to_period('M')).sum()

paymentDate
2003-01     26267.62
2003-02    144384.36
2003-03    199704.48
2003-04    136313.92
2003-05    159881.97
2003-06    180218.98
2003-07    158247.00
2003-08    246204.86
2003-09    161206.23
2003-10    316857.96
2003-11    694292.68
2003-12    826637.64
2004-01    234152.13
2004-02    106652.01
2004-03    404603.21
2004-04    173245.96
2004-05    208524.42
2004-06    185842.86
2004-07    284191.48
2004-08    378094.30
2004-09    476445.53
2004-10    185103.43
2004-11    857187.30
2004-12    819285.62
2005-01    137468.06
2005-02    252321.22
2005-03    385268.09
2005-04    183897.72
2005-05    272248.93
2005-06     59089.26
Freq: M, Name: amount, dtype: float64

In [54]:
# Total payments by year-quarter, grouped on quarterly periods of the payment timestamps.
Payments['amount'].groupby(pdf.index.to_period('Q')).sum()

paymentDate
2003Q1     370356.46
2003Q2     476414.87
2003Q3     565658.09
2003Q4    1837788.28
2004Q1     745407.35
2004Q2     567613.24
2004Q3    1138731.31
2004Q4    1861576.35
2005Q1     775057.37
2005Q2     515235.91
Freq: Q-DEC, Name: amount, dtype: float64

In [56]:
def _year_quarter_key(stamp):
    """Return the ``(year, quarter)`` pair of a timestamp."""
    return (stamp.year, stamp.quarter)


# Tag every payment with its (year, quarter) tuple.
Payments['year-quarter'] = pdate.apply(_year_quarter_key)
Payments.head()

Unnamed: 0,checkNumber,paymentDate,amount,customerNumber,year,month,year-month,year-quarter
0,AB661578,2004-07-28 00:00:00,9415.13,471,2004,7,"(2004, 7)","(2004, 3)"
1,AD304085,2003-10-24 00:00:00,36798.88,299,2003,10,"(2003, 10)","(2003, 4)"
2,AD832091,2004-09-09 00:00:00,1960.8,172,2004,9,"(2004, 9)","(2004, 3)"
3,AE192287,2005-03-10 00:00:00,23602.9,186,2005,3,"(2005, 3)","(2005, 1)"
4,AE215433,2005-03-05 00:00:00,101244.59,124,2005,3,"(2005, 3)","(2005, 1)"


In [57]:
# Total payments for each (year, quarter) tuple.
Payments.groupby(by='year-quarter')['amount'].agg('sum')

year-quarter
(2003, 1)     370356.46
(2003, 2)     476414.87
(2003, 3)     565658.09
(2003, 4)    1837788.28
(2004, 1)     745407.35
(2004, 2)     567613.24
(2004, 3)    1138731.31
(2004, 4)    1861576.35
(2005, 1)     775057.37
(2005, 2)     515235.91
Name: amount, dtype: float64

In [62]:
# Label each payment with its half-year, e.g. "2004-H2":
# months 1-6 map to "H1", every other month to "H2".
half_label = Payments['month'].map(lambda m: "H1" if m in range(1, 7) else "H2")
Payments['biyear'] = Payments['year'].map(str) + "-" + half_label

In [63]:
# Preview the first rows, including the new `biyear` column.
Payments.head(n=5)

Unnamed: 0,checkNumber,paymentDate,amount,customerNumber,year,month,year-month,year-quarter,biyear
0,AB661578,2004-07-28 00:00:00,9415.13,471,2004,7,"(2004, 7)","(2004, 3)",2004-H2
1,AD304085,2003-10-24 00:00:00,36798.88,299,2003,10,"(2003, 10)","(2003, 4)",2003-H2
2,AD832091,2004-09-09 00:00:00,1960.8,172,2004,9,"(2004, 9)","(2004, 3)",2004-H2
3,AE192287,2005-03-10 00:00:00,23602.9,186,2005,3,"(2005, 3)","(2005, 1)",2005-H1
4,AE215433,2005-03-05 00:00:00,101244.59,124,2005,3,"(2005, 3)","(2005, 1)",2005-H1


In [65]:
# Total payments per half-year label.
Payments['amount'].groupby(Payments['biyear']).sum()

biyear
2003-H1     846771.33
2003-H2    2403446.37
2004-H1    1313020.59
2004-H2    3000307.66
2005-H1    1290293.28
Name: amount, dtype: float64

In [69]:
# Number of payment rows in the date-indexed frame.
pdf.shape[0]

273

In [79]:
# Total payments in six-month bins, each labelled by its month-start date.
# NOTE(review): the bin edges appear to follow the month of the earliest payment,
# so they match calendar half-years only when the data starts in January or
# July - confirm before reusing on other data.
pdf['amount'].resample('6MS').sum()

paymentDate
2003-01-01     846771.33
2003-07-01    2403446.37
2004-01-01    1313020.59
2004-07-01    3000307.66
2005-01-01    1290293.28
Freq: 6MS, Name: amount, dtype: float64