# Use Pandas to Play with Pens and Paper Data

In [1]:
import pandas as pd

In [2]:
# Load the purchase log from disk; the cells below all work from this frame.
df = pd.read_csv(filepath_or_buffer="pens.csv")

In [3]:
# Display the freshly loaded frame (pandas elides the middle rows of long frames).
df

Unnamed: 0,Category,Brand,Model,Color,Note,Nib Size,Purchase Date,Price,Retailer
0,Case,Kokuyo,NeoCritz,Navy,,,2020-12-09,$15.50,JetPens
1,Case,Sun-Star,Delde,Violet,,,2020-12-09,$16.50,JetPens
2,Case,Sun-Star,Delde,Camel,Corduroy,,2020-12-19,$18.50,JetPens
3,Case,Sun-Star,Delde,Blue,Corduroy,,2020-12-19,$18.50,JetPens
4,Case,Kokuyo,NeoCritz,Navy,Navy dot,,2021-01-05,$15.50,JetPens
...,...,...,...,...,...,...,...,...,...
75,Notebook,Traveler's,Notebook Passport refill,Beige,Dot grid,,2021-05-21,$4.20,Goulet Pens
76,Notebook,Traveler's,Passport Notebook,Blue,,,2021-05-21,$40.95,Goulet Pens
77,Refill,,Ink Syringe,,,,2020-12-12,$2.50,JetPens
78,Rollerball Pen,BIGiDESIGN,Ti Arto EDC,Titanium,,,2023-06-21,$90.00,JetPens


In [16]:
#
# Column-name constants, so the cells below never repeat raw header strings
#

# What the item is
C_CATEGORY = "Category"  # e.g. "Case", "Notebook", "Fountain Pen"
C_BRAND = "Brand"        # empty for some rows
C_MODEL = "Model"
C_COLOR = "Color"
C_NOTE = "Note"          # free-text remark, often empty
C_NIB_SIZE = "Nib Size"

# How it was bought
C_PURCHASE_DATE = "Purchase Date"
C_PRICE = "Price"
C_RETAILER = "Retailer"

# Derived column (not in the CSV): calendar year of the purchase date
C_YEAR = "Year"

In [5]:
#
# Sanitize price
#
# Strip the "$" sign and any thousands separators, then convert to float.
# The dtype guard makes this cell safe to re-run: once the column is numeric
# the `.str` accessor is no longer available and would raise AttributeError.
if not pd.api.types.is_numeric_dtype(df[C_PRICE]):
    df[C_PRICE] = (
        df[C_PRICE]
        .str.replace("[$,]", "", regex=True)
        .astype("float")
    )
df

Unnamed: 0,Category,Brand,Model,Color,Note,Nib Size,Purchase Date,Price,Retailer
0,Case,Kokuyo,NeoCritz,Navy,,,2020-12-09,15.50,JetPens
1,Case,Sun-Star,Delde,Violet,,,2020-12-09,16.50,JetPens
2,Case,Sun-Star,Delde,Camel,Corduroy,,2020-12-19,18.50,JetPens
3,Case,Sun-Star,Delde,Blue,Corduroy,,2020-12-19,18.50,JetPens
4,Case,Kokuyo,NeoCritz,Navy,Navy dot,,2021-01-05,15.50,JetPens
...,...,...,...,...,...,...,...,...,...
75,Notebook,Traveler's,Notebook Passport refill,Beige,Dot grid,,2021-05-21,4.20,Goulet Pens
76,Notebook,Traveler's,Passport Notebook,Blue,,,2021-05-21,40.95,Goulet Pens
77,Refill,,Ink Syringe,,,,2020-12-12,2.50,JetPens
78,Rollerball Pen,BIGiDESIGN,Ti Arto EDC,Titanium,,,2023-06-21,90.00,JetPens


In [6]:
#
# Parse the purchase-date column into pandas datetime values
#
df[C_PURCHASE_DATE] = df[C_PURCHASE_DATE].pipe(pd.to_datetime)

In [7]:
# Display the frame again, following the purchase-date conversion cell.
df

Unnamed: 0,Category,Brand,Model,Color,Note,Nib Size,Purchase Date,Price,Retailer
0,Case,Kokuyo,NeoCritz,Navy,,,2020-12-09,15.50,JetPens
1,Case,Sun-Star,Delde,Violet,,,2020-12-09,16.50,JetPens
2,Case,Sun-Star,Delde,Camel,Corduroy,,2020-12-19,18.50,JetPens
3,Case,Sun-Star,Delde,Blue,Corduroy,,2020-12-19,18.50,JetPens
4,Case,Kokuyo,NeoCritz,Navy,Navy dot,,2021-01-05,15.50,JetPens
...,...,...,...,...,...,...,...,...,...
75,Notebook,Traveler's,Notebook Passport refill,Beige,Dot grid,,2021-05-21,4.20,Goulet Pens
76,Notebook,Traveler's,Passport Notebook,Blue,,,2021-05-21,40.95,Goulet Pens
77,Refill,,Ink Syringe,,,,2020-12-12,2.50,JetPens
78,Rollerball Pen,BIGiDESIGN,Ti Arto EDC,Titanium,,,2023-06-21,90.00,JetPens


In [8]:
# Keep only the rows whose category is exactly "Fountain Pen".
pens = df.loc[df[C_CATEGORY].eq("Fountain Pen")]

In [9]:
# List the fountain pens ordered by brand, then model, color and nib size.
pens.sort_values([C_BRAND, C_MODEL, C_COLOR, C_NIB_SIZE])

Unnamed: 0,Category,Brand,Model,Color,Note,Nib Size,Purchase Date,Price,Retailer
26,Fountain Pen,Diplomat,Aero,Black & White,,Fine,2022-01-06,180.0,Goulet Pens
29,Fountain Pen,Diplomat,Aero,Blue,with pouch and ink,Fine,2022-01-24,149.97,Pens Boutique
28,Fountain Pen,Kaweco,Sport AL Stonewashed,Blue,,Fine,2022-01-13,82.0,JetPens
30,Fountain Pen,LAMY,2000,Black,,Extra Fine,2022-06-29,199.2,Goulet Pens
12,Fountain Pen,LAMY,2000,Black,,Fine,2020-11-14,144.85,Amazon
23,Fountain Pen,LAMY,CP1,Black,,Extra Fine,2020-12-27,60.0,Goulet Pens
43,Fountain Pen,LAMY,Safari,Black,Gift from Lynn,Fine,2020-01-01,0.0,Unknown
42,Fountain Pen,LAMY,Studio,Glacier,,Fine,2023-06-21,79.2,JetPens
31,Fountain Pen,LAMY,Vista,Black,Special Edition,Extra Fine,2022-06-29,34.9,Goulet Pens
32,Fountain Pen,Montblanc,149 Meisterstuck,Black,"from the Forum, Las Vegas",Fine,2022-07-30,1000.0,Montblanc Store


In [10]:
#
# Count pens by brand
#
# Refer to the column through its constant, like the other cells do,
# instead of repeating the literal header string.
pens.value_counts(C_BRAND)

Brand
Pilot         16
LAMY           6
Platinum       4
TWSBI          2
Sailor         2
Diplomat       2
Parker         1
Opus 88        1
Montblanc      1
Kaweco         1
Retro 51       1
Pelikan        1
Traveler's     1
Visconti       1
Name: count, dtype: int64

In [11]:
#
# Which retailer has the most of my business
# (measured by number of fountain pens bought, not by money spent)
#
# Refer to the column through its constant, like the other cells do,
# instead of repeating the literal header string.
pens.value_counts(C_RETAILER)

Retailer
Goulet Pens        16
JetPens            11
PenSachi            4
Amazon              1
Kinokuniya          1
Montblanc Store     1
Pens Boutique       1
Unknown             1
Name: count, dtype: int64

In [12]:
#
# The ten highest-priced fountain pens, most expensive first
#
pens.nlargest(n=10, columns=C_PRICE)

Unnamed: 0,Category,Brand,Model,Color,Note,Nib Size,Purchase Date,Price,Retailer
32,Fountain Pen,Montblanc,149 Meisterstuck,Black,"from the Forum, Las Vegas",Fine,2022-07-30,1000.0,Montblanc Store
37,Fountain Pen,Visconti,Homo Sapiens,Green,Demonstrator,Extra Fine,2023-01-12,497.5,Goulet Pens
40,Fountain Pen,Pilot,Custom 845,Red,Shu Vermillion,Fine,2023-07-17,450.0,PenSachi
46,Fountain Pen,Sailor,1911L,Red,Pen of the year 2023,Fi,2023-01-01,400.0,Goulet Pens
33,Fountain Pen,Pelikan,M605,Green & white,First Pelikan,Fine,2022-11-23,345.0,Goulet Pens
47,Fountain Pen,Platinum,Kanazawa Leaf Cherry Blossom,Black,Seattle,Fine,2023-06-10,295.0,Kinokuniya
15,Fountain Pen,Pilot,Custom 823,Smoke,,Fine,2020-11-28,288.0,Goulet Pens
41,Fountain Pen,Pilot,Custom 823,Amber,,Fine,2022-12-31,288.0,Goulet Pens
30,Fountain Pen,LAMY,2000,Black,,Extra Fine,2022-06-29,199.2,Goulet Pens
26,Fountain Pen,Diplomat,Aero,Black & White,,Fine,2022-01-06,180.0,Goulet Pens


In [13]:
#
# Average price
#
# Use a fixed two-decimal format spec so the amount always reads as currency;
# rounding the number itself would print e.g. "160.5" instead of "160.50".
print(f"My average fountain pens price is ${pens[C_PRICE].mean():.2f}")

My average fountain pens price is $160.26


In [14]:
#
# Summary statistics for fountain pen prices, rounded to whole numbers
#
(
    pens[C_PRICE]
    .describe()
    .round(decimals=0)
)

count      38.0
mean      160.0
std       188.0
min         0.0
25%        60.0
50%        85.0
75%       175.0
max      1000.0
Name: Price, dtype: float64

In [18]:
#
# How much did I spend
#

# Derive the purchase year so spending can also be totalled per calendar year.
df[C_YEAR] = df[C_PURCHASE_DATE].dt.year

for col in [C_CATEGORY, C_RETAILER, C_BRAND, C_YEAR]:
    print(f"\n# Spending by {col}")
    # dropna=False keeps rows whose grouping value is missing (for example the
    # "Ink Syringe" refill has no brand). By default groupby drops such rows,
    # so the per-group totals would not add up to the overall spend.
    print(
        df.groupby(col, dropna=False)[C_PRICE]
        .sum()
        .sort_values(ascending=False)
    )



# Spending by Category
Category
Fountain Pen      6089.71
Notebook           204.30
Ink                159.79
Case               104.50
Rollerball Pen      90.00
Converter           70.90
Nib                 42.00
Ink Well            14.99
Correction          11.10
Marker              11.00
Ruler                4.75
Refill               2.50
Name: Price, dtype: float64

# Spending by Retailer
Retailer
Goulet Pens        3070.37
JetPens            1113.65
Montblanc Store    1000.00
PenSachi            809.90
Kinokuniya          295.00
Amazon              226.65
Pens Boutique       149.97
Unknown               0.00
Name: Price, dtype: float64

# Spending by Brand
Brand
Pilot                 2019.04
Montblanc             1000.00
LAMY                   620.15
Sailor                 524.90
Visconti               497.50
Platinum               475.95
Pelikan                362.50
Diplomat               329.97
Traveler's             170.50
TWSBI                  139.99
BIGiDESIGN             