In [1]:
# Visualizing spending habits using transaction data taken from an ID card
# Started 12/22/2019
# Jack Liu

In [2]:
# Necessary Imports
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime

In [3]:
# Get HTML file
# Parse the saved transaction-history page into a BeautifulSoup tree.
# No parser is named, so bs4 picks whichever one it finds installed.
with open("Transaction History.html") as html_file:
    soup = BeautifulSoup(html_file)

In [4]:
# Appends data to appropriate lists given a row in the table
def process_row(row):
    """Append the fields of one table row to the module-level column lists.

    Parameters
    ----------
    row : object with ``find_all('td')`` (a BeautifulSoup ``<tr>`` tag)
        The table row to read. Each returned cell must expose ``.string``.

    Side effects
    ------------
    Appends exactly one entry to each of the global lists ``post_time``,
    ``loc``, ``typ`` and ``amt`` when the row has 5 or 3 cells; rows with
    any other cell count are ignored.

    * 5 cells: cell 0 is parsed as a ``%m/%d/%Y %H:%M:%S`` timestamp,
      cell 1 is the location, cell 3 the type and cell 4 the amount
      (cell 2 is not used).
    * 3 cells: time and location are recorded as ``None``; cell 1 is the
      type and cell 2 the amount.

    Raises
    ------
    ValueError
        If a 5-cell row's first cell does not match the timestamp format.
    """
    # Use the row that was passed in, not the globals rows[i], so the
    # function works for any caller and not only inside one specific loop.
    cells = row.find_all('td')
    if len(cells) == 5:
        post_time.append(datetime.strptime(cells[0].string, '%m/%d/%Y %H:%M:%S'))
        loc.append(cells[1].string)
        typ.append(cells[3].string)
        amt.append(cells[4].string)
    elif len(cells) == 3:
        # No time/location cells in this row shape: leave placeholders so all
        # four lists stay the same length.
        post_time.append(None)
        loc.append(None)
        typ.append(cells[1].string)
        amt.append(cells[2].string)

In [5]:
# Returns adjusted locations for places that are actually the same or have different names
def process_location(s):
    """Map a raw location string to a canonical place name.

    The rules are tried in order and the first match wins. Some rules
    match case-insensitively (against the lower-cased string), others
    match the original string exactly as written. If no rule matches,
    ``s`` is returned unchanged.
    """
    lowered = s.lower()
    # (substring, match against lower-cased text?, canonical name)
    rules = (
        ('wash', True, 'Washer'),
        ('dryer', True, 'Dryer'),
        ('castle', True, 'The Castle'),
        ('O-Hill Aero', False, 'O-Hill Dining Hall'),
        ('Runk Dining Aero', False, 'Runk Dining Hall'),
        ('FFC', False, 'Newcomb Dining Hall'),
        ('crossroads', True, 'Crossroads'),
        ('Newcomb Dining To Go', False, 'N2Go'),
        ('Einsteins', False, 'Einsteins Bagels'),
        ('chick', True, 'Chick-Fil-A'),
        ('chik', True, 'Chick-Fil-A'),
        ('subway', True, 'Subway'),
        ('five guys', True, 'Five Guys'),
        ('dumpling', True, 'Dumpling Food Truck'),
        ('ValuPort', False, 'O-Hill Deposit'),
    )
    for needle, fold_case, canonical in rules:
        haystack = lowered if fold_case else s
        if needle in haystack:
            return canonical
    return s

In [6]:
def add_weekday(day):
    """Return ``day.weekday()``: for datetime objects, Monday is 0 and Sunday is 6."""
    weekday_index = day.weekday()
    return weekday_index

In [22]:
# Column accumulators; process_row appends to these module-level lists.
post_time = []
loc = []
typ = []
amt = []

# Every <tr> of the table body inside <div id="divHist">.
rows = soup.find('div', id='divHist').find('tbody').find_all('tr')
# Index 0 is skipped (presumably the header row -- confirm against the HTML).
for i in range(1, len(rows)):
    process_row(rows[i])

data = {'Time':post_time, 'Location':loc, 'Type':typ, 'Amount':amt}
# 3-cell rows are stored with None for Time/Location; forward-fill copies the
# values from the row above. DataFrame.ffill() is used because
# fillna(method='ffill') is deprecated in pandas 2.1 and later.
df = pd.DataFrame(data).ffill()
df['Adjusted_Loc'] = df['Location'].apply(process_location)
df['Weekday'] = df['Time'].apply(add_weekday)
# NOTE: meal_cat is defined in a later cell of this notebook, so that cell
# must be run before this one or this line raises NameError.
df['Meal'] = df['Time'].apply(meal_cat)
df.head(10)

Unnamed: 0,Time,Location,Type,Amount,Adjusted_Loc,Weekday,Meal
0,2019-08-21 16:40:21,Import Loaction,Meal,-50,Import Loaction,2,Dinner
1,2019-08-21 16:40:21,Import Loaction,Debit,$-150.00,Import Loaction,2,Dinner
2,2019-08-21 16:49:48,Import Loaction,Meal,-10,Import Loaction,2,Dinner
3,2019-08-23 12:22:01,OHill ValuPort III,Debit,$-20.00,O-Hill Deposit,4,Lunch
4,2019-08-23 12:22:08,OHill ValuPort III,Debit,$-20.00,O-Hill Deposit,4,Lunch
5,2019-08-23 12:22:23,OHill ValuPort III,Debit,$-20.00,O-Hill Deposit,4,Lunch
6,2019-08-23 12:25:51,O-Hill Aero 1 Loc 473,Meal,1,O-Hill Dining Hall,4,Lunch
7,2019-08-23 18:41:53,O-Hill Aero 1 Loc 473,Meal,1,O-Hill Dining Hall,4,Dinner
8,2019-08-24 08:32:16,O-Hill Aero 1 Loc 473,Meal,1,O-Hill Dining Hall,5,Breakfast
9,2019-08-24 14:07:13,ChickFilA (4414),Debit,$7.49,Chick-Fil-A,5,Lunch


In [8]:
# Count the entries of df's first column whose hour is above 22 or below 2.
count = sum(1 for stamp in df.iloc[:, 0] if stamp.hour > 22 or stamp.hour < 2)
print(count)

11


In [9]:
# Print each entry of df's first column whose hour is strictly between 20 and 23.
evening_stamps = (stamp for stamp in df.iloc[:, 0] if 20 < stamp.hour < 23)
for stamp in evening_stamps:
    print(stamp)

2019-09-07 22:23:54
2019-09-08 21:09:33
2019-09-16 22:32:41
2019-09-23 21:28:17
2019-09-23 22:19:54
2019-10-01 21:37:22
2019-10-12 22:25:44
2019-10-13 21:39:02
2019-10-15 21:38:17
2019-10-20 22:29:03
2019-10-21 22:39:59
2019-10-22 22:13:46
2019-10-28 21:48:41
2019-11-11 22:03:20
2019-11-11 22:52:11
0


In [11]:
import matplotlib.pyplot as plt

In [23]:
# Keep only the rows whose Weekday value is 0.
is_weekday_zero = df['Weekday'] == 0
mon = df[is_weekday_zero]

In [24]:
# Display the Weekday == 0 subset as this cell's output.
mon

Unnamed: 0,Time,Location,Type,Amount,Adjusted_Loc,Weekday,Meal
13,2019-08-26 17:20:56,ChickFilA (4428),Meal,1,Chick-Fil-A,0,Dinner
35,2019-09-02 08:32:21,O-Hill Aero 1 Loc 473,Meal,1,O-Hill Dining Hall,0,Breakfast
36,2019-09-02 13:35:17,Newcomb Dining To Go Loc 109,Meal,1,N2Go,0,Lunch
37,2019-09-02 18:27:52,FFC Northside Loc 483,Meal,1,Newcomb Dining Hall,0,Dinner
59,2019-09-09 08:22:08,O-Hill Aero 1 Loc 473,Meal,1,O-Hill Dining Hall,0,Breakfast
60,2019-09-09 19:37:51,O-Hill CROSSROADS 1,Meal,1,Crossroads,0,Dinner
77,2019-09-16 13:55:19,Newcomb Dining To Go Loc 109,Meal,1,N2Go,0,Lunch
78,2019-09-16 18:24:59,FFC Northside Loc 483,Meal,1,Newcomb Dining Hall,0,Dinner
79,2019-09-16 22:32:41,GibbonWash2,Debit,$1.75,Washer,0,Late Night
80,2019-09-16 23:43:46,GibbonDryer12,Debit,$1.75,Dryer,0,Late Night


In [20]:
# Print each entry of mon's first column whose hour is before 12.
before_noon = (stamp for stamp in mon.iloc[:, 0] if stamp.hour < 12)
for stamp in before_noon:
    print(stamp)

2019-09-02 08:32:21
2019-09-09 08:22:08
2019-09-23 08:34:53
2019-09-30 10:55:09
2019-10-14 09:33:43
2019-10-21 09:36:36
2019-10-28 09:31:15
2019-11-04 11:15:20
2019-11-11 11:01:05
2019-11-18 10:58:31
2019-11-25 10:43:28
2019-12-02 10:53:15
2019-12-09 10:00:19


In [21]:
# Determining what meal it is
def meal_cat(t):
    """Classify a timestamp into a meal period from its ``hour`` attribute.

    Hours 4-11 are 'Breakfast', 12-14 'Lunch' and 15-20 'Dinner'.
    Anything else (before 4 or from 21 onward) is 'Late Night'.
    """
    hour = t.hour
    if 4 <= hour < 12:
        return 'Breakfast'
    if 12 <= hour < 15:
        return 'Lunch'
    if 15 <= hour < 21:
        return 'Dinner'
    return 'Late Night'