In [None]:
# Visualizing spending habits with data taken from id card
# Started 12/22/2019
# Jack Liu

In [None]:
# Necessary Imports
from bs4 import BeautifulSoup
import pandas as pd
from datetime import datetime

In [None]:
# Appends data to appropriate lists given a row in the table
def process_row(row, data):
    """Append the fields of one table row to the column lists in ``data``.

    ``row`` must expose ``find_all('td')`` returning cells that have a
    ``.string`` attribute. ``data`` is a dict with the keys ``'Time'``,
    ``'Location'``, ``'Type'`` and ``'Amount'``, each mapping to a list that
    is extended in place.

    * Five-cell rows: cell 0 is parsed as ``MM/DD/YYYY HH:MM:SS``, cell 1 is
      the location, cell 3 the type and cell 4 the amount (cell 2 is unused).
    * Three-cell rows: cell 1 is the type and cell 2 the amount; time and
      location are stored as ``None``.
    * Rows with any other number of cells are ignored.
    """
    cells = row.find_all('td')
    n_cells = len(cells)

    if n_cells == 5:
        when = datetime.strptime(cells[0].string, '%m/%d/%Y %H:%M:%S')
        where = cells[1].string
        kind = cells[3].string
        amount = cells[4].string
    elif n_cells == 3:
        # No timestamp or location on these rows; leave gaps for the caller.
        when = None
        where = None
        kind = cells[1].string
        amount = cells[2].string
    else:
        return

    data['Time'].append(when)
    data['Location'].append(where)
    data['Type'].append(kind)
    data['Amount'].append(amount)

In [None]:
# Returns adjusted locations for places that are actually the same or have different names
def process_location(s):
    """Map a raw location string to a canonical location name.

    Rules are tried in order and the first match wins. Some rules match
    case-insensitively (against the lower-cased input), others require the
    exact casing shown. If no rule matches, the input is returned unchanged.

    Parameters
    ----------
    s : str or missing value
        Raw location text. Non-string values (e.g. ``None`` or NaN from a
        row whose location could not be filled in) are returned as-is
        instead of raising ``AttributeError``.

    Returns
    -------
    The canonical name for a matched rule, otherwise ``s`` itself.
    """
    # Missing locations cannot be matched against any rule; pass them through
    # so that a DataFrame ``.apply`` does not crash on gaps.
    if not isinstance(s, str):
        return s

    # (substring, case_sensitive, canonical name) -- order is significant.
    rules = (
        ('wash', False, 'Washer'),
        ('dryer', False, 'Dryer'),
        ('castle', False, 'The Castle'),
        ('O-Hill Aero', True, 'O-Hill Dining Hall'),
        ('Runk Dining Aero', True, 'Runk Dining Hall'),
        ('FFC', True, 'Newcomb Dining Hall'),
        ('crossroads', False, 'Crossroads'),
        ('Newcomb Dining To Go', True, 'N2Go'),
        ('Einsteins', True, 'Einsteins Bagels'),
        ('chick', False, 'Chick-Fil-A'),
        ('chik', False, 'Chick-Fil-A'),
        ('subway', False, 'Subway'),
        ('five guys', False, 'Five Guys'),
        ('dumpling', False, 'Dumpling Food Truck'),
        ('ValuPort', True, 'O-Hill Deposit'),
    )

    lowered = s.lower()
    for needle, case_sensitive, canonical in rules:
        haystack = s if case_sensitive else lowered
        if needle in haystack:
            return canonical
    return s

In [None]:
# Used for .apply in pandas to add a weekday column to df
def add_weekday(day):
    """Return ``day.weekday()``; for datetime-like values Monday is 0 and Sunday is 6."""
    weekday_index = day.weekday()
    return weekday_index

In [None]:
# Determining what meal it is
def meal_cat(t):
    """Classify a time by its ``hour`` attribute into a meal period.

    Hours 0-3 and 21-23 give ``'Late Night'``, 4-11 ``'Breakfast'``,
    12-14 ``'Lunch'`` and 15-20 ``'Dinner'``.
    """
    hour = t.hour
    # Exclusive upper bounds, checked in ascending order.
    periods = (
        (4, 'Late Night'),
        (12, 'Breakfast'),
        (15, 'Lunch'),
        (21, 'Dinner'),
    )
    for upper_bound, label in periods:
        if hour < upper_bound:
            return label
    return 'Late Night'

In [None]:
# Returns the time in seconds from HH:mm:ss
def time_in_day(t):
    """Return seconds computed from ``t.hour``, ``t.minute`` and ``t.second``."""
    minutes_elapsed = t.hour * 60 + t.minute
    return minutes_elapsed * 60 + t.second

In [1]:
# Loads data from a file
def load_data(filepath, parser=None):
    """Parse a saved transaction-history HTML page into a DataFrame.

    The page is expected to contain a ``<div id="divHist">`` holding a table
    whose ``<tbody>`` rows are transactions; each row after the first is
    handed to ``process_row``.

    Parameters
    ----------
    filepath : str or path-like
        Location of the HTML file to read.
    parser : str, optional
        Passed to ``BeautifulSoup`` as ``features`` (for example
        ``'html.parser'`` or ``'lxml'``). The default ``None`` keeps
        BeautifulSoup's automatic parser selection, as before.

    Returns
    -------
    pandas.DataFrame
        Columns ``Time``, ``Location``, ``Type``, ``Amount`` from the table,
        plus the derived ``Adjusted_Loc``, ``Weekday``, ``Meal`` and ``ToD``.
    """
    # One list per output column; process_row appends to these in place.
    data = {'Time': [], 'Location': [], 'Type': [], 'Amount': []}

    # Get HTML file
    with open(filepath) as fp:
        soup = BeautifulSoup(fp, features=parser)

    # Finding table elements; the first row is skipped
    # (presumably a header row -- confirm against the page).
    rows = soup.find('div', id='divHist').find('tbody').find_all('tr')
    for row in rows[1:]:
        process_row(row, data)

    # Creating dataframe
    # Three-cell rows carry no time/location, so forward-fill them from the
    # preceding row. DataFrame.ffill() replaces the deprecated
    # fillna(method='ffill').
    df = pd.DataFrame(data).ffill()
    df['Adjusted_Loc'] = df['Location'].apply(process_location)
    df['Weekday'] = df['Time'].apply(add_weekday)
    df['Meal'] = df['Time'].apply(meal_cat)
    df['ToD'] = df['Time'].apply(time_in_day)
    return df

In [None]:
# Path to the saved transaction-history HTML page.
# TODO: placeholder filename -- point this at your own export before running.
HISTORY_PATH = 'history.html'

# load_data must be *called*; assigning the bare function would leave `df`
# bound to a function object and break every cell below.
df = load_data(HISTORY_PATH)

In [None]:
# Count rows whose first-column timestamp has an hour after 22 or before 2
count = sum(
    1
    for stamp in df.iloc[:, 0]
    if stamp.hour > 22 or stamp.hour < 2
)
print(count)

In [None]:
# Print every first-column timestamp whose hour is strictly between 20 and 23
for stamp in df.iloc[:, 0]:
    if 20 < stamp.hour < 23:
        print(stamp)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Keep only the rows whose Weekday value is 0
mon = df.loc[df['Weekday'] == 0]

In [None]:
# Display the Weekday == 0 subset as the cell output
mon

In [None]:
# Print first-column timestamps from `mon` that fall before hour 12
before_noon = (stamp for stamp in mon.iloc[:, 0] if stamp.hour < 12)
for stamp in before_noon:
    print(stamp)