In [202]:
import numpy as np
import pandas as pd

import os

import matplotlib.pyplot as plt
import seaborn as sns

import datetime

## start working with the finances information sheet excel file

In [203]:
#load the whole finances information sheet
util_sheets = pd.read_excel('../../../Finances/utils info/legacy/Finances Information Sheet.xlsx', header=None, sheet_name=None)

In [204]:
#show me the keys I use to get each sheet in the file
print(util_sheets.keys())

#get the basic utilities info sheet
utils = util_sheets['Utilities 507 N Curtis']

odict_keys(['Utilities 507 N Curtis', 'All Spending', 'Total Spending Tracker', 'Spending Graph', 'CC Points Tracker', 'Amazon $'])


In [205]:
#use this string query to get the rows that initiate the price/type/breakdown tables I use
gas_header = utils[1] == 'GAS (So Cal Gas)'

In [206]:
#get the indices for the billing period (it is 2 rows below the header)
per = utils.loc[gas_header, :].index + 2

#get the indices for the billing amnt (it is 3 rows below the header)
amnt = utils.loc[gas_header, :].index + 3

#create neat tables with just the 3 utils periods and information
bill_pers = utils.loc[per, [1,2,3]].rename(index=str, columns={1: "gas", 2: "elec", 3: 'internet'})
bill_amnts = utils.loc[amnt, [1,2,3]].rename(index=str, columns={1: "gas", 2: "elec", 3: 'internet'})

In [207]:
#create the multi-column-indexed dataframe that will replace bill_pers
iterables = [['gas', 'elec', 'internet'], ['start', 'end', 'amnt']]

cols = pd.MultiIndex.from_product(iterables)

pers_split = pd.DataFrame(columns=cols)

In [208]:
#need to split the periods into start and end dates for each entry

for util in ['gas', 'elec', 'internet']:
    
    #first collapse out all spaces, then split at the dash
    zz = bill_pers[util].str.replace(' ', '').str.split('-', expand=True).reset_index(drop=True)

    pers_split.loc[:, (util, 'start')] = zz[0]
    pers_split.loc[:, (util, 'end')] = zz[1]
    
    pers_split.loc[:, (util, 'amnt')] = bill_amnts.reset_index(drop=True)[util]
    
bills = pers_split

In [209]:
#bills.to_csv('../personal/utils info/bills_premelt.csv', index=False)

## Deal with the old bills from word doc

In [210]:
oldbills = pd.read_excel('../../../Finances/utils info/legacy/old finances from word.xlsx', sheet_name='Sheet2')
oldbills.head()

Unnamed: 0,open,close,util,amnt,note
0,2013-08-15 00:00:00,2013-09-04 00:00:00,internet,60.0,includes opening fees
1,2013-08-01 00:00:00,2013-08-28 00:00:00,electricity,87.0,
2,2013-08-01 00:00:00,2013-08-14 00:00:00,gas,28.5,includes opening fees
3,2013-09-15 00:00:00,2013-10-14 00:00:00,internet,30.0,
4,2013-08-29 00:00:00,2013-09-13 00:00:00,electricity,108.0,


In [211]:
def dtime_to_string (obj):
    
    if isinstance(obj, datetime.datetime):
    
        year = str(obj.year)
        month = str(obj.month)
        day = str(obj.day)
    
        return month + '/' + day + '/' + year
    else:
        return obj

In [212]:
#replace the datetime open and close objects with strings to be consistent with bigger bills list

oldbills['open'] = oldbills['open'].apply(dtime_to_string)
oldbills['close'] = oldbills['close'].apply(dtime_to_string)

In [213]:
# oldbills.to_csv('../../../Finances/utils info/legacy/oldbills_tidy.csv', index=False)

## load the bills information and work with it more

In [214]:
bills = pd.read_csv('../../../Finances/utils info/legacy/bills_premelt.csv', header=[0,1])

In [215]:
bills.head()

Unnamed: 0_level_0,gas,gas,gas,elec,elec,elec,internet,internet,internet
Unnamed: 0_level_1,start,end,amnt,start,end,amnt,start,end,amnt
0,3/18/2014,4/16/2014,5.81,4/1/2014,4/30/2014,5.84,4/15/2014,5/14/2014,29.99
1,4/16/2014,5/15/2014,4.77,4/30/2014,5/30/2014,72.55,5/15/2014,6/15/2014,29.99
2,5/16/2014,6/15/2014,6.12,5/30/2014,6/30/2014,56.38,6/15/2014,7/14/2014,29.99
3,6/16/2014,7/15/2014,6.15,6/30/2014,7/30/2014,226.6,7/15/2014,8/14/2014,29.99
4,7/16/2014,8/15/2014,4.77,7/30/2014,8/30/2014,122.63,8/15/2014,9/14/2014,44.99


In [216]:
#convoluted way to melt the multiindex columns by utility (i didn't really understand how to do this over all utils at once)
#and then put them all together 

all_utils_melted = []

for i, utility in enumerate(['gas', 'elec', 'internet']):

    bill_melt = pd.melt(bills, id_vars=[(utility, 'start'), (utility, 'end')], value_vars=[(utility, 'amnt')], value_name='amnt')

    all_utils_melted.append((bill_melt.rename({(utility, 'start') : 'open',
                                            (utility, 'end') : 'close',
                                            'variable_0' : 'util'}, axis='columns')
                           .drop('variable_1', axis='columns')
                          ))
    
bills_melted = pd.concat(all_utils_melted).reset_index(drop=True)

### load in the melted old bills from word again

In [217]:
def names (string):
    if string == 'electricity':
        d = 'elec'
    else:
        d = string
    
    return d

In [218]:
#make the old bills util entries compatible with the melted (non old) bills

oldbills_melted = pd.read_csv('../../../Finances/utils info/legacy/oldbills_tidy.csv')

oldbills_melted['util'] = oldbills_melted['util'].apply(names)

oldbills_melted.head()

Unnamed: 0,open,close,util,amnt,note
0,8/15/2013,9/4/2013,internet,60.0,includes opening fees
1,8/1/2013,8/28/2013,elec,87.0,
2,8/1/2013,8/14/2013,gas,28.5,includes opening fees
3,9/15/2013,10/14/2013,internet,30.0,
4,8/29/2013,9/13/2013,elec,108.0,


In [219]:
oldbills_melted.shape

(25, 5)

In [220]:
#join the old and new bills together in eternal union

pd.concat([oldbills_melted, bills_melted], sort=False).sort_values('open').reset_index(drop=True).to_csv('../../../Finances/utils info/legacy/testytesty.csv')

In [221]:
#fixed some entry problems in excel out of script

all_bills = pd.read_csv('../../../Finances/utils info/legacy/testytesty.csv')


In [222]:
def get_dtime (string):
    return datetime.datetime.strptime(string, '%m/%d/%Y')

In [200]:
all_bills['open'] = all_bills['open'].apply(get_dtime)
all_bills['close'] = all_bills['close'].apply(get_dtime)

all_bills

Unnamed: 0,open,close,util,amnt,note
0,2014-01-15,2014-02-14,gas,6.00,
1,2017-12-01,2018-01-31,elec,63.15,
2,2014-02-15,2014-03-14,gas,6.00,
3,2017-02-28,2017-03-29,elec,47.87,
4,2018-04-30,2018-05-30,elec,64.14,
5,2018-06-30,2018-07-30,elec,116.00,
6,2018-09-30,2018-10-30,elec,10.18,
7,2018-01-01,2018-02-28,elec,62.15,
8,2015-01-12,2015-02-12,gas,6.37,
9,2014-01-15,2014-02-14,internet,30.00,


In [223]:
#all_bills.sort_values('close').reset_index(drop=True).to_csv('utilities_tidy.csv', index=False)