In [2]:
%matplotlib notebook
%matplotlib inline

In [3]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import time
import seaborn as sns
import calendar
import random
import json 

## Read in Dirty CSV

In [4]:
# Load the uncleaned CSV export into a DataFrame (path is relative to the notebook).
df_dirty = pd.read_csv('Data/CC_2020YTD_Dirty.csv')

In [5]:
# Display the loaded frame to inspect its columns before cleaning.
df_dirty

Unnamed: 0,Transaction Date,Post Date,Description,Category,Type,Amount,Memo
0,11/17/2020,11/17/2020,Payment Thank You-Mobile,,Payment,6278.18,
1,11/15/2020,11/17/2020,BANGSALONVERIZONCENTER,Personal,Sale,-44.00,
2,11/16/2020,11/17/2020,HILL COUNTRY DC LLC,Food & Drink,Sale,-32.60,
3,11/16/2020,11/17/2020,HILL COUNTRY DC LLC,Food & Drink,Sale,-84.67,
4,11/15/2020,11/17/2020,NORDSTROM #0675,Shopping,Return,169.47,
...,...,...,...,...,...,...,...
907,01/02/2020,01/03/2020,WALMART.COM,Shopping,Sale,-52.50,
908,01/02/2020,01/03/2020,UBER TRIP,Travel,Sale,-8.00,
909,01/01/2020,01/02/2020,UBER TRIP,Travel,Sale,-13.09,
910,12/31/2019,01/02/2020,7-ELEVEN 33452,Gas,Sale,-6.20,


## Clean Dirty CSV

In [6]:
# Work on a copy so the originally loaded frame (df_dirty) is left unmodified.
df_clean = df_dirty.copy()

In [7]:
# Derive the working frame from df_clean without the 'Memo' column
# (drop returns a new object, so df_clean itself is not modified).
df = df_clean.drop(columns=['Memo'])

# Split the signed 'Amount' into two columns: negative amounts go to
# 'Expenses', positive amounts go to 'Payments'. Assignment aligns on the
# index, so rows that do not match the filter are left as NaN.
amounts = df['Amount']
df['Expenses'] = amounts[amounts < 0]
df['Payments'] = amounts[amounts > 0]

df

Unnamed: 0,Transaction Date,Post Date,Description,Category,Type,Amount,Expenses,Payments
0,11/17/2020,11/17/2020,Payment Thank You-Mobile,,Payment,6278.18,,6278.18
1,11/15/2020,11/17/2020,BANGSALONVERIZONCENTER,Personal,Sale,-44.00,-44.00,
2,11/16/2020,11/17/2020,HILL COUNTRY DC LLC,Food & Drink,Sale,-32.60,-32.60,
3,11/16/2020,11/17/2020,HILL COUNTRY DC LLC,Food & Drink,Sale,-84.67,-84.67,
4,11/15/2020,11/17/2020,NORDSTROM #0675,Shopping,Return,169.47,,169.47
...,...,...,...,...,...,...,...,...
907,01/02/2020,01/03/2020,WALMART.COM,Shopping,Sale,-52.50,-52.50,
908,01/02/2020,01/03/2020,UBER TRIP,Travel,Sale,-8.00,-8.00,
909,01/01/2020,01/02/2020,UBER TRIP,Travel,Sale,-13.09,-13.09,
910,12/31/2019,01/02/2020,7-ELEVEN 33452,Gas,Sale,-6.20,-6.20,


## Save Clean to new CSV

In [8]:
# Derive Year / Month / Day columns from the transaction date.
# 'Transaction Date' holds MM/DD/YYYY strings, so a single vectorized split
# yields all three parts at once. The parts are kept as strings (e.g. '01').
# Reference: https://www.dataindependent.com/pandas/pandas-to-datetime/
date_parts = df['Transaction Date'].str.split('/', expand=True)
df['Month'] = date_parts[0]  # first field of MM/DD/YYYY
df['Day'] = date_parts[1]    # second field
df['Year'] = date_parts[2]   # third field

# Reorder columns so the date parts sit next to the transaction date;
# reindex keeps only the columns listed here.
df = df.reindex(columns= ['Transaction Date', 'Year', 'Month', 'Day', 'Post Date', 'Description', 'Category', 'Type', 'Amount', 'Expenses', 'Payments'])
# Rename the two date columns to names without spaces.
df = df.rename(columns ={'Transaction Date':'TransactionDate', 'Post Date': 'PostDate'})

# df_clean is the name the save step writes out; it refers to the same object as df.
df_clean = df
df

Unnamed: 0,TransactionDate,Year,Month,Day,PostDate,Description,Category,Type,Amount,Expenses,Payments
0,11/17/2020,2020,11,17,11/17/2020,Payment Thank You-Mobile,,Payment,6278.18,,6278.18
1,11/15/2020,2020,11,15,11/17/2020,BANGSALONVERIZONCENTER,Personal,Sale,-44.00,-44.00,
2,11/16/2020,2020,11,16,11/17/2020,HILL COUNTRY DC LLC,Food & Drink,Sale,-32.60,-32.60,
3,11/16/2020,2020,11,16,11/17/2020,HILL COUNTRY DC LLC,Food & Drink,Sale,-84.67,-84.67,
4,11/15/2020,2020,11,15,11/17/2020,NORDSTROM #0675,Shopping,Return,169.47,,169.47
...,...,...,...,...,...,...,...,...,...,...,...
907,01/02/2020,2020,01,02,01/03/2020,WALMART.COM,Shopping,Sale,-52.50,-52.50,
908,01/02/2020,2020,01,02,01/03/2020,UBER TRIP,Travel,Sale,-8.00,-8.00,
909,01/01/2020,2020,01,01,01/02/2020,UBER TRIP,Travel,Sale,-13.09,-13.09,
910,12/31/2019,2019,12,31,01/02/2020,7-ELEVEN 33452,Gas,Sale,-6.20,-6.20,


In [9]:
# Write the cleaned frame to disk; index=False omits the row index column from the file.
df_clean.to_csv('Data/CC_2020YTD_Clean.csv', index=False)

## Visualize the Clean Data

In [10]:
# Reload the cleaned data from the CSV written earlier. read_csv re-infers
# column types, so the zero-padded Month/Day strings come back as integers
# (compare '01' before saving with 1 in the output below).
df_clean = pd.read_csv('Data/CC_2020YTD_Clean.csv')

In [11]:
# Bind the reloaded frame to the shorter working name (an alias, not a copy), then display it.
df = df_clean
df

Unnamed: 0,TransactionDate,Year,Month,Day,PostDate,Description,Category,Type,Amount,Expenses,Payments
0,11/17/2020,2020,11,17,11/17/2020,Payment Thank You-Mobile,,Payment,6278.18,,6278.18
1,11/15/2020,2020,11,15,11/17/2020,BANGSALONVERIZONCENTER,Personal,Sale,-44.00,-44.00,
2,11/16/2020,2020,11,16,11/17/2020,HILL COUNTRY DC LLC,Food & Drink,Sale,-32.60,-32.60,
3,11/16/2020,2020,11,16,11/17/2020,HILL COUNTRY DC LLC,Food & Drink,Sale,-84.67,-84.67,
4,11/15/2020,2020,11,15,11/17/2020,NORDSTROM #0675,Shopping,Return,169.47,,169.47
...,...,...,...,...,...,...,...,...,...,...,...
907,01/02/2020,2020,1,2,01/03/2020,WALMART.COM,Shopping,Sale,-52.50,-52.50,
908,01/02/2020,2020,1,2,01/03/2020,UBER TRIP,Travel,Sale,-8.00,-8.00,
909,01/01/2020,2020,1,1,01/02/2020,UBER TRIP,Travel,Sale,-13.09,-13.09,
910,12/31/2019,2019,12,31,01/02/2020,7-ELEVEN 33452,Gas,Sale,-6.20,-6.20,


In [12]:
# Build a JavaScript snippet that exposes the cleaned records as `var data`.
# pandas serializes the frame (NaN becomes null) and json.dumps re-encodes the
# parsed records, so quoting and null handling follow JSON rules. This avoids
# relying on Python's repr plus a textual ': None' -> ': null' replacement,
# which would also rewrite any field whose text happens to contain ': None'.
# Everything stays in memory; `data` is the string written to the .js file.
records = json.loads(df.to_json(orient='records'))
data = 'var data = ' + json.dumps(records)

In [13]:
# Write the generated `var data = ...` assignment to the script file.
# The encoding is pinned so the output does not depend on the platform default.
with open('Data/df_Clean.js', 'w', encoding='utf-8') as datafile:
    datafile.write(data)