# Bank account data exploration (via BBVA's monthly reports)

## Loading the data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import matplotlib.dates as mdates

# The 'seaborn' style sheet was renamed to 'seaborn-v0_8' in Matplotlib 3.6 and
# the old name has since been dropped; use whichever name this installation
# actually ships so the notebook runs on both old and new Matplotlib.
plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')

# Render figures at high resolution, both on screen and when saved.
plt.rcParams['figure.dpi'] = 400
plt.rcParams['savefig.dpi'] = 800
# Show a grid on every axes by default.
plt.rcParams["axes.grid"] = True

from extractor import extract

# Working frame for the whole notebook: the result of extract('reports'),
# indexed by its 'date' column and sorted by that index.
# NOTE(review): presumably extract() parses the report files found under
# 'reports' -- confirm against the extractor module.
df = (
    extract('reports')
    .set_index('date')
    .sort_index()
)

## Common routines

In [None]:
def group_by_month(dataframe, aggregation_dict):
    """Aggregate a date-indexed frame into one row per calendar month.

    Args:
        dataframe: DataFrame whose index is datetime-like (required by the
            frequency-based ``pd.Grouper``).
        aggregation_dict: Mapping of column name to aggregation, forwarded
            unchanged to ``GroupBy.aggregate``.

    Returns:
        The aggregated DataFrame, with its index converted to ``'YYYY-MM'``
        labels.
    """
    # pandas 2.2 renamed the month-end alias from 'M' to 'ME' and deprecated
    # the old spelling, while older releases only understand 'M'. Try the new
    # alias first and fall back, so the helper works on either side.
    try:
        month_end = pd.Grouper(freq='ME')
    except ValueError:
        month_end = pd.Grouper(freq='M')

    grouped_dataframe = dataframe.groupby(month_end).aggregate(aggregation_dict)
    # Swap the month-end timestamps for compact 'YYYY-MM' labels; any label
    # that is already a string is kept untouched.
    grouped_dataframe.index = grouped_dataframe.index.map(
        lambda label: label if isinstance(label, str) else label.strftime('%Y-%m')
    )
    return grouped_dataframe

## Balance evolution

### Overall evolution

In [None]:
# Line plot of the 'balance' column over the whole (date-sorted) index.
df['balance'].plot()

### Monthly evolution on payday

In [None]:
# Keep only the rows whose 'concept' text contains 'NOMINA'
# (presumably the payroll deposits -- verify against the report wording).
payroll_movements = df.loc[df.concept.str.contains('NOMINA')]

# Balance recorded on each of those rows, one marker per row.
payroll_movements.balance.plot(marker='o')

### Monthly evolution of the mean balance

In [None]:
# Mean balance per calendar month. The aggregation is passed by name because
# handing NumPy callables such as np.mean to GroupBy.aggregate is deprecated
# since pandas 2.1 (the string keeps the current behaviour without warnings).
balance_by_month_mean = group_by_month(df, {'balance': 'mean'})

balance_by_month_mean.plot(marker='o', legend=False)

### Monthly evolution of the net balance change

In [None]:
def diff(series):
    """Return the last value of ``series`` minus its first value.

    An empty series yields 0, so months without any row aggregate cleanly.
    """
    values = series.tolist()
    if not values:
        return 0
    first, last = values[0], values[-1]
    return last - first

# Per-month balance change: last balance of the month minus the first one.
balance_by_month_diff = group_by_month(
    dataframe=df,
    aggregation_dict={'balance': diff},
)

balance_by_month_diff.plot(legend=False, marker='o')

## Spending vs incoming

### Spending vs incoming by concepts

In [None]:
# Split the rows by the sign of 'amount': negative is spending, positive is
# incoming (rows with a zero amount belong to neither).
spending = df[df['amount'] < 0]
incoming = df[df['amount'] > 0]

# Total amount per concept on each side.
spending_by_concept = spending.groupby('concept')['amount'].sum()
incoming_by_concept = incoming.groupby('concept')['amount'].sum()

# One row per concept, with a 'spending' and an 'incoming' column.
combined_amounts = pd.concat(
    [
        spending_by_concept.rename('spending'),
        incoming_by_concept.rename('incoming'),
    ],
    axis=1,
)

combined_amounts.plot.barh(stacked=True, width=1, figsize=(10, 10))

### Last year spending vs incoming

In [None]:
# Keep the last 12 calendar months of data, counted back from the latest row.
# DataFrame.last() is deprecated since pandas 2.1, so the same window is
# selected with an explicit mask: rows strictly after (latest date - 12 month
# ends), which is exactly what last('12M') computed on the sorted index.
if len(df.index):
    last_year_cutoff = df.index[-1] - pd.offsets.MonthEnd(12)
    df_last_year = df.loc[df.index > last_year_cutoff]
else:
    # last() returned a copy of an empty frame; keep that behaviour.
    df_last_year = df.copy()

spending_last_year = df_last_year.query('amount < 0')
incoming_last_year = df_last_year.query('amount > 0')

# Monthly totals. The aggregation is named with a string because passing
# np.sum to GroupBy.aggregate is deprecated since pandas 2.1.
spending_last_year_by_month = group_by_month(spending_last_year, {'amount': 'sum'}).amount
incoming_last_year_by_month = group_by_month(incoming_last_year, {'amount': 'sum'}).amount

combined_amounts_last_year_by_month = pd.concat([spending_last_year_by_month, incoming_last_year_by_month], axis=1)

# Overlay the last 12 monthly mean/diff balance points (lines) on top of the
# stacked spending/incoming bars, all sharing one axes.
ax = balance_by_month_mean.tail(12).plot(legend=False, linestyle='-', marker='o', color='crimson')
ax = balance_by_month_diff.tail(12).plot(legend=False, linestyle='-', marker='o', color='goldenrod', ax=ax)
ax = combined_amounts_last_year_by_month.plot(kind='bar', stacked=True, legend=False, ax=ax)
# Only the two lines are labelled; the trailing ';' hides the cell's text output.
ax.legend(['mean', 'diff']);

## Spending distribution

### Spending distribution by amount (€)

In [None]:
# Two side-by-side panels: a histogram on the left, a box plot on the right.
_, (ax1, ax2) = plt.subplots(1, 2)

# Both panels show the absolute value of each spending amount; the axis limits
# zoom in on the 0-1000 (histogram) and 0-150 (box plot) ranges.
spending['amount'].abs().plot.hist(bins=25, xlim=(0, 1000), ax=ax1)
spending['amount'].abs().plot.box(ylim=(0, 150), yticks=range(0, 150, 10), ax=ax2)

### Spending by concept

In [None]:
# Absolute total spent per concept, smallest first.
spending_by_concept_sorted = spending_by_concept.abs().sort_values()

# Plot only the concepts whose total exceeds 500. Series.where() would keep
# every concept and merely blank its value to NaN, leaving an empty bar (and a
# tick label) for each small concept; .loc with a boolean mask drops them.
spending_by_concept_sorted.loc[lambda totals: totals > 500].plot(kind='barh')

### Spending by month

In [None]:
# Absolute amount spent per calendar month. The aggregation is passed by name
# because handing np.sum to GroupBy.aggregate is deprecated since pandas 2.1.
spending_by_month = group_by_month(spending, {'amount': 'sum'}).abs()

spending_by_month.amount.plot(kind='bar', width=1)