In [1]:
#This notebook analyzes credit card expenses.
#Import the libraries
import pandas as pd
import glob
import matplotlib.pyplot as plt
from datetime import datetime as dt
import plotly.offline as py
import plotly.graph_objects as go

In [2]:
# Load every CSV export found in the statements folder into a single DataFrame.
path = r'/content/drive/MyDrive/Nubank' # use your path

# glob returns matches in a filesystem-dependent order; sort them so the row
# order (and the integer index regenerated below) is the same on every run.
all_files = sorted(glob.glob(path + "/*.csv"))

# Fail with an explicit message when the folder is wrong or not mounted,
# instead of pd.concat's generic "No objects to concatenate" error.
if not all_files:
    raise FileNotFoundError(f"No CSV files found in {path!r}")

# One DataFrame per file; the first line of each file is used as the header.
li = [pd.read_csv(filename, index_col=None, header=0) for filename in all_files]

# Stack the per-file frames vertically and renumber the rows 0..n-1.
nubank = pd.concat(li, axis=0, ignore_index=True)
nubank

Unnamed: 0,date,category,title,amount
0,2016-08-05,restaurante,Gaúcha Sport Bar,45.32
1,2016-08-10,,Pagamento recebido,-224.90
2,2016-08-11,supermercado,Casa de Carnes Moretti,66.90
3,2016-08-15,educação,Papelaria Cameron,24.90
4,2016-08-15,vestuário,Sirlene Provenci,24.90
...,...,...,...,...
1291,2020-02-21,transporte,Uberbr Uber Trip Help.,7.82
1292,2020-02-29,lazer,Mcm Cachoeirinha,4.00
1293,2020-02-29,serviços,Ebanx*Spotify,26.90
1294,2020-02-29,supermercado,Pag*Mcmbar,12.00


In [3]:
# Drop, in place, every row that has a missing value in any column.
# In the data displayed above this removes e.g. the 'Pagamento recebido'
# payment row, which has no category.
nubank.dropna(inplace=True)
nubank

Unnamed: 0,date,category,title,amount
0,2016-08-05,restaurante,Gaúcha Sport Bar,45.32
2,2016-08-11,supermercado,Casa de Carnes Moretti,66.90
3,2016-08-15,educação,Papelaria Cameron,24.90
4,2016-08-15,vestuário,Sirlene Provenci,24.90
5,2016-08-15,supermercado,Supermercado Bonato,26.44
...,...,...,...,...
1291,2020-02-21,transporte,Uberbr Uber Trip Help.,7.82
1292,2020-02-29,lazer,Mcm Cachoeirinha,4.00
1293,2020-02-29,serviços,Ebanx*Spotify,26.90
1294,2020-02-29,supermercado,Pag*Mcmbar,12.00


In [4]:
# Convert the 'date' column to pandas datetimes, then print a summary
# of the frame's columns and dtypes.
parsed_dates = pd.to_datetime(nubank['date'])
nubank['date'] = parsed_dates
nubank.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1236 entries, 0 to 1295
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype         
---  ------    --------------  -----         
 0   date      1236 non-null   datetime64[ns]
 1   category  1236 non-null   object        
 2   title     1236 non-null   object        
 3   amount    1236 non-null   float64       
dtypes: datetime64[ns](1), float64(1), object(2)
memory usage: 48.3+ KB


In [5]:
# Total amount spent per category.
# Select 'amount' explicitly before summing: the frame also holds the datetime
# 'date' and string 'title' columns, and since pandas 2.0 groupby().sum() no
# longer silently drops such columns (numeric_only defaults to False).
# Note: the next cell rebinds `gastos` to the scalar grand total.
gastos = nubank.groupby('category')[['amount']].sum()
gastos.head(50)

Unnamed: 0_level_0,amount
category,Unnamed: 1_level_1
casa,96.48
educação,1607.63
eletrônicos,1169.69
lazer,2696.6
outros,2061.9
restaurante,12051.04
saúde,1231.42
serviços,2788.67
supermercado,4662.29
transporte,9784.83


In [6]:
# Grand total of the 'amount' column; used below as the denominator
# for the per-category percentage.
gastos = nubank.loc[:, 'amount'].sum()
print(gastos)

43142.97


In [7]:
# Collapse the transaction table to one row per category with its total amount.
# NOTE: this rebinds `nubank` from the transaction-level frame to the
# per-category summary; the cells below work on the summary.
# Select 'amount' explicitly before summing: since pandas 2.0 groupby().sum()
# no longer silently drops non-numeric columns, so summing the whole frame
# would also try to aggregate the 'date' and 'title' columns.
nubank = nubank.groupby('category')[['amount']].sum()
nubank.head()


Unnamed: 0_level_0,amount
category,Unnamed: 1_level_1
casa,96.48
educação,1607.63
eletrônicos,1169.69
lazer,2696.6
outros,2061.9


In [8]:
# Add a 'porcentagem' column: each category's amount as a percentage
# of the grand total stored in `gastos`.
nubank['porcentagem'] = nubank['amount'] / gastos * 100
nubank.head(50)

Unnamed: 0_level_0,amount,porcentagem
category,Unnamed: 1_level_1,Unnamed: 2_level_1
casa,96.48,0.223629
educação,1607.63,3.726285
eletrônicos,1169.69,2.711195
lazer,2696.6,6.250381
outros,2061.9,4.779226
restaurante,12051.04,27.932801
saúde,1231.42,2.854277
serviços,2788.67,6.463788
supermercado,4662.29,10.806604
transporte,9784.83,22.68001


In [9]:
# `valor` is the mean of the 'porcentagem' column; it is the threshold the
# colouring step compares each category against.
valor = nubank['porcentagem'].values.mean()

# Empty list that will receive one bar colour per category.
cores = []

print(valor)

8.333333333333334


In [10]:
# One colour per category: red when its percentage is above the mean
# (`valor`), light blue otherwise.
# The list is rebuilt from scratch here rather than appended to a list
# created in another cell, so re-running this cell on its own cannot grow
# `cores` beyond one entry per category.
cores = ['red' if x > valor else 'lightblue' for x in nubank['porcentagem']]

In [11]:
# Bar trace: one bar per category (the frame's index), with the
# percentage column as bar height.
data = [
    go.Bar(
        x=nubank.index,
        y=nubank['porcentagem'],
        marker=dict(
            color=cores,  # list of colours
            line=dict(color='#333', width=2),
        ),
        opacity=0.7,
    )
]

# Layout: chart title and axis titles.
configuracoes_layout = go.Layout(
    title='Gastos entre 08/2016-02/2020',
    yaxis=dict(title='Porcentagem'),
    xaxis=dict(title='Categoria'),
)

# Combine trace and layout into a figure object.
fig = go.Figure(data=data, layout=configuracoes_layout)

# Render the figure inline in the notebook.
py.iplot(fig, filename='GastosNubank')
