In [1]:
import csv
import pandas as pd

In [2]:
df = pd.read_csv('auto_debt.csv', sep=',', 
    encoding='utf-8')
origien = df.MCC_TIPO_ORIGEN
fecha_pago = df.FECHA_PAGO
importe = df.MCC_IMPORTE
cust_id = df.OBN_OBJETO_ID

In [3]:
# simplify: make dataframe piicking a few columns that are interesting to us
my_df = pd.DataFrame({'OBN_OBJETO_ID' : cust_id, 
                  'MCC_TIPO_ORIGEN' : origien,
                 'MCC_IMPORTE' : importe,
                   'FECHA_PAGO' : fecha_pago})
my_df.head()

Unnamed: 0,FECHA_PAGO,MCC_IMPORTE,MCC_TIPO_ORIGEN,OBN_OBJETO_ID
0,01/01/1900,21.0,SEL,2789528.0
1,11/04/2014,0.02,PAO,8280920.0
2,12/11/2015,66.31,PAO,4928087.0
3,24/06/2014,19.02,MAJ,8505200.0
4,01/01/1900,21.84,NULO,7029754.0


In [5]:
# convert the dtype to datetime format, so that we can manipulate it 
my_df['FECHA_PAGO'] = pd.to_datetime(df.FECHA_PAGO, dayfirst=True)

In [6]:
my_df.head()

Unnamed: 0,FECHA_PAGO,MCC_IMPORTE,MCC_TIPO_ORIGEN,OBN_OBJETO_ID
0,1900-01-01,21.0,SEL,2789528.0
1,2014-04-11,0.02,PAO,8280920.0
2,2015-11-12,66.31,PAO,4928087.0
3,2014-06-24,19.02,MAJ,8505200.0
4,1900-01-01,21.84,NULO,7029754.0


In [8]:
# add a new columns called month, year (as strings)
my_df['Month'] = my_df.FECHA_PAGO.dt.month.astype(str)
my_df['Year'] = my_df.FECHA_PAGO.dt.year.astype(str)
my_df.head()

Unnamed: 0,FECHA_PAGO,MCC_IMPORTE,MCC_TIPO_ORIGEN,OBN_OBJETO_ID,Month,Year
0,1900-01-01,21.0,SEL,2789528.0,1.0,1900.0
1,2014-04-11,0.02,PAO,8280920.0,4.0,2014.0
2,2015-11-12,66.31,PAO,4928087.0,11.0,2015.0
3,2014-06-24,19.02,MAJ,8505200.0,6.0,2014.0
4,1900-01-01,21.84,NULO,7029754.0,1.0,1900.0


In [9]:
# Group by year
g = my_df.groupby('Year')
g

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x108fe0d10>

In [10]:
# Make a special group for 2014
group_2014 = g.get_group('2014.0')
group_2014

Unnamed: 0,FECHA_PAGO,MCC_IMPORTE,MCC_TIPO_ORIGEN,OBN_OBJETO_ID,Month,Year
1,2014-04-11,0.02,PAO,8280920.0,4.0,2014.0
3,2014-06-24,19.02,MAJ,8505200.0,6.0,2014.0
6,2014-04-11,1989.72,NULO,8280920.0,4.0,2014.0
7,2014-01-17,35.24,NULO,8424899.0,1.0,2014.0
9,2014-09-05,378.00,NULO,7778202.0,9.0,2014.0
13,2014-06-24,624.52,NULO,8505200.0,6.0,2014.0
20,2014-04-11,1943.40,PAO,8280920.0,4.0,2014.0
21,2014-09-05,378.00,PAO,7778202.0,9.0,2014.0
22,2014-01-17,38.55,PAO,8424899.0,1.0,2014.0
23,2014-06-24,605.50,PAO,8505200.0,6.0,2014.0


In [11]:
group_2014.Month.value_counts()

5.0     212
9.0      48
4.0      36
10.0     26
11.0     26
12.0     21
1.0      16
8.0      13
7.0      12
3.0       9
6.0       6
Name: Month, dtype: int64

In [12]:
month = group_2014.groupby('Month')
month

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x10902c110>

In [14]:
# Here are the sums for each month in 2014
month.MCC_IMPORTE.sum()

Month
1.0      9098.31
10.0     2438.02
11.0     4354.94
12.0     1159.96
3.0      1061.18
4.0     12542.16
5.0     70525.78
6.0      1349.56
7.0       119.48
8.0      2198.56
9.0     10129.26
Name: MCC_IMPORTE, dtype: float64