# INDIVIDUAL FOOTPRINTS

In [1]:
import numpy as np
import datetime
import json
import pylab
import pandas as pd
import matplotlib.pyplot as plt
import os, sys

## Preparación de datos

### Cargando datos

In [2]:

def leer_data(outfile='./SOURCES/data.csv'):
    """Load the transactions CSV into a DataFrame.

    Parameters
    ----------
    outfile : str, optional
        Path of the CSV file to read. Defaults to the project's
        './SOURCES/data.csv', so calling ``leer_data()`` with no arguments
        keeps working as before.

    Returns
    -------
    pandas.DataFrame
        The file contents exactly as parsed by ``pd.read_csv``.
    """
    return pd.read_csv(outfile)

# Load the raw transactions once; the working frame used below is derived from it
data_original = leer_data()
# Preview the first rows to check that the columns were parsed as expected
data_original.head(3)

Unnamed: 0,client_id,date,año,mes,dia,hora,merchant_departement,merchant_province,merchant_district,mcc,mccg,client_age,quantity,amount_sol,dia_semana,turno
0,NNeQwQy9MAQ=,2016-07-15 22:23:25,2016,7,15,22,LIMA,LIMA,San Juan De Luriganc,7994,2,33.0,1,100.0,Fri,4-Noche
1,qFRoPHPOc/I=,2016-06-21 21:30:55,2016,6,21,21,LIMA,LIMA,San Juan De Luriganc,7994,2,52.0,1,20.0,Tues,4-Noche
2,qFRoPHPOc/I=,2017-02-01 01:29:59,2017,2,1,1,LIMA,LIMA,San Juan De Luriganc,7994,2,52.0,1,100.0,Weds,1-Madrugada


### Preparación de datos

In [3]:
# Keep only the columns needed to build the footprints
columnas_footprint = [
    'client_id', 'mccg', 'date',
    'año', 'mes', 'dia', 'hora',
    'quantity', 'amount_sol',
]
data = data_original[columnas_footprint]
data.head(3)

Unnamed: 0,client_id,mccg,date,año,mes,dia,hora,quantity,amount_sol
0,NNeQwQy9MAQ=,2,2016-07-15 22:23:25,2016,7,15,22,1,100.0
1,qFRoPHPOc/I=,2,2016-06-21 21:30:55,2016,6,21,21,1,20.0
2,qFRoPHPOc/I=,2,2017-02-01 01:29:59,2017,2,1,1,1,100.0


# FOOTPRINT PARA CADA MCCG

## Unidad de TXs temporales (U)

### Funciones

In [4]:
# The 4 time windows used throughout the notebook

def time_window(hora):
    """Map an hour of the day to its time-window index.

    Returns 0 (early morning) from hour 0, 1 (morning) from hour 6,
    2 (afternoon) from hour 12 and 3 (night) from hour 18 onwards.
    Any value that is not >= 0 (negative numbers, NaN) yields the
    sentinel 9999.
    """
    # (first hour of the window, window index), checked from latest to earliest
    limites = ((18, 3), (12, 2), (6, 1), (0, 0))
    for inicio, ventana in limites:
        if hora >= inicio:
            return ventana
    return 9999

In [5]:
# Definition of the U units (weekly transaction counts per time window)

def procesar_u(user):
    """Aggregate one client's transactions by year, ISO week, time window and weekday.

    Parameters
    ----------
    user : pandas.DataFrame
        Rows of a single client. The columns read are 'client_id', 'año',
        'mes', 'dia', 'hora' and 'quantity'.

    Returns
    -------
    tuple
        ``(uid, anni)`` where ``uid`` is the 'client_id' of the first row and
        ``anni`` is a nested dict ``{year: {week: {turn: counts}}}``.
        ``counts`` is an integer array of length 7 (one slot per weekday,
        Monday = 0) holding the summed 'quantity' for that time window.

    Raises
    ------
    IndexError
        If ``user`` has no rows.
    """
    uid = user['client_id'].iloc[0]

    # One (initially empty) entry per year in which the client has transactions
    anni = {year: {} for year in set(user['año'])}

    # Walk the needed columns in lockstep instead of calling .iloc per field
    filas = zip(user['año'], user['mes'], user['dia'], user['hora'], user['quantity'])
    for año, mes, dia, hora, cantidad in filas:
        fecha = datetime.date(int(año), int(mes), int(dia))
        # NOTE(review): the ISO week number is stored under the calendar year,
        # so days around New Year can land in week 52/53 (or 1) of the "wrong"
        # year key -- confirm this is acceptable for the footprints.
        week = fecha.isocalendar()[1]
        weekday = fecha.weekday()
        # The hour must come from this client's own row (the previous code
        # read it from the global `data` frame by position, which is wrong).
        turn = time_window(hora)

        semana = anni[año].setdefault(week, {})
        if turn not in semana:
            semana[turn] = np.zeros(7, dtype=int)  # seven weekdays

        # Adds quantities ("importance by purchases"); adding 'amount_sol'
        # instead would give "importance by spending".
        semana[turn][weekday] += cantidad

    return uid, anni

### Procesando U de cada MCCG

In [6]:
# Sorted array with the distinct MCCG codes present in the data
grupos_mccg = data.groupby('mccg').size()
mccg = grupos_mccg.index.values
mccg

array([ 2,  3,  4,  5,  6,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
       21, 22, 23, 25, 26, 27, 28])

In [None]:
# Build the individual footprints of every MCCG and store them in one CSV file per group
for j in range(len(mccg)):
    print()
    data_temp = data.loc[data['mccg'] == mccg[j]]

    path = "RESULTS/MCCG_"+str(mccg[j])
    file_name = "mccg_"+str(mccg[j])
    # Create the output folder (and the RESULTS parent) only when missing;
    # any other filesystem error is now raised instead of being swallowed.
    os.makedirs(path, exist_ok=True)

    file = './%s/%s.json' % (path, file_name)

    ##################################################
    #        Processing U for each CLIENT
    ##################################################

    print("Number of rows "+str(len(data_temp)))
    # groupby hands over each client's rows once, in sorted client_id order,
    # instead of re-filtering the whole frame for every client
    profiles = {}
    for cliente, cliente_i in data_temp.groupby('client_id'):
        results = procesar_u(cliente_i)
        profiles[results[0]] = results[1]

    individual_footprint = "%s.individual_footprint" % (file)
    # Column names d<weekday>t<turn>: the 7 weekdays of turn 0, then turn 1, ...
    columnas = ['d%dt%d' % (dia, turno) for turno in range(4) for dia in range(7)]
    footprints = 0
    with open(individual_footprint, 'w') as fw:
        fw.write('customer_id,year,week,profile_id,turn,size,' + ','.join(columnas) + '\n')
        for uid in profiles:
            profile_id = 0
            for year in profiles[uid]:
                for week in profiles[uid][year]:
                    # 28 slots = 4 turns x 7 weekdays, laid out turn by turn
                    temp = np.zeros(28)
                    for turn in profiles[uid][year][week]:
                        # Turns outside 0..3 (e.g. the 9999 sentinel) are ignored
                        if turn in (0, 1, 2, 3):
                            temp[7*turn:7*turn+7] += profiles[uid][year][week][turn]

                    # NOTE(review): the 'turn' column receives whatever turn was
                    # iterated last for this week; kept unchanged so the file
                    # layout stays the same -- confirm whether it is used at all.
                    campos = [uid, year, week, profile_id, turn, sum(temp)] + list(temp)
                    fw.write(','.join(str(campo) for campo in campos) + '\n')

                    profile_id = profile_id + 1
            footprints += profile_id
    print ("number of footprint: "+str(footprints))
    print("mccg_"+str(mccg[j])+" Finalizado")


Number of rows 3400
number of footprint: 1836
mccg_2 Finalizado

Number of rows 653
number of footprint: 633
mccg_3 Finalizado

Number of rows 56
number of footprint: 56
mccg_4 Finalizado

Number of rows 11465
number of footprint: 10687
mccg_5 Finalizado

Number of rows 72
number of footprint: 71
mccg_6 Finalizado

Number of rows 12819
number of footprint: 12112
mccg_8 Finalizado

Number of rows 180
number of footprint: 169
mccg_9 Finalizado

Number of rows 9417
