In [1]:
# Import libraries

import os
import pandas as pd
import numpy as np
from sklearn import preprocessing
from matplotlib import pyplot as plt, animation
import networkx as nx
from datetime import datetime
import math
from scipy.io import savemat


In [2]:

def break_large_comp(comp,input_user_size):
    """Split ``comp`` into consecutive chunks of ``input_user_size`` members.

    Parameters
    ----------
    comp : sized iterable
        Members to split. They are taken in ``list(comp)`` order.
    input_user_size : int
        Number of members in every full chunk.

    Returns
    -------
    (full_chunks, leftover) : tuple of lists
        ``full_chunks`` holds one list per complete chunk, each exactly
        ``input_user_size`` long. ``leftover`` is ``[]`` when the members
        divide evenly, otherwise a one-element list containing the final,
        shorter chunk.
    """
    members = list(comp)
    # Whole chunks and the count of members that do not fill a chunk.
    n_full, n_left = divmod(len(comp), input_user_size)
    cut = n_full * input_user_size  # index where the leftover members start

    full_chunks = [
        members[start:start + input_user_size]
        for start in range(0, cut, input_user_size)
    ]
    leftover = [members[cut:]] if n_left > 0 else []
    return full_chunks, leftover


# Regroup the node sets in `comp` into groups of exactly `input_user_size` nodes; leftover nodes are pooled and re-split.
def create_small_graphs(comp,input_user_size):
    """Regroup node sets into groups of exactly ``input_user_size`` nodes.

    Each entry of ``comp`` is handled according to its size:

    * equal to ``input_user_size``: kept as one group;
    * larger: cut into full groups by ``break_large_comp``, with any
      shorter tail set aside;
    * smaller: set aside whole.

    Everything set aside is then concatenated, in the order encountered, and
    cut into full groups once more, so a returned group may contain nodes
    that came from different entries of ``comp``.

    Parameters
    ----------
    comp : sequence of sized iterables
        Node sets to regroup.
    input_user_size : int
        Required number of nodes per group.

    Returns
    -------
    (groups, remainder) : tuple of lists
        ``groups`` is a list of node lists, each ``input_user_size`` long.
        ``remainder`` is ``[]`` or a one-element list holding the nodes that
        still do not fill a group.
    """
    groups = []
    set_aside = []

    for members in comp:
        size = len(members)
        if size == input_user_size:
            groups.append(list(members))
        elif size < input_user_size:
            set_aside.append(members)
        else:
            full, tail = break_large_comp(members, input_user_size)
            groups.extend(full)
            set_aside.extend(tail)

    # Pool every undersized piece and cut the pool into full groups.
    pooled = [node for piece in set_aside for node in piece]
    full, remainder = break_large_comp(pooled, input_user_size)
    groups.extend(full)

    return groups, remainder
    
def create_Duplicate_Padding(A_sms,index,input_user_size):
    """Grow the sub-matrix selected by ``index`` to ``input_user_size`` by duplication.

    The square block ``A_sms[index, index]`` is placed in the top-left corner
    and copies of it are laid along the diagonal until the result is
    ``input_user_size`` x ``input_user_size``. The last copy is truncated as
    needed. The node list is repeated and truncated the same way.

    Parameters
    ----------
    A_sms : numpy.ndarray
        Square adjacency matrix to take the block from.
    index : sequence of int
        Row/column positions of the nodes to keep. Must be non-empty (an
        empty ``index`` divides by zero) and no longer than
        ``input_user_size``.
    input_user_size : int
        Side length of the padded matrix.

    Returns
    -------
    (user_list, A_new) : tuple
        ``user_list`` is an array of ``input_user_size`` entries of ``index``,
        cycled from the start. ``A_new`` is the padded matrix. When the
        padding is at most ``len(index)`` it is built on ``np.zeros`` (float
        dtype); otherwise it is a slice of a Kronecker product and keeps that
        product's dtype.
    """
    n_nodes = len(index)
    block   = A_sms[np.ix_(index, index)]
    missing = input_user_size - n_nodes  # rows/columns still to fill

    if missing > n_nodes:
        # More than one extra copy is needed: build a block-diagonal matrix
        # with enough repeats of the block, then trim it to size.
        repeats   = math.ceil(missing / n_nodes) + 1
        diagonal  = np.kron(np.eye(repeats, dtype=int), block)
        cycled    = np.tile(index, repeats)
        A_new     = diagonal[0:input_user_size, 0:input_user_size]
        user_list = cycled[0:input_user_size]
    else:
        # A single (possibly truncated) extra copy fills the gap.
        A_new = np.zeros((input_user_size, input_user_size))
        A_new[0:n_nodes, 0:n_nodes] = block
        A_new[n_nodes:n_nodes + missing, n_nodes:n_nodes + missing] = block[0:missing, 0:missing]
        user_list = np.append(index, index[0:missing])

    return user_list, A_new
    

In [27]:
## Main GEDD loop to generate graphs
#
# For every row of the SMS / call graph tables, the adjacency matrix is turned
# into a graph, its connected components are regrouped into node sets of
# exactly `user_size` nodes, and one record (sub-adjacency matrix + dates) is
# stored per node set. Nodes left over after regrouping are padded up to
# `user_size` by duplication so that they also yield one record.


Storage_cont_call = []
Storage_cont_sms  = []
user_size         = 10   ## model's required size
sms_file_name     = 'sms_graph.pkl'    #provide path to sms and call graphs
call_file_name    = 'call_graph.pkl'    #provide path to sms and call graphs
# NOTE: read_pickle executes whatever the pickle contains; only load trusted files.
sms_files         = pd.read_pickle(sms_file_name)
call_files        = pd.read_pickle(call_file_name)

# Rows of call_files are looked up with the same label `i` as sms_files, so
# both tables are expected to be aligned row by row.
for i in range(0,sms_files.shape[0]):

    user_list           = sms_files.usr_list[i]
    start_date          = sms_files.start_date[i]
    end_date            = sms_files.end_date[i]
    N                   = user_list.size
    A_sms               = sms_files.A_msg[i]
    A_call              = call_files.A_call[i]

    # NOTE(review): every record below stores the row's full `user_list`, not
    # the users selected by `tmp`, and the node ids returned by
    # create_Duplicate_Padding are discarded. Confirm whether downstream code
    # needs the per-subgraph users before relying on 'usr_list'.
    # NOTE(review): call records also use the 'A_msg' key for their adjacency
    # matrix - confirm this is what downstream code expects.

    ## Processing CALL DATA

    # construct call graph and get connected components
    # (nx.from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is
    # its replacement and accepts the same 2-D adjacency array.)
    G_call = nx.from_numpy_array(A_call)
    comp_call = list(nx.connected_components(G_call))

    # FOR CALL : Break the components into user_input_size
    new_gr,rem_gr =  create_small_graphs(comp_call,user_size)

    # Get subgraphs for each sub-comp
    for tmp in new_gr:
        DF_call = {'usr_list':user_list, 'A_msg' :A_call[np.ix_(tmp,tmp)] , 'start_date': start_date, 'end_date': end_date}
        Storage_cont_call.append(DF_call)

    # Leftover nodes (fewer than user_size): pad by duplication
    if(len(rem_gr)>0):
        tmp, A_pad = create_Duplicate_Padding(A_call,rem_gr[0],user_size)
        DF_call = {'usr_list':user_list, 'A_msg' :A_pad , 'start_date': start_date, 'end_date': end_date}
        Storage_cont_call.append(DF_call)

    ## Processing SMS DATA

    # construct sms graph and get connected components
    G_sms = nx.from_numpy_array(A_sms)
    comp_sms = list(nx.connected_components(G_sms))

    # FOR SMS : Break the components into user_input_size
    new_gr,rem_gr =  create_small_graphs(comp_sms,user_size)

    # Get subgraphs for each sub-comp
    for tmp in new_gr:
        DF_sms = {'usr_list':user_list, 'A_msg' :A_sms[np.ix_(tmp,tmp)] , 'start_date': start_date, 'end_date':  end_date}
        Storage_cont_sms.append(DF_sms)

    # Leftover nodes (fewer than user_size): pad by duplication
    if(len(rem_gr)>0):
        tmp, A_pad = create_Duplicate_Padding(A_sms,rem_gr[0],user_size)
        DF_sms = {'usr_list':user_list, 'A_msg' :A_pad , 'start_date': start_date, 'end_date':  end_date}
        Storage_cont_sms.append(DF_sms)

# One row per generated subgraph
Output_call = pd.DataFrame(Storage_cont_call)
Output_sms  = pd.DataFrame(Storage_cont_sms)

In [101]:
## Save graphs in local folder for use in models later on
# Both result tables are pickled into the current working directory.
# NOTE(review): the "_10" suffix presumably mirrors user_size - confirm and
# keep the two in sync if the size is changed.
call_output_name = "A_call_local_10.pkl"
sms_output_name  = "A_sms_local_10.pkl"
Output_call.to_pickle(call_output_name)
Output_sms.to_pickle(sms_output_name)
