# Notebook for reading and analysing Json file
by E. Baibuz

#### Reading Json file

In [6]:
import os
import collections
def read_json(path_to_json):
    '''
    This function read Json file and returns list of dictionaries
    '''
    import json   
    with open(path_to_json, 'rb') as infile:
        data = json.load(infile)
    return data

path_to_json = os.path.join(os.getcwd(),'data.json')
data = read_json(path_to_json)

## Simple analysis of a Json file

In [34]:
def analyse_by_key(data_list, key):
    '''
    This function analyses list of dictionaries for a given key
    It prints and returns values with counters for the key 
    '''
    values = []
    for item in data_list:
        value = item[key]
        values.append(value)    
    c = collections.Counter()
    for value in values:
        c[value]+=1
    print("Values for key '%s':"%key, c)
    return c

In [35]:
for key in data.keys():
    print("*"*5,"Key '%s'"%key,"*"*5)
    data_list = data['events_data']
    print("Number of entries: ",len(data_list))
    print("Keys in entries:",[x for x in data_list[0].keys()])


***** Key 'events_data' *****
Number of entries:  107
Keys in entries: ['id', 'client_id', 'user_id', 'category', 'action', 'options']


### 'category' statistics

In [36]:
analyse_by_key(data_list, 'category')

Values for key 'category': Counter({'page': 74, 'datepicker': 15, 'table': 10, 'report': 8})


Counter({'page': 74, 'datepicker': 15, 'table': 10, 'report': 8})

In [37]:
##### 'action' statistics

In [38]:
analyse_by_key(data_list, 'action')

Values for key 'action': Counter({'enter': 74, 'apply': 15, 'sort': 10, 'change_mode': 6, 'link_click': 2})


Counter({'enter': 74,
         'apply': 15,
         'sort': 10,
         'change_mode': 6,
         'link_click': 2})

### How many clients with client_id 

In [40]:
print("Number of clients:",len(analyse_by_key(data_list, 'client_id')))

Values for key 'client_id': Counter({60459: 12, 27115: 11, 62602: 11, 56544: 8, 58113: 7, 18923: 5, 23207: 4, 52492: 3, 24500: 2, 53666: 2, 41737: 2, 62526: 1, 61944: 1, 62017: 1, 63120: 1, 59883: 1, 2913: 1, 62940: 1, 63149: 1, 45391: 1, 63019: 1, 15759: 1, 62007: 1, 62941: 1, 51009: 1, 62236: 1, 44682: 1, 39709: 1, 62144: 1, 25301: 1, 33645: 1, 62089: 1, 26705: 1, 1103: 1, 62439: 1, 44196: 1, 50149: 1, 57009: 1, 57207: 1, 32792: 1, 27323: 1, 58330: 1, 61245: 1, 19799: 1, 48128: 1, 57613: 1, 61244: 1, 53018: 1, 61078: 1, 20820: 1, 49700: 1})
Number of clients: 51


### How many 'action' were performed by the client with a given 'client_id'?

In [49]:
client_id = 62602
actions = [item['action'] for item in data_list if (item['client_id']==client_id and item['action']!=None)]
print("Client %i performed %i actions:"%(client_id,len(actions)),actions)

Client 62602 performed 11 actions: ['enter', 'enter', 'link_click', 'enter', 'enter', 'enter', 'enter', 'enter', 'link_click', 'enter', 'enter']


### How many 'action'=page were performed by the client with a given 'client_id'?

In [48]:
client_id = 62602
action_type = 'page'
actions = 0
for item in data_list:
    if (item['client_id']==client_id and item['action']==action_type):
        actions +=1
print("Client %i performed action '%s' %i times"%(client_id,action_type,actions))

Client 62602 performed action 'page' 0 times:


### How many clients performed actions with category=report

In [56]:
import numpy as np
category = 'report'
clients = [item['client_id'] for item in data_list if (item['category']==category)]
print("Number of unique clients who performed actions with category '%s':"%category,len(np.unique(np.array(clients))))


Number of unique clients who performed actions with category 'report': 3


In [57]:
print("Clients who performed actions with category=report'%s':"%category,np.unique(np.array(clients)))

Clients who performed actions with category=report'report': [58113 60459 62602]
