In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [2]:
def flatten_json(nested_json, exclude=['']):
    """Flatten a nested JSON-like object into a single-level dict.

    Nested dict keys and list positions are joined with ``_`` to build the
    flat key, e.g. ``{'a': {'b': [5, 6]}}`` becomes
    ``{'a_b_0': 5, 'a_b_1': 6}``.

    Args:
        nested_json: A JSON-like value built from dicts, lists and scalars
            (for example the result of ``json.load``).
        exclude: Collection of dict keys to skip, at any nesting depth,
            together with everything below them. It is only read, never
            mutated, so the shared list default is safe.

    Returns:
        A new dict mapping each flattened key path to its leaf value.
        Empty dicts and empty lists have no leaves and therefore contribute
        no entries. A bare scalar input is stored under the empty key ``''``.
        If two different paths join to the same flat key, the one visited
        last wins.
    """
    out = {}

    def flatten(node, prefix=''):
        # isinstance (rather than an exact type check) so that dict/list
        # subclasses such as OrderedDict are descended into instead of
        # being stored whole as a single leaf value.
        if isinstance(node, dict):
            for key, value in node.items():
                if key not in exclude:
                    # str() keeps non-string keys from raising on concatenation.
                    flatten(value, prefix + str(key) + '_')
        elif isinstance(node, list):
            for index, item in enumerate(node):
                flatten(item, prefix + str(index) + '_')
        else:
            # Drop the trailing '_' separator appended by the parent level.
            out[prefix[:-1]] = node

    flatten(nested_json)
    return out

In [3]:
import json

# Pin the encoding so decoding does not depend on the platform's locale
# default (JSON interchange text is UTF-8).
with open('voice.json', 'r', encoding='utf-8') as json_file:
    json_load = json.load(json_file)

# Preview only the first records; echoing the whole list floods the output.
json_load[:2]

[{'keyPerformanceIndicators': {'setUpSuccessful': 'NEUTRAL',
   'outgoingCallDuration': 0,
   'congestionRate': 0.0,
   'dropRate': 0.0,
   'handoverSuccessRate': 0.0,
   'handoverAttempts': [],
   'callDropped': False,
   'cell': None,
   'ber': 0.0,
   'successRate': 0.0,
   'congested': False,
   'receivedSignals': []},
  'networkInfo': {'operatorName': 'MTN Nigeria Communications',
   'networkType': '4G LTE'},
  'device': {'id': 'eY5WuR9RTQSdjte7tB8Yi6',
   'macAddress': '06:F0:6E:06:C8:94',
   'ipAddress': '10.130.28.86',
   'imeis': ['', '']},
  'manual': False,
  'location': None,
  'createdAt': 1629572761973},
 {'location': '9.0092725 7.461294',
  'networkInfo': {'operatorName': 'glo ng',
   'networkParameters': [{'value': '4G', 'unit': '', 'name': 'Data'},
    {'value': 'true', 'name': 'serving', 'unit': ''},
    {'unit': '', 'value': '2', 'name': 'Signal Level'},
    {'value': '-101', 'unit': 'dbm', 'name': 'Signal Strength '},
    {'value': '39', 'unit': 'ASU', 'name': 'Sign

In [4]:
# Flatten every raw record, then build one row per record. Keys that a
# record lacks show up as missing values in that row.
df = pd.DataFrame(list(map(flatten_json, json_load)))

In [5]:
# Peek at the first five flattened records.
df.head(n=5)

Unnamed: 0,keyPerformanceIndicators_setUpSuccessful,keyPerformanceIndicators_outgoingCallDuration,keyPerformanceIndicators_congestionRate,keyPerformanceIndicators_dropRate,keyPerformanceIndicators_handoverSuccessRate,keyPerformanceIndicators_callDropped,keyPerformanceIndicators_cell,keyPerformanceIndicators_ber,keyPerformanceIndicators_successRate,keyPerformanceIndicators_congested,...,keyPerformanceIndicators_receivedSignals_650,keyPerformanceIndicators_receivedSignals_651,keyPerformanceIndicators_receivedSignals_652,keyPerformanceIndicators_receivedSignals_653,keyPerformanceIndicators_receivedSignals_654,keyPerformanceIndicators_receivedSignals_655,keyPerformanceIndicators_receivedSignals_656,keyPerformanceIndicators_receivedSignals_657,keyPerformanceIndicators_receivedSignals_658,keyPerformanceIndicators_receivedSignals_659
0,NEUTRAL,0,0.0,0.0,0.0,False,,0.0,0.0,False,...,,,,,,,,,,
1,SUCCESS,238,0.0,0.0,0.0,True,,inf,0.0,False,...,,,,,,,,,,
2,SUCCESS,241,0.0,0.0,0.0,True,,1.25,0.0,False,...,,,,,,,,,,
3,NEUTRAL,0,0.0,0.0,0.0,False,,0.0,0.0,False,...,,,,,,,,,,
4,NEUTRAL,0,0.0,0.0,0.0,False,,0.0,0.0,True,...,,,,,,,,,,


In [6]:
# Structural summary of the flattened frame: row count, column count,
# dtype mix and memory footprint.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15592 entries, 0 to 15591
Columns: 1245 entries, keyPerformanceIndicators_setUpSuccessful to keyPerformanceIndicators_receivedSignals_659
dtypes: bool(3), float64(944), int64(2), object(296)
memory usage: 147.8+ MB


In [7]:
# Summary statistics for the numeric columns.
# NOTE(review): the recorded output shows `keyPerformanceIndicators_ber`
# with a mean and max of inf and a NaN std, so the column appears to
# contain infinite values that need handling before these figures are
# meaningful - confirm during cleaning.
df.describe()

Unnamed: 0,keyPerformanceIndicators_outgoingCallDuration,keyPerformanceIndicators_congestionRate,keyPerformanceIndicators_dropRate,keyPerformanceIndicators_handoverSuccessRate,keyPerformanceIndicators_cell,keyPerformanceIndicators_ber,keyPerformanceIndicators_successRate,createdAt,keyPerformanceIndicators_cell_cellCDMA,keyPerformanceIndicators_cell_cellGSM_cellLAC,...,keyPerformanceIndicators_receivedSignals_650,keyPerformanceIndicators_receivedSignals_651,keyPerformanceIndicators_receivedSignals_652,keyPerformanceIndicators_receivedSignals_653,keyPerformanceIndicators_receivedSignals_654,keyPerformanceIndicators_receivedSignals_655,keyPerformanceIndicators_receivedSignals_656,keyPerformanceIndicators_receivedSignals_657,keyPerformanceIndicators_receivedSignals_658,keyPerformanceIndicators_receivedSignals_659
count,15592.0,15592.0,15592.0,15592.0,0.0,15592.0,15592.0,15592.0,0.0,5639.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
mean,50.82459,0.0,0.0,0.0,,inf,0.0,1635132000000.0,,24833.106402,...,-30.0,-30.0,-30.0,-30.0,-30.0,-30.0,-30.0,-61.0,-30.0,-67.0
std,133.092354,0.0,0.0,0.0,,,0.0,12214450000.0,,21464.109919,...,,,,,,,,,,
min,0.0,0.0,0.0,0.0,,-7.5,0.0,1593341000000.0,,-1.0,...,-30.0,-30.0,-30.0,-30.0,-30.0,-30.0,-30.0,-61.0,-30.0,-67.0
25%,0.0,0.0,0.0,0.0,,0.0,0.0,1632468000000.0,,8110.0,...,-30.0,-30.0,-30.0,-30.0,-30.0,-30.0,-30.0,-61.0,-30.0,-67.0
50%,17.0,0.0,0.0,0.0,,0.0,0.0,1639478000000.0,,20387.0,...,-30.0,-30.0,-30.0,-30.0,-30.0,-30.0,-30.0,-61.0,-30.0,-67.0
75%,53.0,0.0,0.0,0.0,,0.0,0.0,1641240000000.0,,50566.0,...,-30.0,-30.0,-30.0,-30.0,-30.0,-30.0,-30.0,-61.0,-30.0,-67.0
max,3600.0,0.0,0.0,0.0,,inf,0.0,1646090000000.0,,60587.0,...,-30.0,-30.0,-30.0,-30.0,-30.0,-30.0,-30.0,-61.0,-30.0,-67.0


In [8]:
# Persist the flattened, still-uncleaned frame for the downstream cleaning
# step. The row index is written too (pandas default).
df.to_csv(path_or_buf="Voice_dirty_data.csv")