In [19]:
import pandas as pd
import numpy as np
import os
import json
from datetime import datetime

In [20]:
def _create_dataframes_from_json (json_x, idx):
    '''Return a dataframe from the json file (the idx is only important for the next function)'''

    # It is important to set the working directory to the correct path

    # Import modules
    import numpy as np
    import pandas as pd
    import json

    # Load JSON and create dictionary json_data
    with open(json_x) as json_file:
        json_data = json.load(json_file)

    # Get list of dictionaries with the corresponding x,y-pairs and time
    events_lt = json_data.get('events')

    # Create dataframe from list of dictionaries
    events_df = pd.DataFrame(events_lt)

    # Get dictionary with the information of the session
    session_dc = json_data.get('session')

    # Create dataframe from dictionary (with one row) (session data)
    session_df_temp = pd.DataFrame.from_dict(session_dc, orient ='index')
    session_df_temp = session_df_temp.transpose()

    # Create dataframe with number of rows corresponding to events_df
    session_df = session_df_temp.append([session_df_temp]*(events_df.x.count()-1),ignore_index=True)

    # Get the first rows of the json file in a dictionary
    beg_dc = dict((k, json_data[k]) for k in ('startTime', 'websitePageUrl', 'visitTime',"engagementTime", "pageTitle", "url", 
                                       "viewportWidth", "viewportHeight", "tags"))

    # Create dataframe from dictionary (with one row) (data of first rows)
    beg_df_temp = pd.DataFrame.from_dict(beg_dc, orient = 'index').T

    # Create dataframe with number of rows corresponding to events_df
    beg_df = beg_df_temp.append([beg_df_temp]*(events_df.x.count()-1),ignore_index=True)

    # Concatenate all three dataframes into one
    df = pd.concat([ beg_df, events_df, session_df],axis=1)
    
    # Add index column (for further function)
    df.insert(loc=0,column='idx', value = idx)
    
    # Check if 'ta' column exists
    if 'ta' not in df.columns:
        df.insert(loc=12,column='ta', value = np.NaN)
        
    # Check if 'v' column exists
    if 'v' in df.columns:
        del df['v']

    # Return dataframe
    return df

In [21]:
def _create_dataframe_from_files(work_dir):
    '''This function takes the working directory as an input, uses the function '_create_dataframes_from_json' to 
    transform the json files into dataframes and returns a 'big' dataframe where the single dataframes of each file 
    are concatenated'''
    
    # Return list of files in working directory
    list_dir = os.listdir(work_dir)
    
    # Initialize dataframe with all files
    df_total = pd.DataFrame(columns=['idx', 'startTime', 'websitePageUrl', 'visitTime', 'engagementTime',
       'pageTitle', 'url', 'viewportWidth', 'viewportHeight', 'tags', 'e', 't',
       'ta', 'ty', 'x', 'y', 'id', 'created', 'lastActivity', 'pages',
       'duration', 'engagementTime', 'totalFriction', 'country', 'region',
       'city', 'isp', 'ip', 'lang', 'userAgent', 'browser', 'browserVersion',
       'os', 'osVersion', 'device', 'referrer', 'referrerType', 'screenRes',
       'entryPage', 'tags', 'variables', 'watched', 'starred', 'lng', 'lat',
       'visitorId', 'gdpr', 'visitorName', 'playbackUrl'])
    
    # Loop over every file in the list and return as a dataframe
    for index, file in enumerate(list_dir):
        df_single = _create_dataframes_from_json(file,index)
        
        # Append dataframe of a single file to the dataframe with all files
        df_total = df_total.append(df_single, ignore_index = True)
        
    # Set the index to idx and t & sort index
    df_total = df_total.set_index(['idx', 't'])
    df_total = df_total.sort_index()
    
    # Return dataframe
    return(df_total)      

In [22]:
direction = r'C:\Users\Sellit\Desktop\BA_Coding\Data\test2'

In [23]:
os.chdir(direction)

In [24]:
start=datetime.now()
df_test = _create_dataframe_from_files(direction)
end = datetime.now()
print(end - start)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [22]:
df_test.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 2052 entries, (0, 114) to (11, 25922)
Data columns (total 47 columns):
startTime         2052 non-null object
websitePageUrl    2052 non-null object
visitTime         2052 non-null object
engagementTime    2052 non-null object
pageTitle         2052 non-null object
url               2052 non-null object
viewportWidth     2052 non-null object
viewportHeight    2052 non-null object
tags              2052 non-null object
e                 2052 non-null object
ta                771 non-null object
ty                2052 non-null object
x                 2052 non-null object
y                 2052 non-null object
id                2052 non-null object
created           2052 non-null object
lastActivity      2052 non-null object
pages             2052 non-null object
duration          2052 non-null object
engagementTime    2052 non-null object
totalFriction     2052 non-null object
country           2052 non-null object
region            2052

In [23]:
df_test

Unnamed: 0_level_0,Unnamed: 1_level_0,startTime,websitePageUrl,visitTime,engagementTime,pageTitle,url,viewportWidth,viewportHeight,tags,e,...,tags,variables,watched,starred,lng,lat,visitorId,gdpr,visitorName,playbackUrl
idx,t,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,114,2018-06-21T11:39:06.6496909Z,/garten/gartenhaus,42596,38956,Gartenhaus online kaufen | Garten | Holzprofi24,https://www.holzprofi24.de/garten/gartenhaus/?...,1600,805,no-purchase,114,...,[],[gclid=EAIaIQobChMItf64ttXk2wIVyYXVCh2XegUVEAA...,False,False,53.2412,53.2412,bccb3feca8cd4414eca449dc7e7af77b,False,,https://eu.mouseflow.com/websites/4320374e-0cd...
0,250,2018-06-21T11:39:06.6496909Z,/garten/gartenhaus,42596,38956,Gartenhaus online kaufen | Garten | Holzprofi24,https://www.holzprofi24.de/garten/gartenhaus/?...,1600,805,no-purchase,250,...,[],[gclid=EAIaIQobChMItf64ttXk2wIVyYXVCh2XegUVEAA...,False,False,53.2412,53.2412,bccb3feca8cd4414eca449dc7e7af77b,False,,https://eu.mouseflow.com/websites/4320374e-0cd...
0,728,2018-06-21T11:39:06.6496909Z,/garten/gartenhaus,42596,38956,Gartenhaus online kaufen | Garten | Holzprofi24,https://www.holzprofi24.de/garten/gartenhaus/?...,1600,805,no-purchase,728,...,[],[gclid=EAIaIQobChMItf64ttXk2wIVyYXVCh2XegUVEAA...,False,False,53.2412,53.2412,bccb3feca8cd4414eca449dc7e7af77b,False,,https://eu.mouseflow.com/websites/4320374e-0cd...
0,797,2018-06-21T11:39:06.6496909Z,/garten/gartenhaus,42596,38956,Gartenhaus online kaufen | Garten | Holzprofi24,https://www.holzprofi24.de/garten/gartenhaus/?...,1600,805,no-purchase,797,...,[],[gclid=EAIaIQobChMItf64ttXk2wIVyYXVCh2XegUVEAA...,False,False,53.2412,53.2412,bccb3feca8cd4414eca449dc7e7af77b,False,,https://eu.mouseflow.com/websites/4320374e-0cd...
0,799,2018-06-21T11:39:06.6496909Z,/garten/gartenhaus,42596,38956,Gartenhaus online kaufen | Garten | Holzprofi24,https://www.holzprofi24.de/garten/gartenhaus/?...,1600,805,no-purchase,799,...,[],[gclid=EAIaIQobChMItf64ttXk2wIVyYXVCh2XegUVEAA...,False,False,53.2412,53.2412,bccb3feca8cd4414eca449dc7e7af77b,False,,https://eu.mouseflow.com/websites/4320374e-0cd...
0,826,2018-06-21T11:39:06.6496909Z,/garten/gartenhaus,42596,38956,Gartenhaus online kaufen | Garten | Holzprofi24,https://www.holzprofi24.de/garten/gartenhaus/?...,1600,805,no-purchase,826,...,[],[gclid=EAIaIQobChMItf64ttXk2wIVyYXVCh2XegUVEAA...,False,False,53.2412,53.2412,bccb3feca8cd4414eca449dc7e7af77b,False,,https://eu.mouseflow.com/websites/4320374e-0cd...
0,896,2018-06-21T11:39:06.6496909Z,/garten/gartenhaus,42596,38956,Gartenhaus online kaufen | Garten | Holzprofi24,https://www.holzprofi24.de/garten/gartenhaus/?...,1600,805,no-purchase,896,...,[],[gclid=EAIaIQobChMItf64ttXk2wIVyYXVCh2XegUVEAA...,False,False,53.2412,53.2412,bccb3feca8cd4414eca449dc7e7af77b,False,,https://eu.mouseflow.com/websites/4320374e-0cd...
0,958,2018-06-21T11:39:06.6496909Z,/garten/gartenhaus,42596,38956,Gartenhaus online kaufen | Garten | Holzprofi24,https://www.holzprofi24.de/garten/gartenhaus/?...,1600,805,no-purchase,958,...,[],[gclid=EAIaIQobChMItf64ttXk2wIVyYXVCh2XegUVEAA...,False,False,53.2412,53.2412,bccb3feca8cd4414eca449dc7e7af77b,False,,https://eu.mouseflow.com/websites/4320374e-0cd...
0,976,2018-06-21T11:39:06.6496909Z,/garten/gartenhaus,42596,38956,Gartenhaus online kaufen | Garten | Holzprofi24,https://www.holzprofi24.de/garten/gartenhaus/?...,1600,805,no-purchase,976,...,[],[gclid=EAIaIQobChMItf64ttXk2wIVyYXVCh2XegUVEAA...,False,False,53.2412,53.2412,bccb3feca8cd4414eca449dc7e7af77b,False,,https://eu.mouseflow.com/websites/4320374e-0cd...
0,1046,2018-06-21T11:39:06.6496909Z,/garten/gartenhaus,42596,38956,Gartenhaus online kaufen | Garten | Holzprofi24,https://www.holzprofi24.de/garten/gartenhaus/?...,1600,805,no-purchase,1046,...,[],[gclid=EAIaIQobChMItf64ttXk2wIVyYXVCh2XegUVEAA...,False,False,53.2412,53.2412,bccb3feca8cd4414eca449dc7e7af77b,False,,https://eu.mouseflow.com/websites/4320374e-0cd...
