In [33]:
import pandas as pd
import numpy as np
import pickle
import datetime
import json
import re
import os, gzip, shutil, fnmatch

from dateutil import parser
from tqdm import tqdm

import matplotlib
from zipfile import BadZipfile
from matplotlib import pyplot as plt 

In [34]:
# Root folder of the exported study data; the loading loop lists its entries
# and treats each one as an export folder.
directory = "/Users/farhan/DNL/BuddingScholar/Budding_Scholar_22-23/Data/"
# Sub-path appended to every export folder; the entries found there are
# treated as participant folders.
metric_folder = "sensorkit-keyboard-metrics/iPhone"
# Folder the final CSV is written into.
# NOTE(review): this path lives under ~/Desktop/DNL while `directory` lives
# under ~/DNL/BuddingScholar — confirm both locations are intended.
outputDirectory = "/Users/farhan/Desktop/DNL/Budding_Scholar_22-23/output_tables"

In [35]:

## Iterative decompression
from gzip import BadGzipFile


def gz_extract(directory):
    """Decompress every ``.gz`` file located directly inside ``directory``.

    Each archive ``<name>.gz`` is expanded to ``<name>`` in the same folder
    and the archive is deleted once it has been extracted successfully.
    Sub-folders are not descended into.

    Archives that cannot be decompressed (not a gzip stream, or truncated)
    are skipped and left in place; any partially written output file is
    removed so that later steps never see a half-extracted file.

    Parameters
    ----------
    directory : str
        Folder to scan. It may be absolute or relative; the process working
        directory is not changed.
    """
    extension = ".gz"
    for item in os.listdir(directory):
        if not item.endswith(extension):
            continue

        # Build both paths from `directory` rather than relying on the
        # current working directory.
        gz_name = os.path.join(directory, item)
        file_name = os.path.join(directory, item.rsplit('.', 1)[0])

        try:
            with gzip.open(gz_name, "rb") as f_in, open(file_name, "wb") as f_out:
                shutil.copyfileobj(f_in, f_out)
        except (BadZipfile, BadGzipFile, EOFError):
            # gzip raises EOFError for a truncated stream and BadGzipFile for
            # a file that is not gzip data at all; skip both kinds.
            if os.path.exists(file_name):
                os.remove(file_name)
            continue

        # Only delete the archive after a complete, successful extraction.
        os.remove(gz_name)

## returns a properly formatted word/emojiList
def get_sentiment_list(emojiList: list):
    """Convert a flat ``[category, count, category, count, ...]`` list into
    a fixed-length list of ten counts indexed by category.

    Only the first ten pairs are considered. For every pair whose category
    equals one of ``0``..``9`` the count is converted with ``int`` and stored
    at that index; pairs with any other category are ignored.

    Lists with fewer than ten pairs (including an empty list) are accepted:
    categories that do not appear keep a count of ``0``. A trailing category
    without a matching count is ignored.

    Parameters
    ----------
    emojiList : list
        Flat sequence of alternating category ids and counts.

    Returns
    -------
    list
        Ten integers, position ``k`` holding the count for category ``k``.
    """
    returnList = [0] * 10

    # Slicing + zip pairs each category with its count and stops cleanly on
    # short or odd-length input instead of indexing past the end.
    for category, count in zip(emojiList[0:20:2], emojiList[1:20:2]):
        for slot in range(len(returnList)):
            if category == slot:
                returnList[slot] = int(count)
                break
    return returnList

In [36]:
## Loop over all the exported data folders/directories
from json import JSONDecodeError

## Fields copied unchanged from each sample into its row, in output-column order
_COPIED_SAMPLE_KEYS = (
    ## Correction metrics
    'totalAutoCorrections',
    'totalTranspositionCorrections',
    'totalSpaceCorrections',
    'totalSubstitutionCorrections',
    'totalInsertKeyCorrections',
    'totalNearKeyCorrections',
    'totalHitTestCorrections',
    'totalRetroCorrections',
    ## Other metrics
    'inputModes',
    'totalTaps',
    'totalTypingEpisodes',
    'totalTypingDuration',
    'totalPathPauses',
    'totalDrags',
    'totalDeletes',
    'totalPathTime',
    'totalAlteredWords',
)

## Fields that may be absent from a sample; the string 'NaN' is stored instead
_OPTIONAL_SAMPLE_KEYS = ('typingSpeed', 'totalPauses')


def _build_keyboard_row(sample_entry, device_name, participant):
    """Flatten one entry of a file's "samples" list into a single row dict.

    Parameters
    ----------
    sample_entry : dict
        One element of ``loaded_file["samples"]``; must provide "timestamp"
        and "sample".
    device_name : str
        Value of ``loaded_file["device"]["name"]`` for the containing file.
    participant : str
        Name of the participant folder the file was found in.

    Returns
    -------
    dict
        Row with identification, timestamp, word/emoji sentiment counts and
        the typing metrics.

    Raises
    ------
    KeyError
        If a required field is missing from the sample.
    """
    dt = parser.parse(sample_entry["timestamp"])
    metrics = sample_entry["sample"]
    sentiment = metrics["sentimentMetrics"]

    emojiCountList = get_sentiment_list(sentiment["emojiCount"])
    wordCountList = get_sentiment_list(sentiment["wordCount"])

    row = {
        "name": device_name,
        "ParticipantIdentifier": participant,
        # NOTE(review): the date is taken after a fixed -4 h shift, presumably
        # a UTC -> local conversion — confirm. 'time' below is NOT shifted.
        'trial_date': (dt + datetime.timedelta(hours=-4)).date(),
        'time': dt.time(),

        "TotalWords": metrics["totalWords"],
        "timeStamp": dt,
        "wordAbsolutionist": wordCountList[0], "wordAnger": wordCountList[4], "wordAnxiety": wordCountList[3],
        "wordConfused": wordCountList[9], "wordDeath": wordCountList[2], "wordDown": wordCountList[1],
        "wordHealth": wordCountList[5], "wordLowEnergy": wordCountList[8], "wordPositive": wordCountList[6],
        "wordSad": wordCountList[7],

        "TotalEmojis": metrics["totalEmojis"],
        "emojiAbsolutionist": emojiCountList[0], "emojiAnger": emojiCountList[4], "emojiAnxiety": emojiCountList[3],
        "emojiConfused": emojiCountList[9], "emojiDeath": emojiCountList[2], "emojiDown": emojiCountList[1],
        "emojiHealth": emojiCountList[5], "emojiLowEnergy": emojiCountList[8], "emojiPositive": emojiCountList[6],
        "emojiSad": emojiCountList[7],
    }

    for key in _COPIED_SAMPLE_KEYS:
        row[key] = metrics[key]

    # Kept as the literal string 'NaN' (not a float NaN) so the written CSV
    # stays identical to what this notebook produced before.
    for key in _OPTIONAL_SAMPLE_KEYS:
        row[key] = metrics.get(key, 'NaN')

    return row


keyboardList = []

for folder in os.listdir(directory):
    path = os.path.join(directory, folder, metric_folder)

    ## Skips stray files such as .DS_Store and exports without keyboard metrics
    if not os.path.isdir(path):
        continue

    for participant in os.listdir(path):
        pFolder = os.path.join(path, participant)
        if not os.path.isdir(pFolder):
            continue

        for data_folder in os.listdir(pFolder):
            final_path = os.path.join(pFolder, data_folder)
            if not os.path.isdir(final_path):
                continue

            ## Expand any .gz archives so the JSON files below are readable
            gz_extract(final_path)

            ## Loop over all files in this path/directory
            for fname in os.listdir(final_path):
                if not fname.endswith("json"):
                    continue
                filename = os.path.join(final_path, fname)

                ## Load the JSON file; the handle is closed as soon as parsing ends
                ## Need to use json.load and not json.loads
                with open(filename) as file:
                    try:
                        loaded_file = json.load(file)
                    except JSONDecodeError:
                        ## Unparseable file: skip it and move on
                        continue

                name = loaded_file["device"]["name"]

                for sample_entry in loaded_file["samples"]:
                    keyboardList.append(_build_keyboard_row(sample_entry, name, participant))


In [37]:
## Assemble every collected row into one table (columns follow the row dict order)
keyboardDF = pd.DataFrame(keyboardList)

## to_csv does not create missing folders, so make sure the target exists
## before writing; otherwise the whole loading run above would be wasted.
os.makedirs(outputDirectory, exist_ok=True)
keyboardDF.to_csv(os.path.join(outputDirectory, 'keyboardFarhan.csv'), index=False)
## Uncomment to preview the first rows:
# keyboardDF.head(5)