# Using Pandas DataFrames to explore tabular data

In [2]:
from datetime import datetime, timedelta, timezone

import pandas as pd

import db
import parser

In [3]:
# Connect to the QA database via the project-local `db` helper.
dev = db.qa()

# Size of the look-back window, in days.
LOOKBACK_DAYS = 30

# Use a timezone-aware UTC "now" for the lower bound. A naive local time
# (datetime.today()) would be read as UTC by PyMongo-style drivers, shifting
# the window by the local UTC offset.
# NOTE(review): assumes `dev` is a PyMongo-style client and that createdAt is
# stored in UTC — confirm.
begin = datetime.now(timezone.utc) - timedelta(days=LOOKBACK_DAYS)

# Materialize the query result into a list so later cells can reuse it.
data = list(
    dev.AppUpdateLog.find({"createdAt": {"$gt": begin}})
)
parser.pretty(data)

<IPython.core.display.JSON object>

In [62]:
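# Naive alternative, kept for reference: pd.DataFrame(data) builds one row per
# document and leaves each "log" array unexpanded in a single column.
# pd.DataFrame(data)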

In [4]:
# Flatten: one row per entry of each document's "log" array, carrying the
# parent document's userId / clientType / status along as meta columns.
exploded = pd.json_normalize(
    data,
    record_path="log",
    meta=["userId", "clientType", "status"],
    record_prefix="log.",
)

# Source column -> display name. The dict order also fixes the column order.
# "log.status" is the per-event status; the document-level "status" becomes
# "finalStatus".
column_map = {
    "log.timestamp": "timestamp",
    "userId": "user",
    "clientType": "os",
    "log.clientVersion": "version",
    "log.status": "status",
    "status": "finalStatus",
}
df = exploded[list(column_map)].rename(columns=column_map)

# Display the events in chronological order (df itself is left unsorted).
df.sort_values("timestamp")

Unnamed: 0,timestamp,user,os,version,status,finalStatus
0,2022-09-30 11:46:37.625,6336d434a699e9717dd1dfb6,ios,2.1.0,rejected,succeeded
1,2022-09-30 11:47:51.347,6336d434a699e9717dd1dfb6,ios,2.5.0,succeeded,succeeded
2,2022-09-30 13:33:04.543,6336efd5d6a8402586edb070,ios,2.1.0,rejected,rejected
3,2022-09-30 13:34:22.449,6336efd5d6a8402586edb070,ios,2.1.0,rejected,rejected
4,2022-09-30 13:34:37.483,6336efd5d6a8402586edb070,ios,2.1.0,rejected,rejected
5,2022-09-30 13:36:04.624,6336efd5d6a8402586edb070,ios,2.1.0,rejected,rejected
6,2022-10-03 06:40:19.393,633a7fd999cf56307b8a278c,ios,2.1.0,rejected,succeeded
7,2022-10-03 06:58:53.502,633a7fd999cf56307b8a278c,ios,2.1.0,rejected,succeeded
8,2022-10-03 09:33:19.165,633a7fd999cf56307b8a278c,ios,2.1.0,rejected,succeeded
9,2022-10-03 09:35:03.071,633a7fd999cf56307b8a278c,ios,2.1.0,rejected,succeeded


In [5]:
# Keep only the events whose own (per-event) status is 'succeeded'.
is_succeeded = df['status'].eq('succeeded')
df.loc[is_succeeded]

Unnamed: 0,timestamp,user,os,version,status,finalStatus
1,2022-09-30 11:47:51.347,6336d434a699e9717dd1dfb6,ios,2.5.0,succeeded,succeeded
14,2022-10-04 08:06:49.099,633a7fd999cf56307b8a278c,ios,2.4.0,succeeded,succeeded
19,2022-10-04 11:29:01.711,633a7fd999cf56307b8a278c,ios,2.5.0,succeeded,succeeded
23,2022-10-04 17:33:08.745,633c1b2bbac87360380842f7,ios,2.5.0,succeeded,succeeded
25,2022-10-04 17:44:34.674,633c1b2bbac87360380842f7,ios,2.5.0,succeeded,succeeded
27,2022-10-06 14:04:35.405,633c1b2bbac87360380842f7,ios,2.3.1,succeeded,succeeded


In [10]:
# Distinct users vs. total rows (each row of df is one log event).
n_users = len(df["user"].unique())
n_events = len(df)
f'Count: {n_users}/{n_events} (users/events)'

'Count: 4/29 (users/events)'

In [7]:
# Distinct client versions seen in the event log.
versions = df["version"]
versions.unique()

array(['2.1.0', '2.5.0', '2.4.0', '2.3.0', '2.3.1'], dtype=object)

In [8]:
# Number of log events recorded per client version.
events_per_version = df["version"].value_counts()
events_per_version

2.1.0    21
2.5.0     4
2.3.1     2
2.4.0     1
2.3.0     1
Name: version, dtype: int64

In [9]:
# Add the calendar date of each event as a column on df (df is modified in place).
event_dates = df['timestamp'].dt.date
df['date'] = event_dates

# Count rows per (date, os, version, status); every remaining column shows
# its non-null count for the group.
group_keys = ['date', 'os', 'version', 'status']
df.groupby(by=group_keys).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,timestamp,user,finalStatus
date,os,version,status,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-09-30,ios,2.1.0,rejected,5,5,5
2022-09-30,ios,2.5.0,succeeded,1,1,1
2022-10-03,ios,2.1.0,rejected,8,8,8
2022-10-04,ios,2.1.0,rejected,8,8,8
2022-10-04,ios,2.4.0,succeeded,1,1,1
2022-10-04,ios,2.5.0,succeeded,3,3,3
2022-10-06,ios,2.3.0,rejected,1,1,1
2022-10-06,ios,2.3.1,rejected,1,1,1
2022-10-06,ios,2.3.1,succeeded,1,1,1
