In [4]:
import pandas as pd
import json

In [92]:
# Location of the JSON file holding the recorded event records, relative to
# the directory this notebook is run from.
TEST_DATA = '../test/test_data2.json'

# Parse directly from the file handle with json.load rather than reading the
# whole text first and handing it to json.loads. The encoding is pinned to
# UTF-8 so the read does not depend on the platform's default locale encoding.
with open(TEST_DATA, 'r', encoding='utf-8') as f:
    raw_data = json.load(f)

# Points

In [93]:
# Keep only the records whose 'event' field equals 'point'.
sample_data = list(filter(lambda rec: rec['event'] == 'point', raw_data))
# Report how many records passed the filter versus the full record count.
print(f"Extracted {len(sample_data)} rows from {len(raw_data)} total")

Extracted 402 rows from 505 total


In [94]:
# Turn the filtered 'point' records into a DataFrame, one row per record,
# and show it as the cell's output.
df = pd.json_normalize(data=sample_data)
df

Unnamed: 0,event,p,name,against,server
0,point,2,PlayerV1,PlayerV1,
1,point,1,PlayerV1,PlayerV1,
2,point,2,PlayerV1,PlayerV1,
3,point,2,PlayerV1,PlayerV1,
4,point,2,PlayerV1,PlayerV1,
...,...,...,...,...,...
397,point,2,PlayerV4,PlayerV4,1.0
398,point,2,PlayerV4,PlayerV4,1.0
399,point,2,PlayerV4,PlayerV4,1.0
400,point,1,PlayerV4,PlayerV4,1.0


## Player 1 vs Player 2 Points

In [95]:
# Tally the point rows per value of 'p'. The column is grouped by itself
# here, which is why 'p' appears twice in the index of the recorded output.
df.groupby(by='p')['p'].value_counts()

p  p
1  1    174
2  2    228
Name: p, dtype: int64

In [96]:
# For each 'p' value, count the point rows per player implementation name.
(
    df
    .groupby('p')['name']
    .value_counts()
)

p  name    
1  PlayerV1    53
   PlayerV4    49
   PlayerV2    43
   PlayerV3    29
2  PlayerV1    78
   PlayerV2    50
   PlayerV3    50
   PlayerV4    50
Name: name, dtype: int64

In [78]:
# For each 'p' value, count the point rows per 'server' value.
# value_counts() leaves out missing values by default, so rows that carry no
# 'server' value do not contribute to these totals.
df.groupby(by='p')['server'].value_counts()

p  server
1  1.0        86
   2.0        74
2  1.0       114
   2.0        86
Name: server, dtype: int64

## Player Implementation Versions

In [74]:
# Per player implementation name: how its point rows split across 'p' values.
(
    df
    .groupby('name')['p']
    .value_counts()
)

name      p
PlayerV1  2    78
          1    53
PlayerV2  2    50
          1    43
PlayerV3  2    50
          1    29
PlayerV4  2    50
          1    49
Name: p, dtype: int64

In [68]:
# Cross check: recount directly from the raw records, without pandas, the
# 'point' events recorded under the name 'PlayerV1' with p == 2.
data_extract = [
    rec
    for rec in raw_data
    if rec['event'] == 'point'
    if rec['name'] == 'PlayerV1'
    if rec['p'] == 2
]
print(len(data_extract))

78


In [79]:
# Per player implementation name: count of point rows for each 'server' value.
# Rows with a missing 'server' are skipped by value_counts() by default.
df.groupby(by='name')['server'].value_counts()

name      server
PlayerV1  1.0       50
          2.0       39
PlayerV2  1.0       50
          2.0       43
PlayerV3  1.0       50
          2.0       29
PlayerV4  1.0       50
          2.0       49
Name: server, dtype: int64

## Server


In [71]:
# Per 'server' value: how the point rows split across 'p' values.
# groupby() drops rows whose grouping key is missing by default, so rows
# without a 'server' value are not represented.
(
    df
    .groupby('server')['p']
    .value_counts()
)

server  p
1.0     2    114
        1     86
2.0     2     86
        1     74
Name: p, dtype: int64

In [81]:
# Per 'server' value: count of point rows for each player implementation name.
df.groupby(by='server')['name'].value_counts()

server  name    
1.0     PlayerV1    50
        PlayerV2    50
        PlayerV3    50
        PlayerV4    50
2.0     PlayerV4    49
        PlayerV2    43
        PlayerV1    39
        PlayerV3    29
Name: name, dtype: int64

In [90]:
# Points won by the server: 'point' records that carry a 'server' key whose
# value equals 'p'. Records lacking the 'server' key are skipped before the
# comparison is attempted.
data_extract = [
    rec
    for rec in raw_data
    if rec['event'] == 'point'
    if 'server' in rec
    if rec['p'] == rec['server']
]
xdf = pd.json_normalize(data=data_extract)
# Break the server-won points down by 'server' value and implementation name.
(
    xdf
    .groupby('server')['name']
    .value_counts()
)

server  name    
1       PlayerV4    32
        PlayerV1    21
        PlayerV2    20
        PlayerV3    13
2       PlayerV4    32
        PlayerV1    21
        PlayerV2    20
        PlayerV3    13
Name: name, dtype: int64

# Miss Events


In [97]:
# Keep only the records whose 'event' field equals 'missed'.
sample_data = list(filter(lambda rec: rec['event'] == 'missed', raw_data))
# Report how many records passed the filter versus the full record count.
print(f"Extracted {len(sample_data)} rows from {len(raw_data)} total")

Extracted 99 rows from 505 total


In [98]:
# Flatten the 'missed' records into a DataFrame; nested fields end up as
# dotted column names (e.g. 'ball.m', 'paddle.pos'). From here on `df`
# refers to the missed events rather than the point events.
df = pd.json_normalize(data=sample_data)
df

Unnamed: 0,event,name,p,ytarget,ball.pos,ball.mov,ball.m,paddle.pos,paddle.dy
0,missed,PlayerV4,2,218,"[441, 220]","[984, 147]",0.15,"[182, 202]",288
1,missed,PlayerV4,1,216,"[-10, 219]","[-1246, 186]",-0.15,"[158, 178]",288
2,missed,PlayerV4,1,24,"[-23, 19]","[-1386, -208]",0.15,"[67, 87]",-288
3,missed,PlayerV4,2,48,"[447, 48]","[1296, -38]",-0.03,"[50, 70]",-288
4,missed,PlayerV4,1,216,"[-5, 219]","[-1156, 173]",-0.15,"[163, 183]",288
...,...,...,...,...,...,...,...,...,...
94,missed,PlayerV4,1,5,"[0, 3]","[-1102, -165]",0.15,"[13, 33]",-288
95,missed,PlayerV4,1,181,"[0, 183]","[-1180, 177]",-0.15,"[161, 181]",288
96,missed,PlayerV4,1,238,"[-1, 237]","[-1025, -153]",0.15,"[216, 236]",288
97,missed,PlayerV4,2,200,"[434, 202]","[1296, 194]",0.15,"[153, 173]",288


## Slope of the ball's line


In [103]:
# Number of missed events (non-null 'p' entries) for each distinct 'ball.m'
# value, the ball's slope per this section's heading.
df.groupby(by='ball.m')['p'].count()

ball.m
-0.40     5
-0.15    51
-0.03     3
 0.15    35
 0.40     4
 1.00     1
Name: p, dtype: int64

## Player Implementation that missed


In [104]:
# Number of missed events (non-null 'p' entries) recorded under each player
# implementation name.
df.groupby(by='name')['p'].count()

name
PlayerV4    99
Name: p, dtype: int64

---
# References

[How to convert JSON into a Pandas DataFrame](https://towardsdatascience.com/how-to-convert-json-into-a-pandas-dataframe-100b2ae1e0d8), Towards Data Science, accessed 11 August 2021

[Pandas tips and tricks: Group by and value_counts](https://towardsdatascience.com/pandas-tips-and-tricks-33bcc8a40bb9), Towards Data Science, accessed 11 August 2021