In [4]:
import pandas as pd
import json

In [105]:
# Relative path to the JSON event log analysed in this notebook.
TEST_DATA = '../test/test_data3.json'

# json.load parses directly from the file handle, and the explicit encoding
# keeps the read independent of the platform's default locale.
with open(TEST_DATA, 'r', encoding='utf-8') as f:
    raw_data = json.load(f)

# Points

In [106]:
# Keep only the records whose 'event' field is 'point'.
sample_data = list(filter(lambda rec: rec['event'] == 'point', raw_data))
print(f"Extracted {len(sample_data)} rows from {len(raw_data)} total")

Extracted 1253 rows from 2522 total


In [107]:
# Build a DataFrame from the point records and display it.
df = pd.json_normalize(data=sample_data)
df

Unnamed: 0,event,p,name,against,server,volley_time
0,point,1,PlayerV1,PlayerV1,1,
1,point,2,PlayerV1,PlayerV1,2,30.28
2,point,1,PlayerV1,PlayerV1,1,
3,point,2,PlayerV1,PlayerV1,2,34.63
4,point,1,PlayerV1,PlayerV1,1,
...,...,...,...,...,...,...
1248,point,1,PlayerV4,PlayerV4,1,
1249,point,1,PlayerV4,PlayerV4,2,
1250,point,1,PlayerV4,PlayerV4,2,
1251,point,1,PlayerV4,PlayerV4,2,


## Player 1 vs Player 2 Points

In [108]:
# Total points per player number ('p').
# Grouping by 'p' and then value-counting 'p' itself only yields one count per
# group under a duplicated index level; size() reports the same totals with a
# single 'p' index.
df.groupby('p').size()

p  p
1  1    587
2  2    666
Name: p, dtype: int64

In [109]:
# For each player number ('p'), count the point rows per implementation ('name').
(
    df
    .groupby(by='p')['name']
    .value_counts()
)

p  name    
1  PlayerV3    172
   PlayerV4    157
   PlayerV2    136
   PlayerV1    122
2  PlayerV3    200
   PlayerV4    185
   PlayerV2    142
   PlayerV1    139
Name: name, dtype: int64

In [110]:
# For each player number ('p'), count the point rows per 'server' value.
(
    df
    .groupby(by='p')['server']
    .value_counts()
)

p  server
1  1         298
   2         289
2  1         376
   2         290
Name: server, dtype: int64

## Player Implementation Versions

In [111]:
# For each implementation ('name'), count the point rows per player number ('p').
(
    df
    .groupby(by='name')['p']
    .value_counts()
)

name      p
PlayerV1  2    139
          1    122
PlayerV2  2    142
          1    136
PlayerV3  2    200
          1    172
PlayerV4  2    185
          1    157
Name: p, dtype: int64

In [112]:
# Cross check: count the point records with name 'PlayerV1' and p == 2 directly
# from the raw records, and verify the count against the same selection on the
# DataFrame instead of comparing printed numbers by eye.
# NOTE: `df` must still hold the point events when this cell runs.
data_extract = [x for x in raw_data if x['event'] == 'point' and x['name'] == 'PlayerV1' and x['p'] == 2]
expected_count = int(((df['name'] == 'PlayerV1') & (df['p'] == 2)).sum())
assert len(data_extract) == expected_count, (
    f"raw record count {len(data_extract)} != DataFrame count {expected_count}"
)
print(len(data_extract))

139


In [113]:
# For each implementation ('name'), count the point rows per 'server' value.
(
    df
    .groupby(by='name')['server']
    .value_counts()
)

name      server
PlayerV1  1         139
          2         122
PlayerV2  2         144
          1         134
PlayerV3  1         208
          2         164
PlayerV4  1         193
          2         149
Name: server, dtype: int64

## Server


In [114]:
# For each 'server' value, count the point rows per player number ('p').
(
    df
    .groupby(by='server')['p']
    .value_counts()
)

server  p
1       2    376
        1    298
2       2    290
        1    289
Name: p, dtype: int64

In [115]:
# For each 'server' value, count the point rows per implementation ('name').
(
    df
    .groupby(by='server')['name']
    .value_counts()
)

server  name    
1       PlayerV3    208
        PlayerV4    193
        PlayerV1    139
        PlayerV2    134
2       PlayerV3    164
        PlayerV4    149
        PlayerV2    144
        PlayerV1    122
Name: name, dtype: int64

In [116]:
# When the server wins the point: keep point records whose 'p' equals their
# 'server' (records without a 'server' key are skipped), then count them per
# server and implementation.
data_extract = [
    rec
    for rec in raw_data
    if rec['event'] == 'point'
    if 'server' in rec
    if rec['p'] == rec['server']
]
xdf = pd.json_normalize(data=data_extract)
(
    xdf
    .groupby(by='server')['name']
    .value_counts()
)

server  name    
1       PlayerV1    83
        PlayerV4    79
        PlayerV3    69
        PlayerV2    67
2       PlayerV1    83
        PlayerV2    75
        PlayerV4    71
        PlayerV3    61
Name: name, dtype: int64

# Miss Events


In [117]:
# Keep only the records whose 'event' field is 'missed'.
sample_data = list(rec for rec in raw_data if rec['event'] == 'missed')
print(f"Extracted {len(sample_data)} rows from {len(raw_data)} total")

Extracted 1253 rows from 2522 total


In [118]:
# Build a DataFrame from the miss records and display it; nested fields show up
# as dotted column names (e.g. 'ball.pos', 'paddle.top').
df = pd.json_normalize(data=sample_data)
df

Unnamed: 0,event,name,p,ball.pos,ball.mov,ball.speed,ball.m,paddle.top,paddle.bot,paddle.dy,ytarget
0,missed,PlayerV1,2,"[435, 69]","[442, -442]",626.25,-1.00,105,125,-288,
1,missed,PlayerV1,1,"[-1, 178]","[-686, 686]",971.16,-1.00,117,137,288,
2,missed,PlayerV1,2,"[433, 155]","[668, -668]",946.10,-1.00,150,170,288,
3,missed,PlayerV1,1,"[-1, 209]","[-710, -710]",1004.28,1.00,136,156,288,
4,missed,PlayerV1,2,"[434, 124]","[813, 325]",875.66,0.40,95,115,288,
...,...,...,...,...,...,...,...,...,...,...,...
1248,missed,PlayerV4,2,"[434, 17]","[775, 310]",835.05,0.40,20,40,-288,12.0
1249,missed,PlayerV4,2,"[447, 221]","[1296, -518]",1395.83,-0.40,155,175,288,231.0
1250,missed,PlayerV4,2,"[446, 172]","[1296, 194]",1310.50,0.15,153,173,288,169.0
1251,missed,PlayerV4,2,"[446, 164]","[1296, 518]",1395.83,0.40,139,159,288,154.0


## Slope of the ball's line


In [119]:
# Number of miss rows for each ball slope ('ball.m'); count() tallies the
# non-null 'p' entries in every group.
(
    df
    .groupby(by='ball.m')['p']
    .count()
)

ball.m
-1.00    173
-0.40    257
-0.15    184
-0.03     11
 0.03      4
 0.15    202
 0.40    248
 1.00    174
Name: p, dtype: int64

## Player Implementation that missed


In [120]:
# Number of miss rows for each implementation ('name'); count() tallies the
# non-null 'p' entries in every group.
(
    df
    .groupby(by='name')['p']
    .count()
)

name
PlayerV1    397
PlayerV2    343
PlayerV3    204
PlayerV4    309
Name: p, dtype: int64

---
# References

[How to convert JSON into a Pandas DataFrame](https://towardsdatascience.com/how-to-convert-json-into-a-pandas-dataframe-100b2ae1e0d8), Towards Data Science, accessed 11 August 2021

[Pandas tips and tricks: Group by and value_counts](https://towardsdatascience.com/pandas-tips-and-tricks-33bcc8a40bb9), Towards Data Science, accessed 11 August 2021