# 2021-22 National Circuit VPF Statistical Visualization
*by Samarth Chitgopekar, github: http-samc*

## Start off by importing our dependencies: json (to read our compiled data), numpy (to compute the lines of best fit), and plotly (for graphs)

In [175]:
import json
import numpy as np
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import plotly.graph_objs as go

## Jupyter Setup

In [176]:
# Switch Plotly's offline API into notebook mode before any figure is shown with iplot().
# NOTE(review): connected=True presumably loads plotly.js from a CDN rather than
# embedding it in the notebook — confirm against the Plotly documentation.
init_notebook_mode(connected=True)

## Read our data from `./archive/2020-21 MASTER.json` into an in-memory dictionary

In [177]:
# Load the compiled season archive; the cells below iterate it with data.values().
# The encoding is pinned to UTF-8 (the JSON interchange encoding) so decoding does
# not depend on the platform's locale default.
with open("archive/2020-21 MASTER.json", encoding="utf-8") as f:
    data = json.load(f)

# Plot 1: Correlation between number of bids and OTR Score

## Create our independent data (# of bids) and dependent data (OTR Score)

In [178]:
# x values: each team's bid total, where a silver bid counts as half a gold bid.
independentDataList: list = [
    entry["goldBids"] + entry["silverBids"]/2
    for entry in data.values()
]
# y values: each team's OTR score, in the same team order as the list above.
dependentDataList: list = [entry["otrScore"] for entry in data.values()]

## Convert Python Lists to Numpy Arrays

In [179]:
# Turn both collected lists into ndarrays for the fitting and plotting cells.
independentData, dependentData = (
    np.array(values) for values in (independentDataList, dependentDataList)
)

## Get data for line of best fit

In [180]:
# Degree-1 least-squares fit of OTR score against bid total: m is the slope, b the intercept.
m, b = np.polyfit(independentData, dependentData, deg=1)
# Two endpoints are enough to draw the fitted line; it is drawn from x = 0 to x = 13.
LRX = [0, 13]
LRY = [m*x_end + b for x_end in LRX]

## Create our scatter plot with Plotly

In [181]:
# Figure for plot 1: one marker per team plus the fitted regression line.
layout = go.Layout(
    title="Number of TOC Bids vs. OTR Score: 2020-21 VPF National Circuit",
    xaxis=dict(title="Number of Bids"),
    yaxis=dict(title="OTR Score"),
)

fig = go.Figure(
    data=[
        # Raw observations: bid total on x, OTR score on y.
        go.Scatter(
            x=independentData,
            y=dependentData,
            mode="markers",
            name="Competing Team",
        ),
        # Line of best fit; the legend entry reports the slope rounded to 3 places.
        go.Scatter(
            x=LRX,
            y=LRY,
            mode="lines",
            name=f"Linear Regression (m: {round(m, 3)})",
        ),
    ],
    layout=layout,
)

iplot(fig)

# Plot 2: Correlation between OTR Score and Speaker Points (adj. avg)

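## Create our independent data (OTR Score) and dependent data (adj. avg speaker points)

Note: the variable names in the next cell are swapped relative to this description — `dependentDataList` holds the OTR Scores and `independentDataList` holds the adjusted average speaker points.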

In [182]:
# Collect one (speaker-point average, OTR score) pair per team.
# NOTE: in this section the list names are the reverse of the axes they are
# plotted on: `independentDataList` holds the speaker-point averages (y-axis)
# and `dependentDataList` holds the OTR scores (x-axis).
independentDataList: list = []
dependentDataList: list = []

for team in data.values():
    speaks = []  # one mean adjAVG per usable tournament for this team
    for tournament in team["tournaments"]:
        tournamentScores = [speaker["adjAVG"] for speaker in tournament["speaks"]]
        # Skip tournaments with no speaker entries (the mean would divide by
        # zero) and tournaments where any speaker's adjAVG is falsy, which is
        # treated as "no speaker points for this tournament".
        if not tournamentScores or not all(tournamentScores):
            continue
        speaks.append(sum(tournamentScores)/len(tournamentScores))
    # A team with no usable tournament contributes to neither list, keeping
    # the two lists aligned index-for-index.
    if not speaks:
        continue
    dependentDataList.append(team["otrScore"])
    independentDataList.append(sum(speaks)/len(speaks))

## Convert Python Lists to Numpy Arrays

In [183]:
# Turn both collected lists into ndarrays for the fitting and plotting cells.
independentData, dependentData = (
    np.array(values) for values in (independentDataList, dependentDataList)
)

## Get data for line of best fit

In [184]:
# Fit speaker points as a function of OTR score so the line lives on the same
# axes as the scatter plot (x = OTR score, y = speaker points).
# In this section `dependentData` holds the OTR scores and `independentData`
# holds the speaker-point averages, so they are passed in that order. Fitting
# them the other way round yields a line in (speaker points, OTR) coordinates,
# which is wrong when drawn on (OTR, speaker points) axes.
m, b = np.polyfit(dependentData, independentData, 1)
# Endpoints of the drawn line along the x-axis (OTR score).
# NOTE(review): the 0..5 span is kept from the original — confirm it covers the
# observed OTR score range.
LRX = [0, 5]
LRY = [m*x_end + b for x_end in LRX]

## Create our scatter plot with Plotly

In [185]:
# Figure for plot 2: one marker per team plus the regression line.
layout = go.Layout(
    title="OTR Score vs. Average Adjusted Speaker Points: 2020-21 VPF National Circuit",
    xaxis=dict(title="OTR Score"),
    yaxis=dict(title="Average Adjusted Speaker Points"),
)

fig = go.Figure(
    data=[
        # In this section `dependentData` carries the OTR scores (x-axis) and
        # `independentData` carries the speaker-point averages (y-axis).
        go.Scatter(
            x=dependentData,
            y=independentData,
            mode="markers",
            name="Competing Team",
        ),
        # Regression line; the legend entry reports the slope rounded to 3 places.
        go.Scatter(
            x=LRX,
            y=LRY,
            mode="lines",
            name=f"Linear Regression (m: {round(m, 3)})",
        ),
    ],
    layout=layout,
)

iplot(fig)