In [None]:
import json
import pandas as pd 
import numpy as np 

In [None]:
# Team names; a team's position in this list is its integer id in the
# "Versus" feature and determines its block of columns in the feature row.
teams = [
    "Broncos",
    "Roosters",
    "Wests Tigers",
    "Rabbitohs",
    "Storm",
    "Eels",
    "Raiders",
    "Knights",
    "Dragons",
    "Sea Eagles",
    "Panthers",
    "Sharks",
    "Bulldogs",
    "Dolphins",
    "Titans",
    "Cowboys",
    "Warriors",
]

# Per-team features recorded for every round, in column order.
variables = [
    "Year",
    "Win",
    "Defense",
    "Attack",
    "Margin",
    "Home",
    "Versus",
    "Round",
]

# Seasons to extract from the JSON file.
years = [2023]

In [None]:

# Load the raw NRL results and keep, for each requested year, that season's data.
# Result: years_arr maps year (int) -> whatever the file stores under str(year).
years_arr = {}

# Explicit encoding so the read does not depend on the platform default;
# the handle is closed as soon as the JSON has been parsed.
with open('../data/nrl_data_multi_years_2023.json', 'r', encoding='utf-8') as file:
    # Everything of interest lives under the top-level 'NRL' key.
    # The indexing below treats it as a sequence of {"<year>": season_data} mappings.
    data = json.load(file)['NRL']

for year in years:
    year_key = str(year)
    # Find the season by its key rather than by its position in `years`, so the
    # order (or subset) of `years` does not have to mirror the order in the file.
    season_matches = [entry[year_key] for entry in data if year_key in entry]
    if not season_matches:
        raise KeyError(f"Season {year_key} not found under 'NRL' in the data file")
    years_arr[year] = season_matches[0]


In [None]:
# Empty results frame with one column per (team, variable) pair, named
# "<team> <variable>" and laid out team by team.
column_names = []
for team in teams:
    for variable in variables:
        column_names.append(f"{team} {variable}")

df = pd.DataFrame(columns=column_names)

In [None]:

# Build one integer feature row per round and collect the rows into `df`.

# Rounds are read by position 0..NUM_ROUNDS-1 and keyed "1".."NUM_ROUNDS" in the data.
# NOTE(review): confirm 26 covers the whole regular season for every year in `years`.
NUM_ROUNDS = 26


def _encode_round(games, year, round_number, teams, variables):
    """Encode a single round as a flat integer feature vector.

    The vector holds len(variables) consecutive slots per team, in `teams`
    order. Within a team's slots the values follow the order
    Year, Win, Defense, Attack, Margin, Home, Versus, Round.

    Args:
        games: iterable of mappings with 'Home', 'Away', 'Home_Score' and
            'Away_Score' entries; scores must be convertible with int().
        year: season value written into each team's Year slot.
        round_number: 1-based round number written into each Round slot.
        teams: list of team names; a name's index is its id in "Versus".
        variables: list of feature names; its length is the per-team stride.

    Returns:
        numpy int array of length len(teams) * len(variables). Teams that do
        not appear in any game (a bye) get -1 for Win, Defense, Attack, Home
        and Versus, and 0 for Margin.

    Raises:
        KeyError: a game is missing one of the expected entries.
        ValueError: a team name is not in `teams`, a score is not an integer
            string, or len(variables) does not match the 8-value row layout.
        TypeError: a score has a type int() cannot convert (e.g. None).
    """
    stride = len(variables)
    features = np.zeros(len(teams) * stride, dtype=int)
    played = set()

    for game in games:
        h_team = game['Home']
        h_score = int(game['Home_Score'])
        a_team = game['Away']
        a_score = int(game['Away_Score'])

        h_idx = teams.index(h_team)
        a_idx = teams.index(a_team)

        # Row layout must match `variables`.
        # Defense = points conceded, Attack = points scored.
        # NOTE: on a draw both sides get Win=1 (>= on both); kept as in the
        # original encoding.
        a_row = [year, a_score >= h_score, h_score, a_score,
                 a_score - h_score, 0, h_idx, round_number]
        h_row = [year, h_score >= a_score, a_score, h_score,
                 h_score - a_score, 1, a_idx, round_number]

        # Away first, then home (same write order as before).
        features[a_idx * stride:(a_idx + 1) * stride] = a_row
        features[h_idx * stride:(h_idx + 1) * stride] = h_row

        # Remember who played so byes can be filled in afterwards.
        played.add(h_team)
        played.add(a_team)

    # Every team without a game this round is recorded as a bye.
    for bye_team in set(teams) - played:
        b_idx = teams.index(bye_team)
        features[b_idx * stride:(b_idx + 1) * stride] = [
            year, -1, -1, -1, 0, -1, -1, round_number]

    return features


# One feature vector per successfully parsed round, in (year, round) order.
all_store = []

for year in years:
    for round_idx in range(NUM_ROUNDS):
        round_number = round_idx + 1
        try:
            # Each round is stored as {"<round number>": [games...]} at its 0-based position.
            round_games = years_arr[year][round_idx][str(round_number)]
            round_store = _encode_round(round_games, year, round_number, teams, variables)
        except (IndexError, KeyError, TypeError, ValueError) as ex:
            # Best effort: a missing or malformed round is reported and skipped,
            # never half-written.
            print(f"Skipping {year} round {round_number}: {ex!r}")
            continue
        all_store.append(round_store)

# Build the frame once from all rows. This replaces row-by-row growth and makes
# the cell idempotent: re-running it no longer appends duplicate rounds.
df = pd.DataFrame(
    all_store,
    columns=[f"{team} {variable}" for team in teams for variable in variables],
)

In [None]:
# Show the assembled feature table (one row per processed round) as the cell output.
df

In [None]:
# Write the feature table as tab-separated text, without the index column.
txt_output_path = '../data/txt/match_data_2023.txt'
df.to_csv(txt_output_path, sep='\t', index=False)

In [None]:
# Write the same table again under a .csv name, without the index column.
# NOTE(review): the content is tab-separated despite the .csv extension —
# confirm downstream readers expect sep='\t' before changing either.
csv_output_path = '../data/txt/match_data_2023.csv'
df.to_csv(csv_output_path, sep='\t', index=False)