# Import Starting Data

In [1]:
import pandas as pd #1
import os           #3

In [2]:
# Directory (relative to the notebook's working directory) that holds the input CSVs.
given_data_folder = 'data'

# sorted() hands back the directory listing already in alphabetical order,
# so the displayed list is stable from run to run.
file_names = sorted(os.listdir(given_data_folder))
file_names

['MRegularSeasonDetailedResults.csv', 'bracket-2022.csv']

In [3]:
# CSV to load; it is one of the entries shown in the data-folder listing.
file_name = 'MRegularSeasonDetailedResults.csv'

In [4]:
# Build the path with os.path.join instead of hand-concatenating '/', leaving
# separator handling to the standard library.
file_path = os.path.join(given_data_folder, file_name)
given_df  = pd.read_csv(file_path)

# NOTE(review): the preview shows a leading "Unnamed: 0" column, which looks like
# a previously saved index. Confirm whether downstream code expects it before
# switching to pd.read_csv(..., index_col=0).
print(len(given_df))  # number of rows in the loaded file

# First five rows as a quick sanity check of the columns.
given_df.head(5)

100423


Unnamed: 0,Season,DayNum,WTeamID,WScore,LTeamID,LScore,WLoc,NumOT,WFGM,WFGA,...,LFGA3,LFTM,LFTA,LOR,LDR,LAst,LTO,LStl,LBlk,LPF
0,2003,10,1104,68,1328,62,N,0,27,58,...,10,16,22,10,22,8,18,9,2,20
1,2003,10,1272,70,1393,63,N,0,26,62,...,24,9,20,20,25,7,12,8,6,16
2,2003,11,1266,73,1437,61,N,0,24,58,...,26,14,23,31,22,9,12,2,5,23
3,2003,11,1296,56,1457,50,N,0,18,38,...,22,8,15,17,20,9,19,4,3,23
4,2003,11,1400,77,1208,71,N,0,30,61,...,16,17,27,21,15,12,10,7,1,14


# Testing With The Network

In [5]:
import sys

# Put the parent directory on the import path (presumably where the
# project-local packages imported later in the notebook live - confirm).
# Guarded so that re-running this cell does not keep appending duplicates.
if ".." not in sys.path:
    sys.path.append("..")

In [6]:
import random
import time
import threading
import math

from copy import deepcopy

In [7]:
from network_classes.neuron      import Neuron
from network_classes.agent       import Agent
from network_classes.population  import Population

from network_classes.data_prep     import *
from network_classes.testingreport import TestingReport


from extra.progress_package import *

# Get each team's stats from a given season

### 2021 season

In [8]:
# Extract the 2021 season from the full results table. The call yields three
# values (game solutions, team dictionary, headers); the printed summary is
# switched off for this season.
game_solutions_2021, team_dictionary_2021, headers_2021 = stats_for_season(given_df, 2021, print_report=False)

### 2022 season

In [9]:
# Same extraction for the 2022 season, this time with the summary report
# printed (data size, column titles/indices and the team_dictionary layout).
game_solutions_2022, team_dictionary_2022, headers_2022 = stats_for_season(given_df, 2022, print_report=True)

data size: 
-----------
number of games for 2022 season:  3736 
number of columns in base data:   34 
number of rows:                   3736 


column titles and indicis: 
---------------------------
  0: Season      1: DayNum      2: WTeamID     3: WScore      4: LTeamID     5: LScore      6: WLoc      
  7: NumOT       8: WFGM        9: WFGA       10: WFGM3      11: WFGA3      12: WFTM       13: WFTA      
 14: WOR        15: WDR        16: WAst       17: WTO        18: WStl       19: WBlk       20: WPF       
 21: LFGM       22: LFGA       23: LFGM3      24: LFGA3      25: LFTM       26: LFTA       27: LOR       
 28: LDR        29: LAst       30: LTO        31: LStl       32: LBlk       33: LPF       


base stats report: 
-------------------
game_solutions length:    3736 games
team_dictionary length:    358 teams

team_dictionary[team_id] = [# of games played, [stats FOR totals], [stats AGAINST totals]]


example row:    
------------- 

  number of games played by team: 23 

   

## base stats with FOR and AGAINST

### 2021

In [10]:
# Step 1: team dictionary built from the custom FOR/AGAINST stats (2021).
FA_team_dictionary_2021, FA_headers_list_2021 = for_and_against(team_dictionary_2021)

# Step 2: convert the per-team totals into averages (report printing disabled).
FA_team_averages_2021 = team_totals_to_averages(
    FA_team_dictionary_2021,
    FA_headers_list_2021,
    print_report=False,
)

# Step 3: produce the final input and solution data from the averages
# and the 2021 game solutions.
FA_inputs_2021, FA_solutions_2021 = prep_inputs_and_solutions(
    FA_team_averages_2021,
    game_solutions_2021,
)

# Step 4: expose the results under the generic names used by the test cells.
inputs_2021, solutions_2021 = FA_inputs_2021, FA_solutions_2021

### 2022

In [11]:
# Step 1: team dictionary built from the custom FOR/AGAINST stats (2022).
FA_team_dictionary_2022, FA_headers_list_2022 = for_and_against(team_dictionary_2022)

# Step 2: convert the per-team totals into averages
# (print_report is left at the function's default here).
FA_team_averages_2022 = team_totals_to_averages(
    FA_team_dictionary_2022,
    FA_headers_list_2022,
)

# Step 3: produce the final input and solution data from the averages
# and the 2022 game solutions.
FA_inputs_2022, FA_solutions_2022 = prep_inputs_and_solutions(
    FA_team_averages_2022,
    game_solutions_2022,
)

# Step 4: expose the results under the generic names used by the test cells.
inputs_2022, solutions_2022 = FA_inputs_2022, FA_solutions_2022

# First Test (uses threading)

### variables for stat tracking

### initialize population

In [12]:
# The first constructor argument is the length of one 2022 input row; the
# second (10) is presumably the number of agents - confirm against Population.
input_width = len(inputs_2022[0])
TEST_population = Population(input_width, 10)

# Ten mutation passes over every agent, with neuron deletion enabled,
# before any testing starts.
for mutation_round in range(10):
    for member in TEST_population.agents:
        member.mutate(delete_neurons=True)


In [13]:
# Flat four-element list: 2021 inputs and solutions followed by the
# 2022 inputs and solutions, in that order.
season_2021_pair = (inputs_2021, solutions_2021)
season_2022_pair = (inputs_2022, solutions_2022)
report_data = [*season_2021_pair, *season_2022_pair]

# Report object wrapping the population under test.
current_test = TestingReport(TEST_population)

### Training Loop

In [None]:
# Time budget handed to run_test.
testing_time = 20 # (in seconds)

# Run the test on the prepared report_data with that budget, then print the report.
current_test.run_test(testing_time, report_data)
current_test.print_report()

                                                                                                    [                                                  ] - 0.0%

In [None]:
# Show the report again without re-running the test (same call that ends the training cell).
current_test.print_report()

In [None]:
# Presumably a breakdown of the time spent scoring during the run - confirm in TestingReport.
current_test.scoring_time_report()

In [None]:
# Bare expression so the notebook displays the population's num_calculations attribute.
TEST_population.num_calculations

In [None]:
# Presumably prints the best-performing agent of the run - confirm in TestingReport.
current_test.print_top_agent()

In [None]:
# Size report with the all_attributes option turned off.
current_test.size_report(all_attributes=False)

In [None]:
# Inert string (the trailing ';' suppresses its display): what looks like a
# pasted size report from an earlier run, kept here for reference only.
'''
3 steps

                        kB          neurons     avg size
                        --------    --------    --------
layer[0]                   20.21          26        0.78 
layer[1]                   12.11          18        0.67 
layer[2]                    9.33          12        0.78 
layer[3]                     6.2          16        0.39 
layer[4]                    0.26           1        0.26 

                        kB          mB
                        --------    --------
total_size                 48.43        0.05







''';

In [None]:
#3220*7472
# 10*161*7472 = 12_029_920
# 10*322*7472 = 24_059_840

In [None]:
# Disabled code kept as an inert string: it would sum the time_deleting
# attribute over every agent in TEST_population and display the total.
'''
deletion_time = 0
for i in range(len(TEST_population.agents)):
    deletion_time += TEST_population.agents[i].time_deleting
    
deletion_time
''';

In [None]:
# Disabled micro-benchmark kept as an inert string: it times 10000 calls each of
# random.randint, math.floor(1000*random.random()) and random.uniform using
# time.time() deltas. Note the "random.random()" label covers the floor() variant.
'''
randint_time = 0
for i in range(10000):
    start_time = time.time()
    random.randint(0, 1000)
    randint_time += time.time() - start_time

random_time = 0
for i in range(10000):
    start_time = time.time()
    math.floor(1000*random.random())
    random_time += time.time() - start_time
    
uniform_time = 0
for i in range(10000):
    start_time = time.time()
    random.uniform(0,1)
    uniform_time += time.time() - start_time
    
    
print( "random.randint(a,b) : {}s".format(randint_time ) )
print( "random.random()     : {}s".format(random_time  ) )
print( "random.uniform()    : {}s".format(uniform_time ) )
''';

## Testing with splitting the input data into 10 parts

In [None]:
# Disabled experiment kept as an inert string: it would slice the inputs and
# solutions into split_count equally sized, even-length chunks.
# NOTE(review): it refers to `inputs` / `solutions`, which no cell visible here
# defines (only the *_2021 / *_2022 variants), and hard-codes 2615; both would
# need updating before this is re-enabled.
''' 
split_count = 10

inputs_splits = []
solutions_splits = []

for i in range(split_count):
    actual_length = len(inputs) // 2
    split_length  = (actual_length // split_count) * 2
    
    start_index =  i    * split_length
    end_index   = (i+1) * split_length
    
    inputs_splits.append(    inputs   [start_index:end_index].copy() )
    solutions_splits.append( solutions[start_index:end_index].copy() )
    
    print(len( inputs[start_index:end_index] ))
    
TEST_inputs = inputs[:2615].copy()
TEST_solutions = solutions[:2615].copy()
''';