# GAMES

This step pulls game data one year at a time.

It will create:
```
games_2015.csv
...
all_games.csv
```

[The historical website](https://www.pro-football-reference.com)

[Work spreadsheet for designs](https://docs.google.com/spreadsheets/d/1MkQ_J3gGDs_vkkERELOZojhwS9Wt-1eX4vFL7VP9TQE/edit#gid=0)

In [1]:
import sys
import pandas as pd

# Put the parent directory first on the module search path before the
# `utils` imports below (presumably where the `utils` package lives -- confirm).
sys.path.insert(0, '..')
import utils.game_utils as gu
import utils.fetch_games as fetch_games
import utils.add_aggregates as add_aggregates

# Relative path to the data directory; handed to the fetch/build calls.
data_path = '../../data'

In [None]:
#*
#* ONE YEAR PER LINE
#* to simplify pulling only 1 year
#* when needed: comment out the
#* years you do not want to fetch.
#*
years_to_fetch = [
    2010,
    2011,
    2012,
    2013,
    2014,
    2015,
    2016,
    2017,
    2018,
    2019,
    2020,
    2021,
]

# Same call, same order as before: one fetch-and-write per listed year.
for year in years_to_fetch:
    fetch_games.fetch_and_write_year(year, data_path)

## write final file
fetch_games.build_all_game_files(data_path)

print("Done. Now start the next step:")

# STATS AND INFO : DATA

Fetching info and stats requires a lot of calls. The fetcher first checks whether we already have the info and stat files and moves on if so. This means the function can be run as often as you like -- it will only fetch things we don't already have files for.

In [None]:
import sys

# Put the parent directory first on the module search path before the
# `utils` import below.
sys.path.insert(0, '..')
import utils.fetch_info_and_stats as fetch_data

#* FETCHES any needed info or stat files
#* (per the section text, files we already have are not fetched again)
fetch_data.fetch_all_info_and_stats()

#* CREATES `all_games_with_data.csv` by appending all DATA with all_games.csv
#* NOTE(review): presumably needs the info/stat files fetched above -- keep this order.
fetch_data.create_all_games_with_data()

print("Done.")

# ADD AGGREGATES

Now that we have all the static data in place, we want to add our own aggregated values. This will give us week-by-week information about the data at the time.

In [3]:
import sys
import pandas as pd

sys.path.insert(0, '..')
import utils.game_utils as gu
import utils.fetch_games as fetch_games
import utils.add_aggregates as add_aggregates

# Relative path to the data directory.
data_path = '../../data'

# Load the combined games table only to discover which years it contains.
games_csv = f'{data_path}/games/all_games_with_data.csv'
df = pd.read_csv(games_csv)
years = df['year'].unique()

# Run the aggregate step once per year, in the order the years first
# appear in the file, announcing each year before it starts.
for year in years:
    print(f'Processing Year : {year}   -- -- -- -- -- --')
    add_aggregates.create_year_file(year, data_path)

print('Done.')


Processing Year : 2010   -- -- -- -- -- --
0 %
1 %
2 %
3 %
4 %
5 %
6 %
7 %
8 %
9 %
10 %
11 %
12 %
13 %
14 %
15 %
16 %
17 %
18 %
19 %
20 %
21 %
22 %
23 %
24 %
25 %
26 %
27 %
28 %
29 %
30 %
31 %
32 %
33 %
34 %
35 %
36 %
37 %
38 %
39 %
40 %
41 %
42 %
43 %
44 %
45 %
46 %
47 %
48 %
49 %
50 %
51 %
52 %
53 %
54 %
55 %
56 %
57 %
58 %
59 %
60 %
61 %
62 %
63 %
64 %
65 %
66 %
67 %
68 %
69 %
70 %
71 %
72 %
73 %
74 %
75 %
76 %
77 %
78 %
79 %
80 %
81 %
82 %
83 %
84 %
85 %
86 %
87 %
88 %
89 %
90 %
91 %
92 %
93 %
94 %
95 %
96 %
97 %
98 %
99 %
100 %
100 %
Processing Year : 2011   -- -- -- -- -- --
0 %
1 %
2 %
3 %
4 %
5 %
6 %
7 %
8 %
9 %
10 %
11 %
12 %
13 %
14 %
15 %
16 %
17 %
18 %
19 %
20 %
21 %
22 %
23 %
24 %
25 %
26 %
27 %
28 %
29 %
30 %
31 %
32 %
33 %
34 %
35 %
36 %
37 %
38 %
39 %
40 %
41 %
42 %
43 %
44 %
45 %
46 %
47 %
48 %
49 %
50 %
51 %
52 %
53 %
54 %
55 %
56 %
57 %
58 %
59 %
60 %
61 %
62 %
63 %
64 %
65 %
66 %
67 %
68 %
69 %
70 %
71 %
72 %
73 %
74 %
75 %
76 %
77 %
78 %
79 %
80 %
81 %
82 %
83 %
84

In [1]:
import sys
import pandas as pd

sys.path.insert(0, '..')
import utils.game_utils as gu
import utils.fetch_games as fetch_games
import utils.add_aggregates as add_aggregates

# Relative path to the data directory.
data_path = '../../data'

# df = add_aggregates.create_all_games_with_data_and_agg(data_path, 2018)
# Re-run the aggregate step for a single year. data_path is passed
# explicitly so this call matches the create_year_file(year, data_path)
# form used when looping over every year, instead of depending on
# whatever default the helper may have.
df = add_aggregates.create_year_file(2020, data_path)
print('Done.')


Done.


In [12]:
# Self-contained setup, mirroring the other cells: without these lines the
# cell only works if an earlier cell already imported pandas and extended
# sys.path in the same kernel session.
import sys
import pandas as pd

sys.path.insert(0, '..')
import utils.add_aggregates as add_aggregates

# Relative path to the data directory.
data_path = '../../data'
all_games_df = pd.read_csv(f'{data_path}/games/all_games_with_data.csv')

# Get the 2013 working frame from the full table
# (presumably a filter on `year` -- confirm in add_aggregates.get_year_df).
work_df = add_aggregates.get_year_df(2013, all_games_df)

# Display the first row to inspect the available columns.
work_df.iloc[0]


date                                    2013-09-05
year                                          2013
week                                             1
team                                Denver Broncos
team_score                                      49
                                         ...      
opponent_third_down_conversions                  8
opponent_third_down_ratio                     0.36
opponent_fourth_down_count                       1
opponent_fourth_down_conversions                 0
opponent_fourth_down_ratio                     0.0
Name: 1536, Length: 68, dtype: object

In [13]:
# Display the row labels of work_df; per the output below they are not
# renumbered from 0 (they start at 1536).
work_df.index

Int64Index([1536, 1537, 1538, 1539, 1540, 1541, 1542, 1543, 1544, 1545,
            ...
            2038, 2039, 2040, 2041, 2042, 2043, 2044, 2045, 2046, 2047],
           dtype='int64', length=512)

In [15]:
# Print every row label of work_df, one per line.
row_labels = work_df.index.values
for index in row_labels:
    print(index)

1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
