This notebook walks through creating the remaining tables (league, contract, user and team) and expanding some existing tables. It uses FiveThirtyEight's most common names dataset to randomly generate user names.

Datasets: 

(1) https://www.kaggle.com/zynicide/nfl-football-player-stats  
(2) https://www.kaggle.com/fivethirtyeight/fivethirtyeight-most-common-name-dataset

In [9]:
import pickle
from random import sample
from random import seed
from random import shuffle

import numpy as np
import pandas as pd

## Re-open Old Pickles

In [2]:
# Restore the offense table from its pickle.
# pickle.load can execute arbitrary code, so only point this at a trusted file.
with open(r'../Data/Pickles/offense.pickle', 'rb') as offense_file:
    offense = pickle.load(offense_file)

In [3]:
# Restore the defense table from its pickle.
# pickle.load can execute arbitrary code, so only point this at a trusted file.
with open(r'../Data/Pickles/defense.pickle', 'rb') as defense_file:
    defense = pickle.load(defense_file)

In [4]:
# Preview the first rows of the offense table to confirm it loaded.
offense.head()

Unnamed: 0,gameNumber,passYards,rushYards,recYards,interceptions,touchdowns,playerID,position
4,1,0,0,49,0,0,Emmanuel Sanders,WR
5,2,0,0,39,0,0,Emmanuel Sanders,WR
6,3,0,0,117,0,2,Emmanuel Sanders,WR
7,4,0,0,88,0,1,Emmanuel Sanders,WR
8,5,0,0,80,0,0,Emmanuel Sanders,WR


In [5]:
# Preview the first rows of the defense table to confirm it loaded.
defense.head()

Unnamed: 0,playerID,gameNumber,sacks,interceptions,touchdowns,safeties,blocks
0,Arizona Cardinals,1,2.0,0,0,0,0
1,Arizona Cardinals,2,2.0,2,0,0,0
2,Arizona Cardinals,3,2.0,1,0,1,0
3,Arizona Cardinals,4,2.0,0,0,0,0
4,Arizona Cardinals,5,2.0,0,0,0,0


## Create users

In [6]:
# Load the names dataset used to generate fictional users.
names = pd.read_csv(r'../Data/Datasets/names.csv')

In [7]:
# Only the two name columns are needed from the dataset.
names = names.loc[:, ['FirstName', 'Surname']]

# np.unique de-duplicates and sorts; the results are kept as plain lists so
# they can be passed to random.sample later.
first_name_values = names['FirstName'].to_numpy()
surname_values = names['Surname'].to_numpy()
fnames = list(np.unique(first_name_values))
lnames = list(np.unique(surname_values))

In [8]:
# Report how many distinct first names and surnames are available to sample from.
print(f'{len(fnames)} first names')
print(f'{len(lnames)} last names')

20 first names
20 last names


To generate 20 names, let's sample 5 first names and 4 last names, then pair every first name with every last name (5 × 4 = 20).

In [10]:
# Seed the `random` module before the first draw so that Restart & Run All
# regenerates the same users, leagues and contracts. Every later sample() and
# shuffle() call in this notebook draws from this same seeded stream.
RANDOM_SEED = 42
seed(RANDOM_SEED)

sample_fnames = sample(fnames, 5)
sample_lnames = sample(lnames, 4)

# Preview only: zip() stops at the shorter list, so this shows 4 pairs rather
# than every first-name/last-name combination.
list(zip(sample_fnames, sample_lnames))

[('Linda', 'Moore'),
 ('Robert', 'Thomas'),
 ('Susan', 'Jones'),
 ('Richard', 'Miller')]

In [11]:
# Pair every sampled first name with every sampled last name, first names
# varying slowest, to get "First Last" strings.
generated_names = [
    first + ' ' + last
    for first in sample_fnames
    for last in sample_lnames
]

In [12]:
# Show the generated full names in their original (unshuffled) order.
print(generated_names)

['Linda Moore', 'Linda Thomas', 'Linda Jones', 'Linda Miller', 'Robert Moore', 'Robert Thomas', 'Robert Jones', 'Robert Miller', 'Susan Moore', 'Susan Thomas', 'Susan Jones', 'Susan Miller', 'Richard Moore', 'Richard Thomas', 'Richard Jones', 'Richard Miller', 'James Moore', 'James Thomas', 'James Jones', 'James Miller']


In [13]:
# shuffle() reorders generated_names in place, so re-running this cell
# reshuffles the already-shuffled list.
shuffle(generated_names)
print(generated_names)

['James Miller', 'Susan Thomas', 'Richard Miller', 'Robert Thomas', 'James Moore', 'James Thomas', 'Robert Jones', 'Linda Miller', 'Richard Thomas', 'Robert Miller', 'Linda Jones', 'Susan Miller', 'Susan Jones', 'James Jones', 'Robert Moore', 'Susan Moore', 'Richard Moore', 'Linda Moore', 'Richard Jones', 'Linda Thomas']


## Make "league" Table

In [14]:
# The first 10 shuffled names form league 1; everyone after that is league 2.
league_size = 10
league1, league2 = generated_names[:league_size], generated_names[league_size:]

In [15]:
# Two leagues, described column by column.
league = pd.DataFrame({
    'leagueID': [1, 2],
    'leagueType': ['family', 'competitive'],
})

# Display only: the re-indexed frame is not assigned back to `league`.
league.set_index('leagueID')

Unnamed: 0_level_0,leagueType
leagueID,Unnamed: 1_level_1
1,family
2,competitive


In [16]:
# Export the league table without a header row. `league` still carries its
# default 0..n-1 index (the set_index('leagueID') result above was only
# displayed, never assigned), so index=False is needed to keep that index from
# being written as an extra leading column. This matches the other CSV exports
# in this notebook.
league.to_csv(r'../Data/CSV/leagues.csv', index=False, header=False)

## Sample players to help form contracts

In [17]:
# Candidate pools: every distinct player at each offensive position, plus
# every distinct defense.
l1_qb_pool = list(offense.loc[offense['position'] == 'QB', 'playerID'].unique())
l1_rb_pool = list(offense.loc[offense['position'] == 'RB', 'playerID'].unique())
l1_wr_pool = list(offense.loc[offense['position'] == 'WR', 'playerID'].unique())
l1_defense_pool = list(defense['playerID'].unique())

# League 1 draws one QB, two RBs, two WRs and one defense per owner.
# sample() draws without replacement, so no player repeats within a position.
l1_owner_count = len(league1)
l1_qbs = sample(l1_qb_pool, l1_owner_count)
l1_rbs = sample(l1_rb_pool, 2 * l1_owner_count)
l1_wrs = sample(l1_wr_pool, 2 * l1_owner_count)
l1_defense = sample(l1_defense_pool, l1_owner_count)

In [18]:
# Candidate pools: every distinct player at each offensive position, plus
# every distinct defense.
l2_qb_pool = list(offense.loc[offense['position'] == 'QB', 'playerID'].unique())
l2_rb_pool = list(offense.loc[offense['position'] == 'RB', 'playerID'].unique())
l2_wr_pool = list(offense.loc[offense['position'] == 'WR', 'playerID'].unique())
l2_defense_pool = list(defense['playerID'].unique())

# League 2 draws independently of league 1: one QB, two RBs, two WRs and one
# defense per owner, without replacement within each position.
l2_owner_count = len(league2)
l2_qbs = sample(l2_qb_pool, l2_owner_count)
l2_rbs = sample(l2_rb_pool, 2 * l2_owner_count)
l2_wrs = sample(l2_wr_pool, 2 * l2_owner_count)
l2_defense = sample(l2_defense_pool, l2_owner_count)

## Make "contract" table

In [19]:
# Players in draft order: league 1 (QBs, RBs, WRs, defenses), then league 2.
contract_players = (
    l1_qbs + l1_rbs + l1_wrs + l1_defense
    + l2_qbs + l2_rbs + l2_wrs + l2_defense
)

# Owners lined up with the players above. Each owner list is repeated twice
# for RBs and WRs because twice as many of those were sampled.
contract_owners = (
    league1 + 2 * league1 + 2 * league1 + league1
    + league2 + 2 * league2 + 2 * league2 + league2
)

# Every contract gets the same constant values: weeks 1-16, active.
contract = pd.DataFrame({'playerID': contract_players, 'owner': contract_owners}).assign(
    startWeek=1,
    endWeek=16,
    isActive=1,
)

In [20]:
# Inspect the contract table (one row per player/owner pairing).
contract

Unnamed: 0,playerID,owner,startWeek,endWeek,isActive
0,Andrew Luck,James Miller,1,16,1
1,Marcus Mariota,Susan Thomas,1,16,1
2,Andy Dalton,Richard Miller,1,16,1
3,Drew Stanton,Robert Thomas,1,16,1
4,Matthew Stafford,James Moore,1,16,1
...,...,...,...,...,...
115,New Orleans Saints,Susan Moore,1,16,1
116,San Francisco 49ers,Richard Moore,1,16,1
117,Tampa Bay Buccaneers,Linda Moore,1,16,1
118,Detroit Lions,Richard Jones,1,16,1


## Create "user" table

In [21]:
# Split each "First Last" string on spaces and keep the first two pieces as
# the user's first and last name.
name_parts = [full_name.split(' ') for full_name in generated_names]
user = pd.DataFrame([(parts[0], parts[1]) for parts in name_parts],
                    columns=['nameFirst', 'nameLast'])

In [22]:
# Assign sequential 0-based IDs following the order of generated_names.
user['userID'] = list(range(len(generated_names)))

In [23]:
# Preview the first rows of the user table.
user.head()

Unnamed: 0,nameFirst,nameLast,userID
0,James,Miller,0
1,Susan,Thomas,1
2,Richard,Miller,2
3,Robert,Thomas,3
4,James,Moore,4


In [26]:
# Export without index or header; column order is nameFirst, nameLast, userID.
user.to_csv(r'../Data/CSV/users.csv', index=False, header=False)

## Create "team" table

In [27]:
# Map each owner name to a league ID: league1 members to 1, league2 members to 2.
leagues1 = dict.fromkeys(league1, 1)
leagues2 = dict.fromkeys(league2, 2)

# Fold league 2 into leagues1 so that, despite its name, it covers every owner.
leagues1.update(leagues2)

In [28]:
# One team per generated name, numbered by position in generated_names.
# The userID column holds the owner's full name, and leagueID is looked up
# from the combined leagues1 mapping.
teams = pd.DataFrame({
    'teamID': [f'Team{number}' for number in range(len(generated_names))],
    'userID': list(generated_names),
    'leagueID': [leagues1[owner_name] for owner_name in generated_names],
})

In [29]:
# Inspect the team table (one team per owner).
teams

Unnamed: 0,teamID,userID,leagueID
0,Team0,James Miller,1
1,Team1,Susan Thomas,1
2,Team2,Richard Miller,1
3,Team3,Robert Thomas,1
4,Team4,James Moore,1
5,Team5,James Thomas,1
6,Team6,Robert Jones,1
7,Team7,Linda Miller,1
8,Team8,Richard Thomas,1
9,Team9,Robert Miller,1


In [30]:
# Export without index or header; column order is teamID, userID, leagueID.
# NOTE(review): the userID column here holds the owner's full name, while the
# user table's userID is an integer -- confirm which one the team table's
# consumer expects.
teams.to_csv(r'../Data/CSV/team.csv', index=False, header=False)

## Merge teams and contracts and drop names

In [31]:
# Show the contract table again before merging; it is still keyed by owner name.
contract

Unnamed: 0,playerID,owner,startWeek,endWeek,isActive
0,Andrew Luck,James Miller,1,16,1
1,Marcus Mariota,Susan Thomas,1,16,1
2,Andy Dalton,Richard Miller,1,16,1
3,Drew Stanton,Robert Thomas,1,16,1
4,Matthew Stafford,James Moore,1,16,1
...,...,...,...,...,...
115,New Orleans Saints,Susan Moore,1,16,1
116,San Francisco 49ers,Richard Moore,1,16,1
117,Tampa Bay Buccaneers,Linda Moore,1,16,1
118,Detroit Lions,Richard Jones,1,16,1


In [32]:
# Expose the teams' name column under the key the contracts use, join each
# contract to its owner's team and league (inner join), then drop the name so
# only IDs remain.
teams_by_owner = teams.rename(columns={'userID': 'owner'})
real_contracts = contract.merge(teams_by_owner, on='owner').drop(columns='owner')

In [33]:
# Export the merged contracts without index or header.
real_contracts.to_csv(r'../Data/CSV/contracts.csv', index=False, header=False)