## Setup and Constants

In [47]:
# Packages
import numpy as np
import pandas as pd
import random
import names

In [48]:
# Constants
SEED = 42  # Seed for both random generators; set to None for a new population each run
NUM_PLAYERS = 3000  # Number of players to create
PITCH_PROB = 11 / 27  # Probability of choosing pitcher
HITTER_PROB = 1 - PITCH_PROB  # Probability of choosing hitter
LEFT_PITCH_PROB = 0.25  # Probability of choosing left-handed pitcher
LEFT_HIT_PROB = 0.35  # Probability of choosing left-handed batter
MEAN_MAIN_RATING = 70  # Mean rating for creation of main ratings
STD_MAIN_RATING = 9  # Standard deviation for creation of main ratings
ATTRIBUTE_STD = 6  # Standard deviation of each attribute rating around the main rating

# The notebook draws from both the stdlib and the NumPy global generators, so
# both are seeded here to make Restart & Run All reproduce the same players
random.seed(SEED)
np.random.seed(SEED)

# Batting attribute weights (relative weights used when picking attributes)
BATTING_ATTRIBUTES = {
    "contact": 3,
    "power": 1.5,
    "speed/stealing": 2.5,
    "fielding": 3,
    "eye": 2,
    "avoid ks": 2,
}

# Pitching attribute weights (relative weights used when picking attributes)
PITCHING_ATTRIBUTES = {"stuff": 2, "movement": 2.5, "control": 3}

### Functions

## Creating Players

In [49]:
def left_right(df: pd.Series) -> str:
    """
    Randomly picks whether one player is left or right throwing/hitting

    A hitter is left-handed with probability LEFT_HIT_PROB; any other record
    (i.e. a pitcher) is left-handed with probability LEFT_PITCH_PROB.

    Args:
        df: A single player record (one dataframe row, as handed over by
            ``DataFrame.apply(..., axis=1)``); only its "Pitch/Hit" value is read

    Returns: left or right

    """
    # Only the probability differs between the two roles, so choose it first
    # and make a single draw
    left_prob = LEFT_HIT_PROB if df["Pitch/Hit"] == "hit" else LEFT_PITCH_PROB
    # np.random.choice yields a NumPy string scalar; convert to a plain str
    return str(np.random.choice(["left", "right"], p=[left_prob, 1 - left_prob]))


def assign_attributes(df: pd.Series) -> dict:
    """
    Assigns a number to each of one hitter/pitcher's attributes

    Hitters receive the keys of BATTING_ATTRIBUTES, every other record the keys
    of PITCHING_ATTRIBUTES. Attributes are picked one at a time without
    replacement, weighted by the values of those dictionaries, so the weights
    only affect the order in which keys appear in the result. Every rating is
    an independent, rounded draw from a normal distribution centred on the
    record's "Main Rating" with standard deviation ATTRIBUTE_STD.

    Args:
        df: A single player record (one dataframe row); reads "Pitch/Hit" and
            "Main Rating"

    Returns: Dictionary of attributes for one record
    """
    if df["Pitch/Hit"] == "hit":
        remaining = BATTING_ATTRIBUTES.copy()
    else:
        remaining = PITCHING_ATTRIBUTES.copy()

    main_rating = df["Main Rating"]
    ratings = {}
    # NOTE(review): an earlier version also kept a running mean that was
    # lowered after every pick, but it was never passed to the normal draw, so
    # it had no effect on the ratings and was removed as dead code. If later
    # picks were meant to be centred on that reduced mean, reinstate it here.
    while remaining:
        # Weighted pick among the attributes that have not been rated yet
        choice = random.choices(
            list(remaining), weights=list(remaining.values())
        )[0]
        ratings[choice] = np.around(np.random.normal(main_rating, ATTRIBUTE_STD))
        del remaining[choice]

    return ratings

In [50]:
# Create dataframe of one column of normal distributed ratings around 70
df_main = pd.DataFrame(
    np.around(np.random.normal(MEAN_MAIN_RATING, STD_MAIN_RATING, NUM_PLAYERS)),
    columns=["Main Rating"],
)

In [51]:
# Randomly assign every player a role. The size comes from NUM_PLAYERS rather
# than a literal so the column length still matches df_main when the player
# count is changed.
df_main["Pitch/Hit"] = np.random.choice(
    ["pitch", "hit"], size=NUM_PLAYERS, p=[PITCH_PROB, HITTER_PROB]
)

In [52]:
# Draw each player's handedness row by row. left_right only reads "Pitch/Hit",
# so the column does not need to be pre-filled with placeholder values.
df_main["Right/Left"] = df_main.apply(left_right, axis=1)

In [53]:
# Build the attribute dictionary for every player, one row at a time
df_main["Attributes"] = df_main.apply(assign_attributes, axis=1)

In [54]:
# Generate one name per player. No row data is needed, so a plain loop over
# the row count avoids building a throwaway Series for every row via apply.
df_main["Name"] = [names.get_full_name(gender="male") for _ in range(len(df_main))]

In [55]:
# Summary statistics for every column, numeric and non-numeric alike
df_main.describe(include="all")

Unnamed: 0,Main Rating,Pitch/Hit,Right/Left,Attributes,Name
count,3000.0,3000,3000,3000,3000
unique,,2,2,2970,2969
top,,hit,right,"{'control': 76.0, 'stuff': 77.0, 'movement': 8...",John Sanchez
freq,,1793,2110,3,3
mean,69.733667,,,,
std,8.833879,,,,
min,38.0,,,,
25%,64.0,,,,
50%,70.0,,,,
75%,76.0,,,,


In [56]:
# Spot-check four randomly selected players
df_main.sample(4)

Unnamed: 0,Main Rating,Pitch/Hit,Right/Left,Attributes,Name
2372,88.0,pitch,right,"{'control': 75.0, 'stuff': 82.0, 'movement': 9...",Keith Crockett
1090,79.0,hit,right,"{'power': 86.0, 'eye': 79.0, 'avoid ks': 81.0,...",Garrett Washington
1260,74.0,hit,left,"{'fielding': 73.0, 'power': 83.0, 'speed/steal...",Stanley Kasprowicz
1244,70.0,hit,right,"{'speed/stealing': 75.0, 'fielding': 66.0, 'av...",Robert Benge


In [57]:
# Display the finished players table
df_main

Unnamed: 0,Main Rating,Pitch/Hit,Right/Left,Attributes,Name
0,80.0,pitch,left,"{'control': 79.0, 'stuff': 81.0, 'movement': 7...",Jonathan Garvey
1,67.0,hit,right,"{'avoid ks': 65.0, 'eye': 75.0, 'contact': 65....",Allen Robbins
2,64.0,pitch,right,"{'movement': 59.0, 'control': 61.0, 'stuff': 6...",Rodney Bates
3,81.0,pitch,right,"{'control': 90.0, 'movement': 82.0, 'stuff': 9...",William Herbert
4,85.0,hit,right,"{'speed/stealing': 92.0, 'eye': 85.0, 'contact...",James Lockett
...,...,...,...,...,...
2995,74.0,hit,right,"{'eye': 65.0, 'avoid ks': 80.0, 'fielding': 76...",Stephen Naquin
2996,71.0,pitch,right,"{'stuff': 66.0, 'control': 77.0, 'movement': 7...",Richard Masters
2997,67.0,hit,right,"{'eye': 72.0, 'fielding': 61.0, 'power': 71.0,...",Patrick Keller
2998,59.0,pitch,left,"{'control': 73.0, 'stuff': 56.0, 'movement': 6...",Henry Smith
