## Setup and Constants

In [2]:
# Packages
import numpy as np
import pandas as pd
import random
import names

In [3]:
# Constants

# Population size and role / handedness probabilities
NUM_PLAYERS = 3000  # How many players to generate
PITCH_PROB = 11 / 27  # Chance that a generated player is a pitcher
HITTER_PROB = 1 - PITCH_PROB  # Chance that a generated player is a hitter
LEFT_PITCH_PROB = 0.25  # Chance that a pitcher is left-handed
LEFT_HIT_PROB = 0.35  # Chance that a hitter is left-handed

# Rating distributions
MEAN_MAIN_RATING = 70  # Mean of the normal distribution used for main ratings
STD_MAIN_RATING = 9  # Standard deviation of the main rating distribution
ATTRIBUTE_STD = 6  # Standard deviation used when drawing each attribute rating

# Batting attribute weights (used as weights for the random attribute pick)
BATTING_ATTRIBUTES = {
    "contact": 3,
    "power": 1.5,
    "speed/stealing": 2.5,
    "fielding": 3,
    "eye": 2,
    "avoid ks": 2,
}

# Pitching attribute weights (used as weights for the random attribute pick)
PITCHING_ATTRIBUTES = {
    "stuff": 2,
    "movement": 2.5,
    "control": 3,
}

### Functions

## Creating Players

In [4]:
def left_right(df: pd.DataFrame) -> str:
    """
    Randomly picks whether a single player is left- or right-handed

    Hitters are left-handed with probability LEFT_HIT_PROB; any other record is
    treated as a pitcher and is left-handed with probability LEFT_PITCH_PROB.

    Args:
        df: One player record (a row of the player dataframe); only its
            "Pitch/Hit" value is read

    Returns: left or right

    """
    # Select the left-handed probability that matches the player's role
    left_prob = LEFT_HIT_PROB if df["Pitch/Hit"] == "hit" else LEFT_PITCH_PROB
    sides = ["left", "right"]
    return np.random.choice(sides, p=[left_prob, 1 - left_prob])


def assign_attributes(df: pd.DataFrame) -> dict:
    """
    Assigns a number to each hitter/pitcher's attributes

    Attributes are rated one at a time in a weighted random order, using the
    weights in BATTING_ATTRIBUTES / PITCHING_ATTRIBUTES. Each rating is drawn
    from a normal distribution centred on a running mean that starts at the
    record's main rating and is lowered after every draw, so attributes rated
    later tend to come out lower.

    Args:
        df: One player record (a row of the player dataframe) providing the
            "Pitch/Hit" and "Main Rating" values

    Returns: Dictionary of attributes for one record, mapping attribute name to
        its rounded rating in the order the attributes were rated
    """
    if df["Pitch/Hit"] == "hit":
        remaining = BATTING_ATTRIBUTES.copy()
    else:
        remaining = PITCHING_ATTRIBUTES.copy()
    ratings = {}
    running_mean = df["Main Rating"]
    # Exactly one attribute is removed per pass, so this rates every attribute once
    for _ in range(len(remaining)):
        choice = random.choices(list(remaining.keys()), list(remaining.values()))[0]
        # Draw around the running mean (not the fixed main rating) so that the
        # decay applied at the end of each pass actually affects later attributes
        att_rating = np.around(np.random.normal(running_mean, ATTRIBUTE_STD))
        z_score = (att_rating - running_mean) / ATTRIBUTE_STD
        ratings[choice] = att_rating
        remaining.pop(choice)
        # Lower the mean by 5 points adjusted by how far this draw was from the
        # mean: an above-average draw costs the remaining attributes more
        running_mean = running_mean - abs(5 + z_score)

    return ratings

In [5]:
def create_player_df() -> pd.DataFrame:
    """
    Builds the dataframe of randomly generated players

    Creates NUM_PLAYERS records. Each record gets a rounded main rating drawn
    from a normal distribution (MEAN_MAIN_RATING, STD_MAIN_RATING), a
    pitcher/hitter role, a handedness, a dictionary of attribute ratings and a
    randomly generated male full name.

    Returns: Dataframe with the columns "Main Rating", "Pitch/Hit",
        "Right/Left", "Attributes" and "Name"

    """
    df_main = pd.DataFrame(
        np.around(np.random.normal(MEAN_MAIN_RATING, STD_MAIN_RATING, NUM_PLAYERS)),
        columns=["Main Rating"],
    )
    # The size follows NUM_PLAYERS (not a literal) so the column length always
    # matches the frame when the constant is changed
    df_main["Pitch/Hit"] = np.random.choice(
        ["pitch", "hit"], size=NUM_PLAYERS, p=[PITCH_PROB, HITTER_PROB]
    )
    # Handedness and attributes depend on the role, so they are derived per row
    df_main["Right/Left"] = df_main.apply(left_right, axis=1)
    df_main["Attributes"] = df_main.apply(assign_attributes, axis=1)
    # Names do not depend on the row contents, so no row-wise apply is needed
    df_main["Name"] = [names.get_full_name(gender="male") for _ in range(len(df_main))]

    return df_main

In [7]:
# Generate the player table once; every call draws a fresh random population
df = create_player_df()

# Bare last expression so the notebook renders the dataframe as the cell output
df

Unnamed: 0,Main Rating,Pitch/Hit,Right/Left,Attributes,Name
0,67.0,pitch,right,"{'control': 57.0, 'movement': 72.0, 'stuff': 6...",Victor Williams
1,56.0,hit,left,"{'avoid ks': 59.0, 'fielding': 63.0, 'speed/st...",Patrick Palmer
2,57.0,pitch,right,"{'stuff': 57.0, 'control': 52.0, 'movement': 5...",Randall Stein
3,74.0,pitch,right,"{'movement': 68.0, 'stuff': 79.0, 'control': 6...",James Valadez
4,55.0,hit,right,"{'speed/stealing': 57.0, 'fielding': 52.0, 'av...",Jerry Skelton
...,...,...,...,...,...
2995,72.0,hit,right,"{'eye': 81.0, 'fielding': 71.0, 'avoid ks': 68...",David Appell
2996,73.0,hit,right,"{'fielding': 69.0, 'eye': 73.0, 'contact': 65....",Thomas Singleton
2997,78.0,hit,right,"{'avoid ks': 71.0, 'eye': 83.0, 'speed/stealin...",Donald Contreras
2998,84.0,hit,left,"{'fielding': 97.0, 'eye': 85.0, 'contact': 95....",John Wright
