# A06. Weather 

Note: All historic Park and Weather Factors files are created from M01. Park and Weather Factors.ipynb upon the training of new models. A06. Weather is for daily files only.

### Imports

In [4]:
# Run the shared setup notebooks only when `__main__` has no `__file__` attribute
# (presumably an interactive notebook session rather than a script - confirm).
# NOTE(review): `sys` must already be in scope for this check; nothing visible
# in this notebook imports it before this cell.
if not hasattr(sys.modules['__main__'], '__file__'):
    # `%run` is an IPython magic that executes the given notebook. These are
    # presumably where names used below (e.g. todaysdate, team_map,
    # baseball_path) are defined - TODO confirm.
    %run "C:\Users\james\Documents\MLB\Code\U1. Imports.ipynb"
    %run "C:\Users\james\Documents\MLB\Code\U2. Utilities.ipynb"
    %run "C:\Users\james\Documents\MLB\Code\U3. Classes.ipynb"
    %run "C:\Users\james\Documents\MLB\Code\U4. Datasets.ipynb"
    %run "C:\Users\james\Documents\MLB\Code\U5. Models.ipynb"
    print("Imports executed")

Imports executed


### Games

In [6]:
# Build the day's schedule only when `__main__` has no `__file__` attribute
# (same guard as the imports cell).
if not hasattr(sys.modules['__main__'], '__file__'):
    # Daily file: the range starts and ends on today's date
    start_date = end_date = todaysdate
    game_df = create_games(start_date, end_date, team_map)
    print("game_df created.")

game_df created.


### Functions

##### 1. Swish Analytics

Swish Analytics contains weather projections to be used before MLB Stats API updates theirs.

Scrape

In [11]:
# Scrape Swish Analytics for weather data
def swishanalytics(date):
    """Scrape the Swish Analytics MLB weather page for a single date.

    Args:
        date: Date string in ``YYYYMMDD`` form. It is re-formatted to
            ``YYYY-MM-DD`` for the URL query string.

    Returns:
        A DataFrame with one row per weather card that contained a table.
        Each row is built from the third column of that card's table (the
        second time period listed). Table rows 1-6 are renamed to
        ``Weather``, ``Temperature``, ``Feels Like``, ``Humidity``, ``Speed``
        and ``Direction`` (this assumes the site keeps that row order -
        TODO confirm). ``Time`` holds the header of the selected column,
        ``Matchup`` the card heading and ``FANGRAPHSTEAM`` the text after
        the "@" in the heading.

    Raises:
        RuntimeError: If the page does not return HTTP 200.
        ValueError: If no weather tables were found on the page.
    """
    # Reformat date to fit URL
    date_dash = f"{date[:4]}-{date[4:6]}-{date[6:8]}"

    # Swish Analytics URL
    url = "https://swishanalytics.com/mlb/weather?date=" + date_dash

    # A timeout keeps a stalled connection from hanging the daily run
    response = requests.get(url, timeout=30)

    # Fail loudly instead of falling through to an undefined list of frames
    if response.status_code != 200:
        print("Failed to retrieve the page. Status code:", response.status_code)
        raise RuntimeError(
            f"Swish Analytics request failed with status code {response.status_code}"
        )

    # Parse the HTML content
    soup = BeautifulSoup(response.text, 'html.parser')

    # One single-row DataFrame is collected per weather card
    dfs = []

    # Iterate over each div with the class 'weather-card'
    for weather_card in soup.find_all('div', class_='weather-card'):
        # Cards without a table carry no weather data
        table = weather_card.find('table', class_='table-bordered')
        if not table:
            continue

        # Extract time and location information
        time_info = weather_card.find('small', class_='text-muted')
        location_info = weather_card.find('h4', class_='lato inline vert-mid bold')
        card_time = time_info.text.strip() if time_info else None
        location = location_info.text.strip() if location_info else None

        # Extract table data into a list of lists (one inner list per <tr>)
        data = [
            [cell.text.strip() for cell in row.find_all(['th', 'td'])]
            for row in table.find_all('tr')
        ]

        # Convert data into a pandas DataFrame
        df = pd.DataFrame(data)

        # Set the first row as the column headers, then drop it from the data.
        # Copy so the column assignments below do not write into a slice.
        df.columns = df.iloc[0]
        df = df[1:].copy()

        # Add time and location as additional columns
        df['Time'] = card_time
        df['Location'] = location

        # Create dataframe from the second time period scraped
        daily_weather_df = pd.DataFrame(df.iloc[:, 2]).T
        # Extract home team name (label 1 is the first data row)
        daily_weather_df['Matchup'] = df['Location'][1]
        daily_weather_df['FANGRAPHSTEAM'] = daily_weather_df['Matchup'].str.split("@", expand=True).iloc[:, 1]
        daily_weather_df['FANGRAPHSTEAM'] = daily_weather_df['FANGRAPHSTEAM'].str.replace("\xa0\xa0", "")

        dfs.append(daily_weather_df)

    # pd.concat would raise an opaque "No objects to concatenate" here
    if not dfs:
        raise ValueError(f"No Swish Analytics weather tables found for {date_dash}")

    # Append together dataframes
    df = pd.concat(dfs, axis=0)

    # Rename columns
    df.rename(columns={1:'Weather', 2:'Temperature', 3:'Feels Like', 4:'Humidity', 5:'Speed', 6:'Direction', 'BBREFTEAM': 'home_team'}, inplace=True)

    # Clean
    df['Speed'] = df['Speed'].str.replace(" mph", "").astype(float)
    df['Temperature'] = df['Temperature'].str.replace('°', '')
    df['Feels Like'] = df['Feels Like'].str.replace('°', '')
    # The index holds the header of the selected time-period column
    df.reset_index(drop=False, inplace=True, names='Time')

    return df

##### 2. RotoGrinders

RotoGrinders hosts weather warnings used to identify matchups to avoid based on weather risk.

Scrape

In [15]:
def rotogrinders(date, team_map):
    """Scrape the RotoGrinders MLB weather blurbs into a DataFrame.

    Args:
        date: Value written to the ``date`` column of the result. It is not
            used to build the URL.
        team_map: DataFrame with ``ROTOGRINDERSTEAM`` and ``DKTEAM`` columns,
            used to translate the scraped team names.

    Returns:
        On HTTP 200, a DataFrame with the columns ``Tag``, ``Tag2``,
        ``Matchup``, ``Away``, ``Home``, ``Description`` and ``date``.
        Otherwise an error-message string containing the status code.
    """
    # Fetch the page containing the weather blurbs
    page = requests.get("https://rotogrinders.com/weather/mlb")

    # Anything other than 200 is reported back as a message string
    if page.status_code != 200:
        return "Failed to retrieve data. Response status code: {}".format(page.status_code)

    parsed = BeautifulSoup(page.text, "html.parser")

    # One record per <li class="weather-blurb">
    records = []
    for blurb in parsed.find_all("li", class_="weather-blurb"):
        # Up to two colored tags; missing ones are recorded as None
        colored_spans = blurb.find_all("span", class_=["green", "yellow", "orange", "red"])
        labels = [span.text.strip() for span in colored_spans[:2]] + [None, None]

        # The bold span holds the matchup; its next sibling span the description
        bold_span = blurb.find("span", class_="bold")
        if bold_span:
            matchup_text = bold_span.text.strip()
            sibling_span = bold_span.find_next_sibling("span")
            blurb_text = sibling_span.text.strip() if sibling_span else None
        else:
            matchup_text = None
            blurb_text = None

        records.append({"Tag": labels[0], "Tag2": labels[1], "Matchup": matchup_text, "Description": blurb_text})

    # Convert the list of dictionaries to a DataFrame
    frame = pd.DataFrame(records)

    # Split "AWAY @ HOME" into its two sides
    frame[['away', 'home']] = frame['Matchup'].str.split(" @ ", expand=True)

    # Add in DK team abbreviations, first for the away side, then the home side
    abbreviations = team_map[['ROTOGRINDERSTEAM', 'DKTEAM']]
    frame = frame.merge(abbreviations, left_on=['away'], right_on=['ROTOGRINDERSTEAM'], how='left', suffixes=("", "_away"))
    frame = frame.merge(abbreviations, left_on=['home'], right_on=['ROTOGRINDERSTEAM'], how='left', suffixes=("", "_home"))

    # Keep the reporting columns and give the abbreviations readable names
    keep = ['Tag', 'Tag2', 'Matchup', 'DKTEAM', 'DKTEAM_home', 'Description']
    frame = frame[keep].rename(columns={'DKTEAM':'Away', 'DKTEAM_home': 'Home'})

    # Add the date column to the DataFrame
    frame['date'] = date

    return frame

##### 3. Park x Weather Factors

Reverse the wind direction so it names where the wind is blowing to, not where it is coming from

In [18]:
# This reverses winds so that they're named for where they're going, not where they're from. This is so vectors make more sense logically.
def wind_reverser(direction):
    """Return the opposite compass direction, in upper case.

    Each uppercase N/S/E/W in ``direction`` is swapped for its opposite
    (N<->S, E<->W). Other characters are left alone, and the result is
    upper-cased.
    """
    # Single pass: every cardinal letter maps straight to its opposite
    opposites = str.maketrans("NSEW", "SNWE")
    return direction.translate(opposites).upper()

Determine angle between wind and park directions

In [20]:
# This calculates number of degrees for each direction
def find_degree(direction):
    """Convert a 16-point compass direction to degrees clockwise from north.

    Args:
        direction: One of the 16 compass abbreviations ("N", "NNE", "NE",
            ..., "NNW"), upper case.

    Returns:
        The bearing in degrees as a float: "N" is 0.0 and each successive
        point clockwise adds 22.5.

    Raises:
        ValueError: If ``direction`` is not one of the 16 abbreviations.
    """
    # Clockwise from north; the position in this tuple is the number of 22.5-degree steps
    compass_points = (
        "N", "NNE", "NE", "ENE",
        "E", "ESE", "SE", "SSE",
        "S", "SSW", "SW", "WSW",
        "W", "WNW", "NW", "NNW",
    )

    try:
        steps = compass_points.index(direction)
    except ValueError:
        # Name the offending value instead of failing on an unassigned local
        raise ValueError(f"Unrecognized compass direction: {direction!r}") from None

    return steps * 22.5

Calculate wind x and y vectors

In [22]:
def calculate_vectors(row, azimuth_column, wind_column, speed_column):
    """Split a wind reading into x and y components relative to the park azimuth.

    Args:
        row: Mapping or Series indexed by the three column names below.
        azimuth_column: Key of the park azimuth angle, in degrees.
        wind_column: Key of the wind angle, in degrees.
        speed_column: Key of the wind speed.

    Returns:
        A Series indexed ``['x_vect', 'y_vect']``: the sine and cosine of
        (wind angle - azimuth), each rounded to 5 decimals and then scaled
        by the speed.
    """
    wind_speed = row[speed_column]
    relative_radians = math.radians(row[wind_column] - row[azimuth_column])

    # Round the unit components before scaling
    unit_x = round(math.sin(relative_radians), 5)
    unit_y = round(math.cos(relative_radians), 5)

    return pd.Series([unit_x * wind_speed, unit_y * wind_speed], index=['x_vect', 'y_vect'])

Calculate Park x Weather Factors

In [81]:
def calculate_wfx(game_df, swishanalytics_df, venue_map_df, l_park_latest_df, r_park_latest_df, base_rate_df):
    """Compute park x weather factors (WFX) for left- and right-handed batters.

    Relies on names defined outside this function: ``create_box``,
    ``clean_weather``, ``team_dict``, ``events_list``, ``predict_wfx_l`` and
    ``predict_wfx_r``.

    Args:
        game_df: Games to score; read for ``game_id``, ``venue_id`` and
            ``home_team``. Modified in place: the columns ``weather``,
            ``wind``, ``venue``, ``date`` and ``missing_weather`` are set
            from ``create_box``.
        swishanalytics_df: Swish Analytics weather, or ``None`` to skip the
            fallback. When given it must hold ``FANGRAPHSTEAM``,
            ``Direction``, ``Speed`` and ``Temperature``; a ``home_team``
            column is added to it in place.
        venue_map_df: Venue lookup with ``id``, ``fieldInfo.roofType`` and
            ``location.azimuthAngle``.
        l_park_latest_df: Latest park inputs for left-handed batters, keyed
            by ``venue_id``. Its ``gamePk`` and ``game_date`` columns are
            dropped before merging.
        r_park_latest_df: Same as ``l_park_latest_df`` for right-handed
            batters.
        base_rate_df: One column per event in ``events_list``; the value at
            row label 0 is the divisor for that event.

    Returns:
        DataFrame with game identifiers, ``x_vect``, ``y_vect``,
        ``temperature``, ``weather`` and one ``<class>_wfx_l`` /
        ``<class>_wfx_r`` column per model class. For events in
        ``events_list`` the predicted probability is divided by the base
        rate.
    """
    # Add box score data
    game_df[['weather', 'wind', 'venue', 'date', 'missing_weather']] = game_df['game_id'].apply(lambda game_id: pd.Series(create_box(game_id)))
    # Merge in roofType and azimuthAngle
    weather_input_df = game_df.merge(venue_map_df[['id', 'fieldInfo.roofType', 'location.azimuthAngle']], left_on=['venue_id'], right_on=['id'], how='left')
    # Clean weather data (presumably where x_vect, y_vect and temperature are derived - TODO confirm)
    weather_input_df = clean_weather(weather_input_df)

    # Merge in Swish weather data, if available
    if swishanalytics_df is not None:
        # Merge on home team; both sides are mapped through team_dict so the keys match
        swishanalytics_df['home_team'] = swishanalytics_df['FANGRAPHSTEAM'].map(team_dict)
        weather_input_df['home_team'] = weather_input_df['home_team'].map(team_dict)
        weather_input_df = pd.merge(weather_input_df, swishanalytics_df.drop_duplicates(), on=['home_team'], how='left') # Note: double headers don't seem to get different weather, so who cares?
        # Reverse wind direction for calculating angles (set to N for no wind - won't matter)
        weather_input_df['Swish_Direction'] = weather_input_df['Direction'].fillna("N").apply(wind_reverser)
        # Calculate angle wind is blowing to
        weather_input_df['Swish_Angle'] = weather_input_df['Swish_Direction'].apply(find_degree)
        # Calculate wind vectors
        weather_input_df[['Swish_x_vect', 'Swish_y_vect']] = weather_input_df.apply(lambda row: calculate_vectors(row, 'location.azimuthAngle', 'Swish_Angle', 'Speed'), axis=1)
        # Standardize temperature name
        weather_input_df['Swish_temperature'] = weather_input_df['Temperature'].copy()

        # Take Swish Analytics weather if MLB Stats API is missing weather
        for column in ['x_vect', 'y_vect', 'temperature']:
            weather_input_df[column] = np.where(weather_input_df['missing_weather'] == True, weather_input_df[f'Swish_{column}'], weather_input_df[column])


    ### Inputs
    # Venue dummies
    venue_dummies = [col for col in l_park_latest_df.columns if (col.startswith("venue_") and not col.endswith("id"))]

    # Identify inputs
    model_input_list = ['x_vect', 'y_vect', 'temperature'] + venue_dummies
    # Loop over events
    for event in events_list:
        # NOTE(review): `{event}_lg` is listed twice. Left unchanged because the
        # feature layout must match what the models were trained on - confirm
        # against the training notebook before removing the duplicate.
        model_input_list += [f'{event}_lg', f'{event}_lg', f'{event}_pfx']

    ### LHB
    weather_input_l_df = pd.merge(weather_input_df, l_park_latest_df.drop(columns=['gamePk', 'game_date']), on=['venue_id'], suffixes=("", "_l"))
    # Predicted outputs: predict_proba columns follow the order of the SAME
    # model's classes_, so the labels must come from predict_wfx_l
    wfx_l_columns = [f"{col}_wfx_l" for col in list(predict_wfx_l.classes_)]
    weather_input_l_df[wfx_l_columns] = predict_wfx_l.predict_proba(weather_input_l_df[model_input_list].values)

    # Convert to PFX
    for event in events_list:
        weather_input_l_df[f"{event}_wfx_l"] = weather_input_l_df[f"{event}_wfx_l"] / base_rate_df[event][0]

    ### RHB
    weather_input_r_df = pd.merge(weather_input_df, r_park_latest_df.drop(columns=['gamePk', 'game_date']), on=['venue_id'], suffixes=("", "_r"))

    # Predicted outputs
    wfx_r_columns = [f"{col}_wfx_r" for col in list(predict_wfx_r.classes_)]
    weather_input_r_df[wfx_r_columns] = predict_wfx_r.predict_proba(weather_input_r_df[model_input_list].values)

    # Convert to PFX
    for event in events_list:
        weather_input_r_df[f"{event}_wfx_r"] = weather_input_r_df[f"{event}_wfx_r"] / base_rate_df[event][0]

    # Combine LHB and RHB weather effects (column-wise; rows are aligned on the index)
    wfx_df = pd.concat([weather_input_l_df, weather_input_r_df[wfx_r_columns]], axis=1)

    # Clean date
    wfx_df['date'] = wfx_df['game_date'].str.replace("-", "")
    # Rename
    wfx_df.rename(columns={'game_id': 'gamePk'}, inplace=True)


    return wfx_df[['gamePk', 'game_date', 'date', 'game_num', 'away_team', 'home_team', 'venue_id', 'x_vect', 'y_vect', 'temperature', 'weather'] + wfx_l_columns + wfx_r_columns]

### Run

##### 1. Swish Analytics

In [27]:
try:
    # Scrape Swish Analytics
    swishanalytics_df = swishanalytics(todaysdate)
    # To CSV
    swishanalytics_df.to_csv(os.path.join(baseball_path, "A06. Weather", "1. Swish Analytics", f"Swish Analytics {todaysdate}.csv"), index=False, encoding='iso-8859-1')
except Exception as error:
    # Best-effort step: keep going without Swish data, but catch Exception
    # rather than everything so a keyboard/kernel interrupt still stops the run
    print("Could not scrape Swish Analytics weather data.")
    print(f"Reason: {error!r}")

##### 2. RotoGrinders

In [29]:
try:
    # Scrape RotoGrinders
    rotogrinders_df = rotogrinders(todaysdate, team_map)
    # To CSV (rotogrinders returns a message string on a failed request, so
    # this call raises and lands in the handler below)
    rotogrinders_df.to_csv(os.path.join(baseball_path, "A06. Weather", "2. RotoGrinders", f"RotoGrinders {todaysdate}.csv"), index=False)
except Exception as error:
    # Best-effort step: catch Exception rather than everything so a
    # keyboard/kernel interrupt still stops the run
    print("Could not scrape RotoGrinders weather data.")
    print(f"Reason: {error!r}")

##### 3. Park x Weather Factors

In [94]:
# Read in Swish data
try:
    swishanalytics_df = pd.read_csv(os.path.join(baseball_path, "A06. Weather", "1. Swish Analytics", f"Swish Analytics {todaysdate}.csv"), encoding='iso-8859-1')
    # Some saved files already carry computed vector columns; strip them so
    # the frame has the raw layout calculate_wfx reads
    if 'x_vect' in swishanalytics_df.columns:
        swishanalytics_df.drop(columns=['x_vect', 'y_vect', 'CF'], inplace=True)
        swishanalytics_df.rename(columns={'temperature': 'Temperature'}, inplace=True)
except Exception as error:
    # Swish data is optional: fall back to None, but catch Exception rather
    # than everything so a keyboard/kernel interrupt still stops the run
    print("Swish Analytics data not available.")
    print(f"Reason: {error!r}")
    swishanalytics_df = None

# Read in latest park data
l_park_latest_df = pd.read_csv(os.path.join(baseball_path, "Park Latest - LHB.csv"))
r_park_latest_df = pd.read_csv(os.path.join(baseball_path, "Park Latest - RHB.csv"))

# Read in base rates
base_rate_df = pd.read_csv(os.path.join(baseball_path, "Base Rates.csv"))

# Extract day's games
daily_game_df = game_df[game_df['date'].astype(str) == todaysdate].reset_index(drop=True)

# Calculate PFX
wfx_df = calculate_wfx(daily_game_df, swishanalytics_df, venue_map_df, l_park_latest_df, r_park_latest_df, base_rate_df)

# Write to CSV
wfx_df.to_csv(os.path.join(baseball_path, "A06. Weather", "3. Park and Weather Factors", f"{todaysdate} Park and Weather Factors.csv"), index=False, encoding='iso-8859-1')

In [98]:
# Assign the filled column back: an in-place fillna on a selected column is
# chained assignment and does not update wfx_df under pandas Copy-on-Write.
# NOTE(review): this runs after the CSV above was written, so the saved file
# is not affected; the reason missing away teams default to "OAK" is not
# visible here - confirm.
wfx_df['away_team'] = wfx_df['away_team'].fillna("OAK")