In [92]:
# import requests
# from bs4 import BeautifulSoup
# import pandas as pd
# import os

# baseball_path = r"C:\Users\james\Documents\MLB\Data2"


# RotoWire

In [93]:
def pick_slate(Name):
    """Map a contest name to the slate label used to look up RotoWire slates.

    Parameters
    ----------
    Name : str
        Contest name, expected to carry a slate tag such as "(Early)".

    Returns
    -------
    str
        One of "Early", "Afternoon", "Late Night", "Night" or "All".
        "All" is the fallback when no slate tag is recognised.
    """
    if "(Early)" in Name:
        return "Early"
    # "Afternoon" is one of the slate names this notebook works with elsewhere;
    # without this branch such contests were silently treated as "All".
    if "(Afternoon)" in Name:
        return "Afternoon"
    # Order matters: "(Late Night)" also contains "Night", so test it first.
    if "(Late Night)" in Name:
        return "Late Night"
    if "Night" in Name:
        return "Night"
    return "All"

In [119]:
def scrape_slates(date, timeout=30):
    """Scrape the RotoWire saved-lineups page for one day and save its slates.

    Parameters
    ----------
    date : str
        Day to scrape, formatted ``YYYYMMDD``.
    timeout : float, optional
        Seconds to wait for the HTTP response before ``requests`` gives up.

    Returns
    -------
    pandas.DataFrame
        One row per ``<a class="dfs-slate">`` link found, with columns
        ``date``, ``slateID``, ``name``, ``time`` (lower-cased) and
        ``games`` (int).

    Raises
    ------
    Exception
        If the page answers with a status code other than 200.
    ValueError
        If a slate description does not consist of exactly two text fragments.
    KeyError
        If no slate links are found (the frame then has no ``games`` column).

    Side effects
    ------------
    Writes ``Slates <date>.csv`` under
    ``<baseball_path>/11. Projections/RotoWire/A. Slates``.
    ``baseball_path`` must already be defined in the notebook.
    """
    date_dash = date[0:4] + "-" + date[4:6] + "-" + date[6:]
    url = 'https://www.rotowire.com/daily/mlb/saved-lineups.php?date={}'.format(date_dash)

    def fetch_page_source(url):
        # A timeout keeps a stalled connection from hanging the whole scrape loop.
        response = requests.get(url, timeout=timeout)
        if response.status_code == 200:
            return response.text
        raise Exception(f"Failed to fetch page source. Status code: {response.status_code}")

    def extract_data_from_page(html_content):
        soup = BeautifulSoup(html_content, 'html.parser')
        slates_data = []
        for slate in soup.find_all('a', class_='dfs-slate'):
            # The description holds exactly two strings; only the second (the time) is kept.
            _date_fragment, slate_time = [
                text.strip()
                for text in slate.find('div', class_='dfs-slate-desc').stripped_strings
            ]
            slate_time = slate_time.lower()

            slate_name_parts = [
                text.strip()
                for text in slate.find('div', class_='dfs-slate-name').stripped_strings
            ]
            slate_name = slate_name_parts[0]
            # First token of the last fragment; normally the number of games.
            num_games = slate_name_parts[-1].split()[0]

            slate_id = slate['href'].split('slateID=')[1]

            slates_data.append({
                'date': date,
                'slateID': slate_id,
                'name': slate_name,
                'time': slate_time,
                'games': num_games,
            })

        return slates_data

    page_source = fetch_page_source(url)
    data = extract_data_from_page(page_source)

    # Create a pandas DataFrame
    df = pd.DataFrame(data)

    # When the token is not a number (per the original note it is then a team
    # abbreviation), to_numeric yields NaN, which is replaced by 1.
    # Assign the result back instead of chaining an in-place fillna: the
    # in-place form operates on a temporary under pandas Copy-on-Write.
    df['games'] = pd.to_numeric(df['games'], errors='coerce').fillna(1).astype('int')

    df.to_csv(os.path.join(baseball_path, "11. Projections", "RotoWire", "A. Slates", "Slates " + date + ".csv"), index=False)

    return df

In [106]:
def _flatten_player(entry):
    """Flatten one player record from the API response into a single-level dict."""
    # "team", "game" and "pos" may be missing or null; treat both cases as empty
    # so a single incomplete record does not abort the whole slate.
    team = entry.get('team') or {}
    game = entry.get('game') or {}
    return {
        'rwID': entry.get('rwID'),
        'slateID': entry.get('slateID'),
        'firstName': entry.get('firstName'),
        'lastName': entry.get('lastName'),
        'rotoPos': entry.get('rotoPos'),
        'position': ','.join(entry.get('pos') or []),
        'throws': entry.get('throws'),
        'bats': entry.get('bats'),
        'isPitcher': entry.get('isPitcher'),
        'isBatter': entry.get('isBatter'),
        'teamAbbr': team.get('abbr'),
        'teamCity': team.get('city'),
        'teamNickname': team.get('nickname'),
        'gameDateTime': game.get('dateTime'),
        'gameIsDome': game.get('isDome'),
        'salary': entry.get('salary'),
        'points': entry.get('pts'),
        'rostership': entry.get('rostership'),
    }


def scrape_points(slateID, timeout=30):
    """Download RotoWire optimizer player data for one slate and save it to CSV.

    Parameters
    ----------
    slateID : int or str
        RotoWire slate identifier, inserted into the API URL and the file name.
    timeout : float, optional
        Seconds to wait for the HTTP response before ``requests`` gives up.

    Returns
    -------
    pandas.DataFrame or None
        One row per player entry in the response. ``None`` (after printing a
        message) when the request fails, the body is not valid JSON, or the
        response is empty.

    Side effects
    ------------
    Writes ``Slate <slateID>.csv`` under
    ``<baseball_path>/11. Projections/RotoWire/B. Projections``.
    ``baseball_path`` must already be defined in the notebook.
    """
    url = f'https://www.rotowire.com/optimizer/api/mlb/players.php?slateID={slateID}'

    try:
        # Fetch JSON data from the API
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Raise an exception for unsuccessful responses
        api_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers invalid JSON on requests versions whose decode error
        # is not a RequestException subclass.
        print(f"Error fetching data from API: {e}")
        return None

    if not api_data:
        print("No data found in API response.")
        return None

    # Create a pandas DataFrame
    df = pd.DataFrame([_flatten_player(entry) for entry in api_data])

    df.to_csv(os.path.join(baseball_path, "11. Projections", "RotoWire", "B. Projections", "Slate " + str(slateID) + ".csv"), index=False)

    return df

In [159]:
def read_rotowire(contestKey, history=None):
    """Load the saved RotoWire projections that correspond to one contest.

    The contest's name and date are looked up in the contest history, the name
    is reduced to a slate label with ``pick_slate``, that label is matched
    against the day's saved slates file to get a ``slateID``, and the
    projections CSV for that slate is read.

    Parameters
    ----------
    contestKey
        Value to match against the ``contestKey`` column of the history frame.
    history : pandas.DataFrame, optional
        Contest history with ``contestKey``, ``name`` and ``date`` columns.
        Defaults to the notebook-level ``history_full`` frame, which must then
        already be defined.

    Returns
    -------
    pandas.DataFrame
        Contents of ``Slate <slateID>.csv`` for the matched slate.

    Raises
    ------
    IndexError
        If the contest key is not in the history, or the day's slates file has
        no slate with the expected name.
    FileNotFoundError
        If either CSV file does not exist.
    """
    if history is None:
        history = history_full

    # Identify the contest (first match wins)
    contest_rows = history.loc[history['contestKey'] == contestKey]
    if contest_rows.empty:
        raise IndexError(f"contestKey {contestKey!r} not found in contest history")

    # Extract name and date
    name = contest_rows['name'].iloc[0]
    date = contest_rows['date'].iloc[0]

    # Use name to pick slate type (All, Afternoon, etc...)
    slate = pick_slate(name)

    # Read in slates
    roto_slates = pd.read_csv(os.path.join(baseball_path, "11. Projections", "RotoWire", "A. Slates", "Slates " + str(int(date)) + ".csv"))
    slate_rows = roto_slates.loc[roto_slates['name'] == slate]
    if slate_rows.empty:
        raise IndexError(f"No '{slate}' slate saved for date {int(date)}")
    roto_slate = slate_rows['slateID'].iloc[0]

    projections_path = os.path.join(baseball_path, "11. Projections", "RotoWire", "B. Projections", "Slate " + str(roto_slate) + ".csv")
    try:
        # scrape_points writes these files with to_csv's default encoding (UTF-8);
        # decoding them as ISO-8859-1 garbles accented player names.
        roto_projections = pd.read_csv(projections_path, encoding='utf-8')
    except UnicodeDecodeError:
        # Keep the previous behaviour for files that are not valid UTF-8.
        roto_projections = pd.read_csv(projections_path, encoding='iso-8859-1')

    return roto_projections



In [154]:
# # Read in history file        
# history = pd.read_csv(os.path.join(baseball_path, "Utilities", "Contests.csv"))

# # Sort by date, then draft group, then fee
# history.sort_values(['date', 'draftGroupId', 'entryFee'], ascending=False)
# # Keep only one observation per draft group 
# history.drop_duplicates('draftGroupId', keep='first', inplace=True)

# history_full = history.copy()
# history = history.query('result == 1').query('payout == 1').query('salary == 1')

# history = history.reset_index(drop=True)
# history['date'] = history['date'].astype('int').astype('str')

In [160]:
# roto_projections = read_rotowire(147446392)
# roto_projections

In [135]:
# from datetime import datetime, timedelta
# import time

# def get_date_range(start_date, end_date):
#     date_format = "%Y%m%d"
#     start_date_obj = datetime.strptime(start_date, date_format)
#     end_date_obj = datetime.strptime(end_date, date_format)

#     current_date = start_date_obj
#     while current_date <= end_date_obj:
#         yield current_date.strftime(date_format)
#         current_date += timedelta(days=1)

# # Example usage:
# start_date = "20221101"
# end_date = "20221105"
# for date in get_date_range(start_date, end_date):
#     print(date)
#     try:
#         scrape_slates(date)
#         time.sleep(2)
#     except:
#         print("Nothing for this day")
    


In [136]:
# # All, Early, Afternoon, Night, Late Night
# directory_path = r'C:\Users\james\Documents\MLB\Data2\11. Projections\RotoWire\A. Slates'

# def process_csv_files_in_directory(directory_path):
#     csv_files = [file for file in os.listdir(directory_path) if file.endswith('.csv')]

#     for file in csv_files:
#         file_path = os.path.join(directory_path, file)
#         try:
#             df = pd.read_csv(file_path)

#             for index, row in df.iterrows():
#                 if row['name'] in ["All", "Early", "Afternoon", "Night", "Late Night"]:
#                     slate_id = row['slateID']
#                     output_file = os.path.join(r'C:\Users\james\Documents\MLB\Data2\11. Projections\RotoWire\B. Projections', f'Slate {slate_id}.csv')

#                     if not os.path.exists(output_file):
#                         scrape_points(slate_id)

#                         time.sleep(5)
#         except:
#             print(file_path)
                    
# # Call the function to process the CSV files in the directory
# process_csv_files_in_directory(directory_path)
