- [1. Instructions](#1)
- [2. Importing Functions and Packages](#2)

## 1. Instructions <a id='1'></a>
- Connect to GitHub in py file

## 2. Importing Functions and Packages <a id='2'></a>

In [57]:
import requests
import base64
from io import StringIO
import pandas as pd
from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.endpoints import playergamelog, leaguedashteamstats, teamyearbyyearstats
from nba_api.stats.library.parameters import SeasonAll
from nba_api.stats.static import players, teams
import datetime
import time


In [71]:
def merge_with_suffixes(dataframes, names, keys):
    """Merging different sets of data of the same season together"""
    suffixed_dfs = []
    for df, name in zip(dataframes, names):
        # Suffix non-key columns only
        suffixed_cols = {col: f"{col}_{name}" if col not in keys else col for col in df.columns}
        suffixed_dfs.append(df.rename(columns=suffixed_cols))

    merged_df = suffixed_dfs[0]
    for df in suffixed_dfs[1:]:
        merged_df = pd.merge(merged_df, df, on=keys, how='inner')
    return merged_df
    

def extracting_today_data(from_date=None, season='2023-24', season_type='Regular Season', data_type='Base', delay=5):
    year_start = season[:4]
    year_end = str(int(year_start)+1)
    season_ = year_start + "-" + year_end[2:]
    season = year_start + "_" + year_end
    if from_date == None:
        path_today = f"/Users/liqingyang/Documents/GitHub/sports_trading/sports_betting/nba_api/data/teams_stats/{season}/base_{season}.csv"
        from_date = pd.read_csv(path_today, parse_dates=['Date'])['Date'].max().date()

    season_end = datetime.date.today()
    
    current_date = from_date
    all_data = []

    unsuccessful_dates = []
    
    while current_date <= season_end:
        date_str = current_date.strftime('%m/%d/%Y')
        
        try:
            daily_stats = leaguedashteamstats.LeagueDashTeamStats(
                measure_type_detailed_defense=data_type,
                season=season_,
                season_type_all_star=season_type,
                date_to_nullable=date_str
            ).get_data_frames()[0]
            daily_stats['Date'] = date_str
            all_data.append(daily_stats)
            print(f"Data fetched for {date_str}")
        except Exception as e:
            unsuccessful_dates += [date_str]
            print(f"Error fetching data for {date_str}: {e}")
        time.sleep(delay)
        current_date += datetime.timedelta(days=1)
        
    full_season_data = pd.concat(all_data, ignore_index=True)
    return full_season_data, unsuccessful_dates

def getting_stats(delay=5, retrieving_from=None):
    """Getting cumulative season stats for each game"""
    track_start='2023'
    season_end='2024'
    if retrieving_from is None:
        retrieving_from = datetime.now() - timedelta(seconds=delay)
    
    base, unsuccessful_dates_base = extracting_today_data(from_date=retrieving_from, data_type='Base', delay=delay)
    advanced, unsuccessful_dates_advanced = extracting_today_data(from_date=retrieving_from, data_type='Advanced', delay=delay)
    misc, unsuccessful_dates_misc = extracting_today_data(from_date=retrieving_from, data_type='Misc', delay=delay)
    four_factors, unsuccessful_dates_four_factors = extracting_today_data(from_date=retrieving_from, data_type='Four Factors', delay=delay)
    scoring, unsuccessful_dates_scoring = extracting_today_data(from_date=retrieving_from, data_type='Scoring', delay=delay)
    opponent, unsuccessful_dates_opponent = extracting_today_data(from_date=retrieving_from, data_type='Opponent', delay=delay)
    defense, unsuccessful_dates_defense = extracting_today_data(from_date=retrieving_from, data_type='Defense', delay=delay)

    year_start = track_start[:4]
    year_end = season_end[:4]
    season_ = year_start + "_" + year_end[2:]
    season = year_start + "_" + year_end

    datas = [base, advanced, misc, four_factors, scoring, opponent, defense]
    
    unsuccessful_dates_lst = [unsuccessful_dates_base, unsuccessful_dates_advanced, unsuccessful_dates_misc, unsuccessful_dates_four_factors, unsuccessful_dates_scoring, unsuccessful_dates_opponent, unsuccessful_dates_defense]
    datas_names = ["base", "advanced", "misc", "four_factors",
               "scoring", "opponent", "defense"]
    columns_to_exclude = ['TEAM_NAME', 'GP', 'W', 'L', 'W_PCT', 'MIN']
    others = [advanced, misc, four_factors, scoring, opponent, defense]
    others = [i[i.columns[~i.columns.isin(columns_to_exclude)]] for i in others]
    datas = [base] + others
    
    merge_keys = ['Date', 'TEAM_ID']
    merged_df = merge_with_suffixes(datas, datas_names, merge_keys)    
    latest_path = f"/Users/liqingyang/Documents/GitHub/sports_trading/sports_betting/nba_api/data/teams_stats/{season}/cumulative_season_stats_{season}.csv"
    prev = pd.read_csv(latest_path, parse_dates=['Date'])
    merged_df = pd.concat([prev, merged_df], ignore_index=True)
    merged_df.to_csv(latest_path, index=False)
    return merged_df

In [86]:
# Your personal access token and repo details
token = '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'
username = 'bluefish0125'
repo = 'Sports-Betting'
path_to_file = 'nba_api/data/teams_stats/processed_cum_2018_2024.csv'

# GitHub API URL for your file
url = f'https://api.github.com/repos/{username}/{repo}/contents/{path_to_file}'

# Hvae to use raw file
raw_url = 'https://raw.githubusercontent.com/bluefish0125/Sports-Betting/main/nba_api/data/teams_stats/processed_cum_2018_2024.csv'

# Headers for authentication
headers = {
    'Authorization': f'token {token}',
    'Accept': 'application/vnd.github.VERSION.raw'
}

# Send a GET request with headers including your personal access token for authentication
response = requests.get(raw_url, headers=headers)
if response.status_code == 200: # 200 means successful
    df = pd.read_csv(StringIO(response.text), parse_dates=['Date'])
    df['Date'] = pd.to_datetime(df['Date'], format='mixed')    
    max_day = df['Date'].max().date()
    next_day = max_day + datetime.timedelta(days=1)
else:
    print(f"Failed to retrieve the CSV file. Status Code: {response.status_code}")


  df = pd.read_csv(StringIO(response.text), parse_dates=['Date'])


In [87]:
next_day

datetime.date(2024, 4, 2)

In [81]:
today = getting_stats(retrieving_from=next_day)

Data fetched for 04/03/2024
Data fetched for 04/04/2024
Data fetched for 04/03/2024
Data fetched for 04/04/2024
Data fetched for 04/03/2024
Data fetched for 04/04/2024
Data fetched for 04/03/2024
Data fetched for 04/04/2024
Data fetched for 04/03/2024
Data fetched for 04/04/2024
Data fetched for 04/03/2024
Data fetched for 04/04/2024
Data fetched for 04/03/2024
Data fetched for 04/04/2024


In [45]:
# Convert the DataFrame to CSV
csv_content = df.iloc[0, :].to_csv(index=False)
content_encoded = base64.b64encode(csv_content.encode('utf-8')).decode('utf-8')
new_path_to_file = 'data_pipeline/test_file/test_file_1.csv'
new_url = f'https://api.github.com/repos/{username}/{repo}/contents/{new_path_to_file}'
# Prepare headers
headers = {
    'Authorization': f'token {token}',
    'Accept': 'application/vnd.github.v3+json',
}

# Fetch the file from GitHub to get its SHA
response = requests.get(url, headers=headers)
data = response.json()
sha = data['sha']

# Create the payload with the new content and the SHA
payload = {
    'message': 'Update CSV file',
    'content': content_encoded,
    'sha': sha,
    'branch': 'main',  # specify the branch if not 'main'
}

# Make a PUT request to update the file
response = requests.put(url, headers=headers, json=payload)

if response.status_code == 200:
    print('File updated successfully.')
else:
    print('Failed to update the file.')


File updated successfully.


In [46]:
csv_content = df.iloc[0, :].to_csv(index=False)
content_encoded = base64.b64encode(csv_content.encode('utf-8')).decode('utf-8')
new_path_to_file = 'data_pipeline/test_file/test_file_1.csv'
new_url = f'https://api.github.com/repos/{username}/{repo}/contents/{new_path_to_file}'

# Prepare headers for authentication
headers = {
    'Authorization': f'token {token}',
    'Accept': 'application/vnd.github.v3+json',
}

# Create the payload with the new content
payload = {
    'message': 'Add new CSV file',
    'content': content_encoded,
    'branch': 'main',  # specify the branch if not 'main'
}

# Make a PUT request to upload the file
response = requests.put(new_url, headers=headers, json=payload)

if response.status_code == 201:
    print('New file created successfully.')
else:
    print(f'Failed to create the new file. Status Code: {response.status_code} Response: {response.json()}')

New file created successfully.


In [None]:
from nba_api.stats.endpoints import leaguegamefinder
from nba_api.stats.endpoints import playergamelog, leaguedashteamstats, teamyearbyyearstats
from nba_api.stats.library.parameters import SeasonAll
from nba_api.stats.static import players, teams
import datetime
import time

def merge_with_suffixes(dataframes, names, keys):
    """Merging different sets of data of the same season together"""
    suffixed_dfs = []
    for df, name in zip(dataframes, names):
        # Suffix non-key columns only
        suffixed_cols = {col: f"{col}_{name}" if col not in keys else col for col in df.columns}
        suffixed_dfs.append(df.rename(columns=suffixed_cols))

    merged_df = suffixed_dfs[0]
    for df in suffixed_dfs[1:]:
        merged_df = pd.merge(merged_df, df, on=keys, how='inner')
    return merged_df
    

def extracting_today_data(from_date=None, season='2023-24', season_type='Regular Season', data_type='Base', delay=5):
    year_start = season[:4]
    year_end = str(int(year_start)+1)
    season_ = year_start + "-" + year_end[2:]
    season = year_start + "_" + year_end
    if from_date == None:
        path_today = f"/Users/liqingyang/Documents/GitHub/sports_trading/sports_betting/nba_api/data/teams_stats/{season}/base_{season}.csv"
        from_date = pd.read_csv(path_today, parse_dates=['Date'])['Date'].max().date()

    season_end = datetime.date.today()
    
    current_date = from_date
    all_data = []

    unsuccessful_dates = []
    
    while current_date <= season_end:
        date_str = current_date.strftime('%m/%d/%Y')
        
        try:
            daily_stats = leaguedashteamstats.LeagueDashTeamStats(
                measure_type_detailed_defense=data_type,
                season=season_,
                season_type_all_star=season_type,
                date_to_nullable=date_str
            ).get_data_frames()[0]
            daily_stats['Date'] = date_str
            all_data.append(daily_stats)
            print(f"Data fetched for {date_str}")
        except Exception as e:
            unsuccessful_dates += [date_str]
            print(f"Error fetching data for {date_str}: {e}")
        time.sleep(delay)
        current_date += datetime.timedelta(days=1)
        
    full_season_data = pd.concat(all_data, ignore_index=True)
    return full_season_data, unsuccessful_dates

def getting_stats(delay=5):
    """Getting cumulative season stats for each game"""
    track_start='2023'
    season_end='2024'
    
    base, unsuccessful_dates_base = extracting_today_data(data_type='Base', delay=delay)
    advanced, unsuccessful_dates_advanced = extracting_today_data(data_type='Advanced', delay=delay)
    misc, unsuccessful_dates_misc = extracting_today_data(data_type='Misc', delay=delay)
    four_factors, unsuccessful_dates_four_factors = extracting_today_data(data_type='Four Factors', delay=delay)
    scoring, unsuccessful_dates_scoring = extracting_today_data(data_type='Scoring', delay=delay)
    opponent, unsuccessful_dates_opponent = extracting_today_data(data_type='Opponent', delay=delay)
    defense, unsuccessful_dates_defense = extracting_today_data(data_type='Defense', delay=delay)

    year_start = track_start[:4]
    year_end = season_end[:4]
    season_ = year_start + "_" + year_end[2:]
    season = year_start + "_" + year_end

    datas = [base, advanced, misc, four_factors, scoring, opponent, defense]
    unsuccessful_dates_lst = [unsuccessful_dates_base, unsuccessful_dates_advanced, unsuccessful_dates_misc, unsuccessful_dates_four_factors, unsuccessful_dates_scoring, unsuccessful_dates_opponent, unsuccessful_dates_defense]
    datas_names = ["base", "advanced", "misc", "four_factors",
               "scoring", "opponent", "defense"]
    columns_to_exclude = ['TEAM_NAME', 'GP', 'W', 'L', 'W_PCT', 'MIN']
    others = [advanced, misc, four_factors, scoring, opponent, defense]
    others = [i[i.columns[~i.columns.isin(columns_to_exclude)]] for i in others]
    datas = [base] + others
    
    merge_keys = ['Date', 'TEAM_ID']
    merged_df = merge_with_suffixes(datas, datas_names, merge_keys)    
    latest_path = f"/Users/liqingyang/Documents/GitHub/sports_trading/sports_betting/nba_api/data/teams_stats/{season}/cumulative_season_stats_{season}.csv"
    prev = pd.read_csv(latest_path, parse_dates=['Date'])
    merged_df = pd.concat([prev, merged_df], ignore_index=True)
    merged_df.to_csv(latest_path, index=False)
    return merged_df

today = getting_stats()