### Project submission group
- Group member 1
    - Name: Joe Becker
    - Email: jb4575@drexel.edu
- Group member 2
    - Name: Christian Ekwomadu
    - Email: cce49@drexel.edu
- Group member 3
    - Name: Deepak Bhadouria
    - Email: db3533@drexel.edu
- Group member 4
    - Name: Surya Samarth J
    - Email: sj3244@drexel.edu

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive.
# NOTE(review): the cells below write their CSV files under /content/, not
# under the mounted /content/drive path -- confirm whether this mount is
# still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import requests
import time
import csv
import re
from collections import defaultdict

# Sportradar Script

In [None]:
# Request settings for the NBA stats API. Each value is interpolated into the
# URLs built by get_team_json_resp / get_seas_json_resp.
your_api_key = '[YOUR API KEY]'  # placeholder -- replace with a real key before running

access_level = 'trial'   # URL segment right after /nba/
version = 'v7'           # API version segment
language_code = 'en'     # language segment
format_var = 'json'      # response format; used as the URL file extension

season_year = '2022'     # season to query
nba_season = 'REG'       # season segment that follows the year in the URL

def get_team_json_resp(season_year, access_level, version, language_code, nba_season, format_var, your_api_key, timeout=30):
  """Fetch the season rankings document and return it as parsed JSON.

  Args:
    season_year, access_level, version, language_code, nba_season:
      values interpolated into the request URL path.
    format_var: file extension of the requested resource; the body is
      parsed with ``.json()``, so this is expected to be ``"json"``.
    your_api_key: key sent as the ``api_key`` query parameter.
    timeout: seconds to wait for the server before giving up (new,
      optional; existing positional callers are unaffected).

  Returns:
    The decoded JSON body of the response.

  Raises:
    requests.HTTPError: if the server answers with a 4xx/5xx status.
    requests.Timeout: if no response arrives within ``timeout`` seconds.
  """

  team_id_url = f"https://api.sportradar.us/nba/{access_level}/{version}/{language_code}/seasons/{season_year}/{nba_season}/rankings.{format_var}?api_key={your_api_key}"

  response = requests.get(team_id_url, timeout=timeout)
  # Fail loudly on a bad key or rate limit instead of handing an error
  # payload to the caller, which would silently collect zero teams.
  response.raise_for_status()

  return response.json()

def get_seas_json_resp(season_year, access_level, version, language_code, nba_season, format_var, your_api_key, team_id=None, timeout=30):
  """Fetch one team's seasonal statistics document and return it as parsed JSON.

  Args:
    season_year, access_level, version, language_code, nba_season:
      values interpolated into the request URL path.
    format_var: file extension of the requested resource; the body is
      parsed with ``.json()``, so this is expected to be ``"json"``.
    your_api_key: key sent as the ``api_key`` query parameter.
    team_id: id placed in the ``/teams/<id>/`` URL segment. When omitted,
      the module-level ``t_id`` variable is used, which is how existing
      callers supply it.
    timeout: seconds to wait for the server before giving up.

  Returns:
    The decoded JSON body of the response.

  Raises:
    NameError: if ``team_id`` is omitted and no global ``t_id`` exists.
    requests.HTTPError: if the server answers with a 4xx/5xx status.
    requests.Timeout: if no response arrives within ``timeout`` seconds.
  """

  if team_id is None:
    # Backward compatibility: callers that predate the team_id parameter
    # assign a global `t_id` right before calling this function.
    team_id = t_id

  seas_stat_url = f"https://api.sportradar.us/nba/{access_level}/{version}/{language_code}/seasons/{season_year}/{nba_season}/teams/{team_id}/statistics.{format_var}?api_key={your_api_key}"

  response = requests.get(seas_stat_url, timeout=timeout)
  # Surface HTTP errors here rather than as a KeyError in the caller.
  response.raise_for_status()

  return response.json()

In [None]:
# Collect per-team and per-player season averages and save them as CSV files.
team_ids = []
seas_team_avgs = []
seas_plyr_avgs = []

team_id_resp = get_team_json_resp(season_year, access_level, version, language_code, nba_season, format_var, your_api_key)

# Get list of teams for use in 2nd loop.
# Teams are nested as conferences -> divisions -> teams in the response; a
# response without 'conferences' (or a conference without 'divisions')
# contributes no teams.
for conference in team_id_resp.get('conferences', []):
    for division in conference.get('divisions', []):
        for team in division['teams']:
            team_ids.append({
                'team_id' : team['id'],
                'team_market' : team['market'],
                'team_name' : team['name'],
                'sr_id' : team['sr_id']
            })

# Sportradar limits api calls to 1 per second.
# To ensure the code below executes w/o error, wait 1 second
time.sleep(1)

# Get average point info by team and player
for team_id in team_ids:

    t_name = team_id['team_market']+" "+team_id['team_name']
    # get_seas_json_resp reads the module-level `t_id` when it builds its URL,
    # so this assignment must happen before the call.
    t_id = team_id['team_id']

    seas_stats = get_seas_json_resp(season_year, access_level, version, language_code, nba_season, format_var, your_api_key)

    # Get team stats for current season
    team_avg = seas_stats['own_record']['average']
    seas_team_avgs.append({
        'team': t_name,
        'points_per_game': team_avg['points'],
        'three_pointers': team_avg['three_points_made'],
        'pts_off_turnovers': team_avg['points_off_turnovers']
    })

    # Get player stats for each team in current season
    for plyr in seas_stats['players']:
        plyr_avg = plyr['average']
        seas_plyr_avgs.append({
            'team': t_name,
            'player_name': plyr['full_name'],
            'points_per_game': plyr_avg['points'],
            'three_pointers': plyr_avg['three_points_made'],
            'pts_off_turnovers': plyr_avg['points_off_turnovers']
        })

    # Need to wait between loop executions
    time.sleep(1)

# One record was appended per team / per player above.
team_count = len(seas_team_avgs)
plyr_count = len(seas_plyr_avgs)

# Convert output from api calls into csv files for access at a later date.
# csv.DictWriter quotes any field containing a comma, quote or newline, which
# plain string concatenation would have written as a corrupt row. newline=''
# is what the csv module expects of its file object, and lineterminator='\n'
# keeps the '\n' row endings these files have always had.
# team info
with open(f"/content/{season_year}_season_team_averages.csv", 'w', newline='', encoding='utf-8') as seas_team_avgs_file:
  team_writer = csv.DictWriter(
      seas_team_avgs_file,
      fieldnames=['team', 'points_per_game', 'three_pointers', 'pts_off_turnovers'],
      lineterminator='\n')
  team_writer.writeheader()
  team_writer.writerows(seas_team_avgs)

# player info
with open(f"/content/{season_year}_season_player_averages.csv", 'w', newline='', encoding='utf-8') as seas_plyr_avgs_file:
  plyr_writer = csv.DictWriter(
      seas_plyr_avgs_file,
      fieldnames=['team', 'player_name', 'points_per_game', 'three_pointers', 'pts_off_turnovers'],
      lineterminator='\n')
  plyr_writer.writeheader()
  plyr_writer.writerows(seas_plyr_avgs)

print('Script returned %d total teams and %d total players.' %(team_count,plyr_count))

Script returned 30 total teams and 506 total players.


# Reddit Script

In [None]:
# Reddit API Calls
# Request an access token with the password grant: the app key/secret go in
# HTTP basic auth and the account username/password in the form body.
# The bracketed values are placeholders to fill in before running.

token_headers = {'User-Agent': 'DSCI511-Final-Project/v0.1 by jb4575'}
post_data = dict(grant_type='password', username='[username]', password='[password]')
client_auth = requests.auth.HTTPBasicAuth('[YOUR API KEY]', '[YOUR API SECRET]')

token_response = requests.post(
    'https://www.reddit.com/api/v1/access_token',
    auth=client_auth,
    data=post_data,
    headers=token_headers,
).json()

# Display the parsed token payload as the cell output.
token_response

{'access_token': '2288100762636-98YiM6cF5HSBsWP0hoLwv60YA7MIJg',
 'token_type': 'bearer',
 'expires_in': 86400,
 'scope': '*'}

In [None]:
def get_oauth_response(token_response, token_headers, after = '', count = 0):
  """Search r/nba for posts flaired "Index Thread" and return the parsed JSON.

  The request is authorised with the bearer token from ``token_response``
  and reuses the ``User-Agent`` from ``token_headers``. ``after`` and
  ``count`` are appended to the query string only when they differ from
  their defaults ('' and 0).
  """

  url_parts = ['https://oauth.reddit.com/r/nba/search?q=flair_name%3A%22Index%20Thread%22&sort=new&limit=100&restrict_sr=1']

  # Optional paging parameters.
  if after != '':
    url_parts.append(f'&after={after}')
  if count != 0:
    url_parts.append(f'&count={count}')

  request_headers = {
      'Authorization': f"bearer {token_response['access_token']}",
      'User-Agent': token_headers['User-Agent'],
  }

  return requests.get(''.join(url_parts), headers = request_headers).json()

In [None]:
def get_game_links(oauth_resp_child):
  """Get the list of game thread links so that they can later be used to get all comments related to a game in a single thread

  Args:
    oauth_resp_child: one search-result child; its ``['data']['selftext']``
      is read as newline-separated text whose table rows are split on '|'.

  Returns:
    A ``(game_date, game_teams, game_links)`` tuple. ``game_teams`` holds
    "<away> @ <home>" strings and ``game_links`` the matching thread links;
    the two lists always have the same length and order.
  """

  game_text = oauth_resp_child['data']['selftext']
  game_list = game_text.split('\n')

  # Fixed-width slice of the first line.
  # NOTE(review): presumably drops a 22-character title prefix and a
  # 2-character suffix around the date -- confirm against a live post.
  game_date = game_list[0][22:-2]
  game_links = []
  game_teams = []

  for game in game_list:
    game_col = game.split('|')

    # A usable row needs the link column (index 2) and both team columns
    # (indexes 3 and 5). Checking this before appending anything keeps
    # game_links and game_teams aligned when a row is cut short.
    if len(game_col) < 6 or not game_col[2].startswith('[Link]'):
      continue

    # NOTE(review): 36 looks like len('[Link](https://www.reddit.com/r/nba/')
    # and -1 the closing ')' -- confirm against a live post.
    game_link_slice = game_col[2][36:-1]

    # Team cells look like "[Name]..."; keep the text inside the brackets.
    away_team = game_col[3][1:game_col[3].find("]")]
    home_team = game_col[5][1:game_col[5].find("]")]

    game_links.append(game_link_slice)
    game_teams.append(away_team+" @ "+home_team)

  return game_date, game_teams, game_links

In [None]:
def get_game_response(token_response, token_headers, game_link):
  """Fetch one game thread and return the comment part of the response.

  Args:
    token_response: token payload; its 'access_token' is sent as a bearer token.
    token_headers: headers dict whose 'User-Agent' is reused for this request.
    game_link: path appended to ``https://oauth.reddit.com/r/nba/``.

  Returns:
    Element 1 of the decoded JSON response (the comment info; element 0
    is the post info).
  """
  game_oauth_headers = {'Authorization': f"bearer {token_response['access_token']}", 'User-Agent': token_headers['User-Agent']}
  # Send the headers built above. The previous code passed `oauth_headers`,
  # a name that is not defined in this function.
  game_oauth_response = requests.get(f'https://oauth.reddit.com/r/nba/{game_link}',
                              headers = game_oauth_headers).json()

  return game_oauth_response[1] # index = 1 gets comment info.  index = 0 gets post info

In [None]:
def get_comment_data(response):
  """Return response['data'], or None when the key is absent or holds None."""
  return response.get('data')

def get_comment_children(response):
  """Return response['children'], or None when the key is absent or holds None."""
  return response.get('children')

def get_comment_replies(response):
  """Return response['replies'], or None when the key is absent or holds None."""
  return response.get('replies')

def get_comment_body(response):
  """Return response['body'], or None when the key is absent or holds None."""
  return response.get('body')

def get_comment_text(response, comment_list):
  """Walk a nested response and append every 'body' value found to comment_list.

  Lists are traversed element by element. For a dict, its 'body' (if any)
  is appended, then the walk continues into the first of 'replies',
  'children', 'data' that is present and not None. Any other value is
  ignored. Results accumulate in ``comment_list``; the function itself
  always returns None.
  """
  # Lists: visit each element in order.
  if type(response) is list:
    for entry in response:
      get_comment_text(entry, comment_list)
    return

  # Strings, numbers, None, ...: nothing to collect.
  if type(response) is not dict:
    return

  comment_body = get_comment_body(response)
  if comment_body is not None:
    comment_list.append(comment_body)

  # Descend into exactly one nested container, in this priority order.
  for lookup in (get_comment_replies, get_comment_children, get_comment_data):
    nested = lookup(response)
    if nested is not None:
      return get_comment_text(nested, comment_list)

In [None]:
# Fetch the index-thread search results, then write one CSV row per game
# thread with all of that thread's comment bodies.
oauth_response = get_oauth_response(token_response, token_headers)

# newline='' is what the csv module requires of the file object so that row
# endings and newlines embedded in comments are written untranslated; utf-8
# is set explicitly so non-ASCII comment text does not depend on the
# platform's default encoding.
with open('/content/NBA_subreddit_matchup_discussions.csv', 'w', newline='', encoding='utf-8') as matchups:
  matchup_writer = csv.writer(matchups, delimiter=',', quotechar='"', quoting=csv.QUOTE_ALL)
  matchup_writer.writerow(['date', 'matchup', 'thread_link', 'thread_comments'])

  for child in oauth_response['data']['children']:
    game_date, game_teams, game_links = get_game_links(child)

    # Matchups and links are parallel lists; walk them in lockstep.
    for matchup, game_link in zip(game_teams, game_links):

      # Pause between thread requests.
      time.sleep(1)

      comment_list = []
      game_response = get_game_response(token_response, token_headers, game_link)
      get_comment_text(game_response, comment_list)

      # comment_list is written as a single cell (its Python list repr).
      matchup_writer.writerow([game_date, matchup, game_link, comment_list])
          

python3: can't open file '‐‐version': [Errno 2] No such file or directory
