# NBA Free Agency

This notebook aims to determine whether the top 10 NBA free agents (or players who hold a player option in their contracts) are likely to stay with or leave their team during free agency or at the mid-season trade deadline. This will be determined by applying NLP techniques to phrases extracted from tweets, news articles, interviews, polls, basketball stats, etc. 

#### Top 10 NBA Free Agents 2019
According to SBNation and ESPN, these players are: <br>
Link: https://www.sbnation.com/nba/2018/7/30/17616436/nba-free-agency-2019-list-kevin-durant-kyrie-irving

1. Kevin Durant
2. Kawhi Leonard
3. Kyrie Irving
4. Jimmy Butler 
5. Klay Thompson
6. DeMarcus Cousins
7. Al Horford
8. Kemba Walker
9. Khris Middleton
10. Eric Bledsoe

### Importing Libraries

In [1]:
#Web Scraping
from bs4 import BeautifulSoup
import requests
import urllib
import string

#Text Processing
#Download package for word_tokenize NLTK functions 
#  1. punkt
#  2. stopwords
import re
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
# Fetch the NLTK data packages listed above ('punkt' and 'stopwords').
# These stay as two bare calls so the last one's return value is shown as the
# cell output.
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to /Users/arnavgarg/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/arnavgarg/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

### Links

In [2]:
def return_links(user_query):
    """Run a Google search for ``user_query`` and return the raw result hrefs.

    The query is URL-encoded before it is appended to the search URL, so
    characters such as ``&``, ``#`` or ``+`` cannot corrupt the query string.
    Every ``<h3 class="r">`` heading that wraps an anchor with an ``href``
    contributes that href minus its first seven characters (presumably
    Google's ``/url?q=`` redirect prefix -- confirm against live markup).
    Headings without a usable anchor are skipped.

    Args:
        user_query: Free-text search phrase.

    Returns:
        A list of link strings. The list is empty when the response status is
        not OK; in that case the query is appended to
        ``Error-Logs/query_errors.txt``.
    """
    # Local import keeps this cell self-contained; the notebook only does
    # ``import urllib``, which does not guarantee ``urllib.parse`` is loaded.
    from urllib.parse import quote_plus

    links = []
    google_search = "https://www.google.com/search?sclient=psy-ab&client=ubuntu&hs=k5b&channel=fs&biw=1366&bih=648&noj=1&q=" + quote_plus(user_query)
    r = requests.get(google_search)
    if r.ok:
        soup = BeautifulSoup(r.text, "html.parser")
        for item in soup.find_all('h3', attrs={'class': 'r'}):
            anchor = item.a
            href = anchor.get('href') if anchor is not None else None
            if href is not None:
                links.append(href[7:])
    else:
        # Context manager guarantees the log is closed even if a write fails.
        with open("Error-Logs/query_errors.txt", "a+") as f:
            f.write("\n")
            f.write(user_query)
    return links

def clean_links(links):
    """Trim scraped links, drop the first entry, and print what remains.

    Each link is cut at its first ``&`` and at its first ``%``, so only the
    prefix before whichever comes first survives. The list is modified in
    place and the same list object is returned.

    Args:
        links: List of link strings (e.g. the result of ``return_links``).

    Returns:
        The same list, cleaned and without its first element. An empty list
        is returned unchanged instead of raising ``IndexError``.
    """
    # One pass per link is enough: each truncation only depends on the link
    # itself, so there is no need to rescan the whole list for every entry.
    for idx, link in enumerate(links):
        for marker in ('&', '%'):
            cut = link.find(marker)
            if cut != -1:
                link = link[:cut]
        links[idx] = link
    #Remove invalid google search query link
    if links:
        del links[0]
    for position, link in enumerate(links, start=1):
        print(position, link)
    return links

### Text

In [3]:
def clean_text(text):
    """Turn raw page text into a list of stemmed, filtered word tokens.

    Steps, in order: lower-case and tokenize; keep purely alphabetic tokens;
    drop English stop words (e.g. "at", "the"); Porter-stem what is left;
    finally drop stems that are common HTML markup words.

    Args:
        text: The raw text to process.

    Returns:
        A list of stemmed tokens in their original order.
    """
    # Lower-case first so the stop-word comparison is case-insensitive.
    tokens = word_tokenize(text.lower())
    alphabetic = filter(str.isalpha, tokens)

    # Stop words carry little meaning on their own, so they are discarded.
    english_stops = set(stopwords.words('english'))
    meaningful = [token for token in alphabetic if token not in english_stops]

    # Reduce each remaining word to its root form where possible.
    stemmer = PorterStemmer()
    roots = map(stemmer.stem, meaningful)

    # Leftover markup vocabulary that should not count as content.
    markup_noise = ('html', 'http', 'https', '.com', '.org', '.edu',
                    'img', 'href', 'span', 'b', 'u')
    return [root for root in roots if root not in markup_noise]
    
# Running index used to number excerpt files; shared across save_text calls.
count = 0
def save_text(links):
    """Download each link and save its cleaned paragraph text to an excerpt file.

    For every link whose request succeeds with an OK status, the contents of
    all ``<p>`` elements are concatenated, passed through ``clean_text`` and
    written to ``Excerpts/excerpt<count>.csv`` (link on the first line, a
    blank line, then the token list). Links that return a non-OK status, or
    whose request raises a ``requests`` exception, are appended to
    ``Error-Logs/text_saving_errors.txt`` and the loop moves on to the next
    link.

    The module-level ``count`` is incremented once per saved file and is not
    reset between calls, so the total printed at the end is cumulative for
    the current kernel session.

    Args:
        links: Iterable of URL strings to fetch.
    """
    global count
    for link in links:
        try:
            r = requests.get(link)
        except requests.RequestException:
            # A malformed or unreachable link must not abort the whole batch;
            # treat it like any other failed download and log it below.
            r = None
        if r is not None and r.ok:
            soup = BeautifulSoup(r.content, "html.parser")
            # str() of each paragraph's contents list, minus the surrounding
            # brackets; joined once rather than grown by repeated "+=".
            page_text = "".join(
                str(paragraph.contents)[1:-1] for paragraph in soup.find_all('p')
            )
            words = clean_text(page_text)
            with open("Excerpts/excerpt{}.csv".format(count), "w+") as f:
                f.write(str(link) + "\n\n")
                f.write(str(words))
            count += 1
        else:
            with open("Error-Logs/text_saving_errors.txt", "a+") as f:
                f.write("\n")
                f.write(link)
    print("\n{} files saved.".format(count))

### Main Program

In [4]:
# Example run: search for one headline, tidy the result links (which also
# prints them), then save one excerpt file per page that downloads.
a = clean_links(return_links("Lebron James' shocking decision to join Miami Heat"))
save_text(a)

1 https://www.wsj.com/articles/SB10001424052748704111704575355162960155010
2 http://www.espn.com/video/clip
3 https://www.cleveland.com/cavs/index.ssf/2014/07/lebron_james_cavaliers_heat_decision_2.html
4 https://www.sun-sentinel.com/sports/miami-heat/fl-sp-miami-heat-lebron-james-s20180609-story.html
5 https://www.nbcnews.com/think/opinion/lebron-james-decision-leave-cavaliers-lakers-isn-t-surprising-his-ncna888331
6 https://bleacherreport.com/articles/2785744-lebron-james-decision-and-the-most-shocking-free-agent-signings-in-nba-history
7 https://www.theguardian.com/sport/2010/jul/09/lebron-james-joins-miami-heat
8 https://www.telegraph.co.uk/sport/othersports/basketball/7880779/LeBron-James-announces-move-to-Miami-Heat-as-Cavaliers-fans-burn-jerseys-in-anger.html
9 https://abcnews.go.com/GMA/lebron-james-cavs-owner-dan-gilbert-writes-letter/story

7 files saved.
