In [1]:
import pandas as pd
import snscrape.modules.twitter as sntwitter #scraping twitter without an API
import datetime

In [2]:
def get_player_tweets(name):
    lst = []
    for i,tweet in enumerate(sntwitter.TwitterSearchScraper(name + ' from:Underdog__NBA since:2022-10-14').get_items()):
        if not "Lineup alert" in tweet.content and not "Load Mismanagement" in tweet.content:
            lst.append([tweet.url, tweet.date, tweet.content])
    df = pd.DataFrame(lst, columns =["URL", "Date", "Text"])
    return df

#Note: to get rewtweeted tweet: tweet.retweetedTweet, and to get quoted: tweet.quotedTweet, to get mentioned users: tweet.mentionedUsers
#to get username: tweet.username

In [79]:
giannis_df = get_player_tweets("Giannis")
giannis_df

  if not "Lineup alert" in tweet.content and not "Load Mismanagement" in tweet.content:
  lst.append([tweet.url, tweet.date, tweet.content])


Unnamed: 0,URL,Date,Text
0,https://twitter.com/Underdog__NBA/status/16411...,2023-03-29 21:38:27+00:00,Giannis Antetokounmpo (knee) will play Wednesday.
1,https://twitter.com/Underdog__NBA/status/16408...,2023-03-28 20:30:48+00:00,Giannis Antetokounmpo (knee) listed probable f...
2,https://twitter.com/Underdog__NBA/status/16404...,2023-03-27 17:30:23+00:00,Giannis Antetokounmpo (knee) listed out Monday.
3,https://twitter.com/Underdog__NBA/status/16400...,2023-03-26 20:31:23+00:00,Giannis Antetokounmpo (knee) listed probable f...
4,https://twitter.com/Underdog__NBA/status/16354...,2023-03-14 01:30:11+00:00,Giannis Antetokounmpo (hand) listed available ...
...,...,...,...
103,https://twitter.com/Underdog__NBA/status/15889...,2022-11-05 17:30:30+00:00,Giannis Antetokounmpo (knee) listed questionab...
104,https://twitter.com/Underdog__NBA/status/15887...,2022-11-05 01:30:19+00:00,Giannis Antetokounmpo (knee) listed available ...
105,https://twitter.com/Underdog__NBA/status/15886...,2022-11-05 00:34:11+00:00,Status alert: Giannis Antetokounmpo (knee) a g...
106,https://twitter.com/Underdog__NBA/status/15885...,2022-11-04 17:20:24+00:00,Status alert: Giannis Antetokounmpo (knee) wen...


## Data Cleaning

### Cleaning the Date

1. We need to convert timezone from UTC to US/Eastern.
2. Separate date and time

In [80]:
def clean_date(df):
  """
  Converts the time in the date column to EST, and separates the date and time into two separate columns.
  """
  giannis_df["Date"] = giannis_df["Date"].dt.tz_convert(tz = "US/Eastern") #converting from utc timezone to est
  date = pd.to_datetime(df["Date"])
  df["Date"] = date.dt.date
  df["Time"] = date.dt.time

In [81]:
clean_date(giannis_df)
giannis_df

Unnamed: 0,URL,Date,Text,Time
0,https://twitter.com/Underdog__NBA/status/16411...,2023-03-29,Giannis Antetokounmpo (knee) will play Wednesday.,17:38:27
1,https://twitter.com/Underdog__NBA/status/16408...,2023-03-28,Giannis Antetokounmpo (knee) listed probable f...,16:30:48
2,https://twitter.com/Underdog__NBA/status/16404...,2023-03-27,Giannis Antetokounmpo (knee) listed out Monday.,13:30:23
3,https://twitter.com/Underdog__NBA/status/16400...,2023-03-26,Giannis Antetokounmpo (knee) listed probable f...,16:31:23
4,https://twitter.com/Underdog__NBA/status/16354...,2023-03-13,Giannis Antetokounmpo (hand) listed available ...,21:30:11
...,...,...,...,...
103,https://twitter.com/Underdog__NBA/status/15889...,2022-11-05,Giannis Antetokounmpo (knee) listed questionab...,13:30:30
104,https://twitter.com/Underdog__NBA/status/15887...,2022-11-04,Giannis Antetokounmpo (knee) listed available ...,21:30:19
105,https://twitter.com/Underdog__NBA/status/15886...,2022-11-04,Status alert: Giannis Antetokounmpo (knee) a g...,20:34:11
106,https://twitter.com/Underdog__NBA/status/15885...,2022-11-04,Status alert: Giannis Antetokounmpo (knee) wen...,13:20:24


In [82]:
giannis_df["URL"][2]

'https://twitter.com/Underdog__NBA/status/1640405920579567646'

In [83]:
giannis_df[giannis_df["Date"] == datetime.date(2023, 3, 9)] #how to get a tweets from a certain date

Unnamed: 0,URL,Date,Text,Time
10,https://twitter.com/Underdog__NBA/status/16338...,2023-03-09,Giannis Antetokounmpo (illness) listed out Thu...,13:30:31


### Cleaning the Text of the Tweets

In [84]:
import nltk
nltk.download('punkt')
nltk.download('stopwords')
from nltk import word_tokenize
import string
from nltk.corpus import stopwords

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [85]:
tweet = giannis_df.loc[giannis_df["Date"] == datetime.date(2023, 3, 10)]["Text"].item()
tweet

'Giannis Antetokounmpo (hand) listed questionable for\xa0Saturday.'

Ideally, the only words in this tweet that we care about is Giannis, Antetokounmpo, hand, questionable, Saturday. <br>
I want to keep the injury type in case I want to look more into it later on. <br>
I am not sure what the \xa0 represents.

#### Removing Punctuation and Stop Words from Tweets

In [86]:
def clean_tweet(tweet):
  """
  Removes puncutation from the tweet, tokenizes it, then filters out any stop words. Returns a LIST of the words in the cleaned tweet
  """
  tweet_no_punc = tweet.translate(str.maketrans("", "",string.punctuation)) #removing puncutation from text by giving a mapping table used to translate the text 
  #needs to be a better way
  word_tokens = word_tokenize(tweet_no_punc) #splitting up the words in the text
  stopwords_custom = set(stopwords.words('english')) 
  stopwords_custom.update(("listed", "status", "alert", "ruled"))
  filtered_tweet = [w for w in word_tokens if not w.lower() in stopwords_custom] #removing all filter words
  return filtered_tweet

In [87]:
def words_to_tweet (words_lst):
  """
  Converts list of words in the tweet back to the tweet itself. Used after cleaning the tweet.
  """
  return ' '.join(words_lst)

In [88]:
def get_clean_tweets(df):
  """
  Creates a new column for the cleaned tweet, then for each tweet, we call clean_tweet and assign it in the new column
  """
  df["Cleaned Text"] = ""
  for ind, tweet in enumerate(df['Text']):
      clean_words_lst = clean_tweet(tweet)
      df["Cleaned Text"][ind] = words_to_tweet(clean_words_lst)
  return df

In [89]:
clean_df = get_clean_tweets(giannis_df)
clean_df

Unnamed: 0,URL,Date,Text,Time,Cleaned Text
0,https://twitter.com/Underdog__NBA/status/16411...,2023-03-29,Giannis Antetokounmpo (knee) will play Wednesday.,17:38:27,Giannis Antetokounmpo knee play Wednesday
1,https://twitter.com/Underdog__NBA/status/16408...,2023-03-28,Giannis Antetokounmpo (knee) listed probable f...,16:30:48,Giannis Antetokounmpo knee probable Wednesday
2,https://twitter.com/Underdog__NBA/status/16404...,2023-03-27,Giannis Antetokounmpo (knee) listed out Monday.,13:30:23,Giannis Antetokounmpo knee Monday
3,https://twitter.com/Underdog__NBA/status/16400...,2023-03-26,Giannis Antetokounmpo (knee) listed probable f...,16:31:23,Giannis Antetokounmpo knee probable Monday
4,https://twitter.com/Underdog__NBA/status/16354...,2023-03-13,Giannis Antetokounmpo (hand) listed available ...,21:30:11,Giannis Antetokounmpo hand available play Monday
...,...,...,...,...,...
103,https://twitter.com/Underdog__NBA/status/15889...,2022-11-05,Giannis Antetokounmpo (knee) listed questionab...,13:30:30,Giannis Antetokounmpo knee questionable Saturday
104,https://twitter.com/Underdog__NBA/status/15887...,2022-11-04,Giannis Antetokounmpo (knee) listed available ...,21:30:19,Giannis Antetokounmpo knee available play Friday
105,https://twitter.com/Underdog__NBA/status/15886...,2022-11-04,Status alert: Giannis Antetokounmpo (knee) a g...,20:34:11,Giannis Antetokounmpo knee gametime decision r...
106,https://twitter.com/Underdog__NBA/status/15885...,2022-11-04,Status alert: Giannis Antetokounmpo (knee) wen...,13:20:24,Giannis Antetokounmpo knee went shootaround Fr...


In [90]:
clean_df.loc[clean_df["Date"] == datetime.date(2023, 3, 10)]["Cleaned Text"].item()

'Giannis Antetokounmpo hand questionable Saturday'

#### Finding More Unecessary Words



We removed stopwords, but there are words that I don't really want, such as "listed" and "status". <br>
Let's find the most frequent words. <br>
Note that in function clean_tweet(), I have already added the words that I don't want to appear in the clean tweet versions in the stop words, so when I create a counter of all the words in the tweet, you will not see those words.


In [91]:
from collections import Counter

In [92]:
#First, let's get the list of lists of words in the tweets
lst_lst_words = [clean_tweet(tweet) for tweet in giannis_df["Text"]]
#lst_lst_words

In [93]:
#Then, let's flatten the nested lists to one list
flatten = [word for lst in lst_lst_words for word in lst]
#flatten

In [94]:
#now, let's count
Counter(flatten).most_common()

[('Giannis', 108),
 ('Antetokounmpo', 107),
 ('knee', 72),
 ('probable', 36),
 ('Monday', 26),
 ('play', 23),
 ('Friday', 19),
 ('Saturday', 16),
 ('available', 15),
 ('Thursday', 15),
 ('Tuesday', 13),
 ('questionable', 10),
 ('wrist', 10),
 ('Sunday', 9),
 ('Wednesday', 7),
 ('remains', 7),
 ('quad', 6),
 ('hand', 5),
 ('gametime', 4),
 ('decision', 4),
 ('remain', 4),
 ('injury', 4),
 ('report', 4),
 ('illness', 3),
 ('return', 3),
 ('calf', 3),
 ('expected', 2),
 ('doubtful', 2),
 ('headed', 2),
 ('locker', 2),
 ('room', 2),
 ('start', 2),
 ('limited', 2),
 ('practice', 2),
 ('MarJon', 2),
 ('Beauchamp', 2),
 ('practices', 2),
 ('went', 2),
 ('shootaround', 2),
 ('ankle', 2),
 ('EbireMoses', 1),
 ('still', 1),
 ('word', 1),
 ('Gannis', 1),
 ('wont', 1),
 ('leg', 1),
 ('diagnosed', 1),
 ('sprained', 1),
 ('ligament', 1),
 ('pain', 1),
 ('swelling', 1),
 ('subside', 1),
 ('undergo', 1),
 ('testing', 1),
 ('injured', 1),
 ('’', 1),
 ('Brook', 1),
 ('Lopez', 1),
 ('2023', 1),
 ('East',

Words we don't like
1. listed
3. status
4. alert
6. ruled

### Convert the references to weekdays to actual dates
Purpose: to match with the dates for betting data


In [95]:
import calendar
from datetime import timedelta

In [96]:
clean_df.loc[clean_df["Date"] == datetime.date(2023, 3, 10)]

Unnamed: 0,URL,Date,Text,Time,Cleaned Text
9,https://twitter.com/Underdog__NBA/status/16342...,2023-03-10,Giannis Antetokounmpo (hand) listed questionab...,14:31:18,Giannis Antetokounmpo hand questionable Saturday


March 10, 2023 was a Friday, so Underdog__NBA simply saying Saturday to refer to a game the next day makes sense. But, in the future, we don't know off the top of our heads that the 3/10/23 is a Friday without looking it up, and we don't know if Saturday might refer to 3/10 or to the next day. Therefore, we need to translate that Saturday into the date that the Bucks play.

My Strategy
1. Take out the weekdays in the tweets and put them into a new column
2. Convert the weekday into number
3. Then we add that to the date of the tweet and we should get date of game being referred to

In [135]:
def extract_days_of_week(text):
  """
  Extracts days of the week from the text by removing the day of the week from the text, then adding the day of the week into a new column. Returns those two columns
  """
  days_of_week = list(calendar.day_name) # words to be removed, days of the week
  words = text.split() # split the text into words
  extracted_words = [w for w in words if w in days_of_week] # extract the words to be removed
  new_text = ' '.join([w for w in words if w not in days_of_week]) # remove the words from the original text
  return pd.Series({'text': new_text, 'extracted_words': ' '.join(extracted_words)}) # return the modified text and extracted words as a Series



In [98]:
#apply the function to every entry in the dataframe
giannis_df[['Cleaned Text', 'Game weekday']] = giannis_df['Cleaned Text'].apply(extract_days_of_week)


Thanks, ChatGPT! If you type "In Python, I currently have a pandas dataframe and I have a column that contains strings. In that column, there are certain words that I want to remove from that column and put into a new column in the dataframe. How would I do that?" you will get the same code as above.

In [105]:
#remove tweets that dont refer to weekdays
giannis_df = giannis_df.drop(giannis_df[giannis_df["Game weekday"] == ""].index)
giannis_df

Unnamed: 0,URL,Date,Text,Time,Cleaned Text,Game weekday
0,https://twitter.com/Underdog__NBA/status/16411...,2023-03-29,Giannis Antetokounmpo (knee) will play Wednesday.,17:38:27,Giannis Antetokounmpo knee play,Wednesday
1,https://twitter.com/Underdog__NBA/status/16408...,2023-03-28,Giannis Antetokounmpo (knee) listed probable f...,16:30:48,Giannis Antetokounmpo knee probable,Wednesday
2,https://twitter.com/Underdog__NBA/status/16404...,2023-03-27,Giannis Antetokounmpo (knee) listed out Monday.,13:30:23,Giannis Antetokounmpo knee,Monday
3,https://twitter.com/Underdog__NBA/status/16400...,2023-03-26,Giannis Antetokounmpo (knee) listed probable f...,16:31:23,Giannis Antetokounmpo knee probable,Monday
4,https://twitter.com/Underdog__NBA/status/16354...,2023-03-13,Giannis Antetokounmpo (hand) listed available ...,21:30:11,Giannis Antetokounmpo hand available play,Monday
...,...,...,...,...,...,...
103,https://twitter.com/Underdog__NBA/status/15889...,2022-11-05,Giannis Antetokounmpo (knee) listed questionab...,13:30:30,Giannis Antetokounmpo knee questionable,Saturday
104,https://twitter.com/Underdog__NBA/status/15887...,2022-11-04,Giannis Antetokounmpo (knee) listed available ...,21:30:19,Giannis Antetokounmpo knee available play,Friday
105,https://twitter.com/Underdog__NBA/status/15886...,2022-11-04,Status alert: Giannis Antetokounmpo (knee) a g...,20:34:11,Giannis Antetokounmpo knee gametime decision r...,Friday
106,https://twitter.com/Underdog__NBA/status/15885...,2022-11-04,Status alert: Giannis Antetokounmpo (knee) wen...,13:20:24,Giannis Antetokounmpo knee went shootaround re...,Friday


In [107]:
#now numerate the weekday so we can add to the date

# create a mapping of weekday names to numbers
weekday_map = {'Monday': 0, 'Tuesday': 1, 'Wednesday': 2, 'Thursday': 3, 'Friday': 4, 'Saturday': 5, 'Sunday': 6}

# replace weekday names with numbers
giannis_df['day_of_week'] = giannis_df['Game weekday'].map(weekday_map).astype(int)

In [136]:
giannis_df

Unnamed: 0,URL,Date,Text,Time,Cleaned Text,Game weekday,day_of_week,Game Date
0,https://twitter.com/Underdog__NBA/status/16411...,2023-03-29,Giannis Antetokounmpo (knee) will play Wednesday.,17:38:27,Giannis Antetokounmpo knee play,Wednesday,2,2023-03-31
1,https://twitter.com/Underdog__NBA/status/16408...,2023-03-28,Giannis Antetokounmpo (knee) listed probable f...,16:30:48,Giannis Antetokounmpo knee probable,Wednesday,2,2023-03-30
2,https://twitter.com/Underdog__NBA/status/16404...,2023-03-27,Giannis Antetokounmpo (knee) listed out Monday.,13:30:23,Giannis Antetokounmpo knee,Monday,0,2023-03-27
3,https://twitter.com/Underdog__NBA/status/16400...,2023-03-26,Giannis Antetokounmpo (knee) listed probable f...,16:31:23,Giannis Antetokounmpo knee probable,Monday,0,2023-03-26
4,https://twitter.com/Underdog__NBA/status/16354...,2023-03-13,Giannis Antetokounmpo (hand) listed available ...,21:30:11,Giannis Antetokounmpo hand available play,Monday,0,2023-03-13
...,...,...,...,...,...,...,...,...
103,https://twitter.com/Underdog__NBA/status/15889...,2022-11-05,Giannis Antetokounmpo (knee) listed questionab...,13:30:30,Giannis Antetokounmpo knee questionable,Saturday,5,2022-11-10
104,https://twitter.com/Underdog__NBA/status/15887...,2022-11-04,Giannis Antetokounmpo (knee) listed available ...,21:30:19,Giannis Antetokounmpo knee available play,Friday,4,2022-11-08
105,https://twitter.com/Underdog__NBA/status/15886...,2022-11-04,Status alert: Giannis Antetokounmpo (knee) a g...,20:34:11,Giannis Antetokounmpo knee gametime decision r...,Friday,4,2022-11-08
106,https://twitter.com/Underdog__NBA/status/15885...,2022-11-04,Status alert: Giannis Antetokounmpo (knee) wen...,13:20:24,Giannis Antetokounmpo knee went shootaround re...,Friday,4,2022-11-08


In [132]:
#getting the game date

# convert the date series to datetime series
datetime_series = pd.to_datetime(giannis_df["Date"])

# create a Timedelta using the int series
offset = pd.to_timedelta(giannis_df["day_of_week"], unit='d')

# add the offset to the datetime series
new_datetime_series = datetime_series + offset

# convert the datetime series back to date series
giannis_df["Game Date"] = new_datetime_series.dt.date



In [133]:
giannis_df

Unnamed: 0,URL,Date,Text,Time,Cleaned Text,Game weekday,day_of_week,Game Date
0,https://twitter.com/Underdog__NBA/status/16411...,2023-03-29,Giannis Antetokounmpo (knee) will play Wednesday.,17:38:27,Giannis Antetokounmpo knee play,Wednesday,2,2023-03-31
1,https://twitter.com/Underdog__NBA/status/16408...,2023-03-28,Giannis Antetokounmpo (knee) listed probable f...,16:30:48,Giannis Antetokounmpo knee probable,Wednesday,2,2023-03-30
2,https://twitter.com/Underdog__NBA/status/16404...,2023-03-27,Giannis Antetokounmpo (knee) listed out Monday.,13:30:23,Giannis Antetokounmpo knee,Monday,0,2023-03-27
3,https://twitter.com/Underdog__NBA/status/16400...,2023-03-26,Giannis Antetokounmpo (knee) listed probable f...,16:31:23,Giannis Antetokounmpo knee probable,Monday,0,2023-03-26
4,https://twitter.com/Underdog__NBA/status/16354...,2023-03-13,Giannis Antetokounmpo (hand) listed available ...,21:30:11,Giannis Antetokounmpo hand available play,Monday,0,2023-03-13
...,...,...,...,...,...,...,...,...
103,https://twitter.com/Underdog__NBA/status/15889...,2022-11-05,Giannis Antetokounmpo (knee) listed questionab...,13:30:30,Giannis Antetokounmpo knee questionable,Saturday,5,2022-11-10
104,https://twitter.com/Underdog__NBA/status/15887...,2022-11-04,Giannis Antetokounmpo (knee) listed available ...,21:30:19,Giannis Antetokounmpo knee available play,Friday,4,2022-11-08
105,https://twitter.com/Underdog__NBA/status/15886...,2022-11-04,Status alert: Giannis Antetokounmpo (knee) a g...,20:34:11,Giannis Antetokounmpo knee gametime decision r...,Friday,4,2022-11-08
106,https://twitter.com/Underdog__NBA/status/15885...,2022-11-04,Status alert: Giannis Antetokounmpo (knee) wen...,13:20:24,Giannis Antetokounmpo knee went shootaround re...,Friday,4,2022-11-08


We are going to get the Bucks

In [41]:
import nba_api
from nba_api.stats.endpoints import teamgamelog

In [42]:
bucks_gamelog = teamgamelog.TeamGameLog(team_id=1610612749).get_data_frames()[0]

In [43]:
bucks_gamelog

Unnamed: 0,Team_ID,Game_ID,GAME_DATE,MATCHUP,WL,W,L,W_PCT,MIN,FGM,...,FT_PCT,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS
0,1610612749,0022201174,"APR 02, 2023",MIL vs. PHI,W,56,22,0.718,240,46,...,0.714,7,35,42,28,8,5,9,17,117
1,1610612749,0022201146,"MAR 30, 2023",MIL vs. BOS,L,55,22,0.714,240,38,...,0.692,15,30,45,22,4,1,13,16,99
2,1610612749,0022201136,"MAR 29, 2023",MIL @ IND,W,55,21,0.724,240,58,...,0.767,14,38,52,28,5,5,18,18,149
3,1610612749,0022201122,"MAR 27, 2023",MIL @ DET,W,54,21,0.720,240,45,...,0.846,15,35,50,28,5,4,8,23,126
4,1610612749,0022201109,"MAR 25, 2023",MIL @ DEN,L,53,21,0.716,240,39,...,0.905,4,36,40,26,7,2,10,19,106
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,1610612749,0022200085,"OCT 29, 2022",MIL vs. ATL,W,5,0,1.000,240,45,...,0.655,14,35,49,24,8,5,11,18,123
74,1610612749,0022200075,"OCT 28, 2022",MIL vs. NYK,W,4,0,1.000,240,42,...,0.815,19,41,60,23,8,8,6,23,119
75,1610612749,0022200060,"OCT 26, 2022",MIL vs. BKN,W,3,0,1.000,240,39,...,0.759,14,40,54,21,8,12,18,22,110
76,1610612749,0022200033,"OCT 22, 2022",MIL vs. HOU,W,2,0,1.000,240,48,...,0.619,8,39,47,29,6,10,16,22,125


In [49]:
def convert_date(gamelog): 
  gamelog["GAME_DATETIME"] = ""
  mon = gamelog["GAME_DATE"].str[0:3]
  day = gamelog["GAME_DATE"].str[4:6]
  yr = gamelog["GAME_DATE"].str[-2:]
  for i in range(len(mon)):
    gamelog["GAME_DATETIME"][i] = datetime.strptime(mon[i] + day[i] + yr[i], "%b%d%y").date()

In [50]:
convert_date(bucks_gamelog)
bucks_gamelog

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gamelog["GAME_DATETIME"][i] = datetime.strptime(mon[i] + day[i] + yr[i], "%b%d%y").date()


Unnamed: 0,Team_ID,Game_ID,GAME_DATE,MATCHUP,WL,W,L,W_PCT,MIN,FGM,...,OREB,DREB,REB,AST,STL,BLK,TOV,PF,PTS,GAME_DATETIME
0,1610612749,0022201174,"APR 02, 2023",MIL vs. PHI,W,56,22,0.718,240,46,...,7,35,42,28,8,5,9,17,117,2023-04-02
1,1610612749,0022201146,"MAR 30, 2023",MIL vs. BOS,L,55,22,0.714,240,38,...,15,30,45,22,4,1,13,16,99,2023-03-30
2,1610612749,0022201136,"MAR 29, 2023",MIL @ IND,W,55,21,0.724,240,58,...,14,38,52,28,5,5,18,18,149,2023-03-29
3,1610612749,0022201122,"MAR 27, 2023",MIL @ DET,W,54,21,0.720,240,45,...,15,35,50,28,5,4,8,23,126,2023-03-27
4,1610612749,0022201109,"MAR 25, 2023",MIL @ DEN,L,53,21,0.716,240,39,...,4,36,40,26,7,2,10,19,106,2023-03-25
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73,1610612749,0022200085,"OCT 29, 2022",MIL vs. ATL,W,5,0,1.000,240,45,...,14,35,49,24,8,5,11,18,123,2022-10-29
74,1610612749,0022200075,"OCT 28, 2022",MIL vs. NYK,W,4,0,1.000,240,42,...,19,41,60,23,8,8,6,23,119,2022-10-28
75,1610612749,0022200060,"OCT 26, 2022",MIL vs. BKN,W,3,0,1.000,240,39,...,14,40,54,21,8,12,18,22,110,2022-10-26
76,1610612749,0022200033,"OCT 22, 2022",MIL vs. HOU,W,2,0,1.000,240,48,...,8,39,47,29,6,10,16,22,125,2022-10-22


Next Steps <br>
1. Make a word cloud (for fun)?
2. add words we don't want to the list

Observations of Data
1. uses weekdays to refer to when a game would be played.
2. sometimes there's weird text

In [None]:
giannis_df_date.to_csv('giannis_23.csv', sep=',', index=False)