In [1]:
%reset -fs
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import pandas_profiling as pp
from fuzzywuzzy import fuzz
from fuzzywuzzy import process
pd.set_option('display.max_columns', 200)
pd.set_option('display.max_rows', 200)

In [2]:
bet_df = pd.read_csv('betting_data.csv', index_col=0)

In [3]:
bet_df.Date = pd.to_datetime(bet_df.Date, infer_datetime_format=True)

In [4]:
stat_df = pd.read_csv('stathead_data.csv', index_col=0)

In [5]:
stat_df.rename(columns={'Unnamed: 5': 'Home/Away', 'Unnamed: 7':'Game Result'}, inplace=True)

In [6]:
stat_df['Home/Away'] = stat_df['Home/Away'].fillna('Home')
stat_df['Home/Away'] = stat_df['Home/Away'].replace('@','Away')

In [7]:
stat_df.Date = pd.to_datetime(stat_df.Date, infer_datetime_format=True)

In [8]:
players = stat_df.Player.str.split('\\', expand=True)

In [9]:
stat_df.Player = players[0]

In [10]:
stat_df = stat_df.sort_values(by=['Player', 'Date'], axis=0)

In [11]:
team_stat_df = pd.read_csv('teamstats.csv', index_col=0)

In [12]:
team_stat_df.rename(columns={'Unnamed: 3': 'Home/Away'}, inplace=True)
team_stat_df['Home/Away'] = team_stat_df['Home/Away'].fillna('Home')
team_stat_df['Home/Away'] = team_stat_df['Home/Away'].replace('@','Away')

In [13]:
team_stat_df['Date'] = pd.to_datetime(team_stat_df.Date, infer_datetime_format=True)

In [14]:
merged_df_1 = pd.merge(stat_df, team_stat_df,  how='left', left_on=['Date','Tm', 'Opp'], right_on = ['Date','Tm', 'Opp'])


In [15]:
merged_df_1.to_csv('player_team_stats_merged.csv')

In [15]:
merged_df_1.Player.unique()

array(['Aaron Gordon', 'Aaron Henry', 'Aaron Holiday', 'Aaron Nesmith',
       'Aaron Wiggins', 'Abdel Nader', 'Ade Murkey', 'Admiral Schofield',
       'Ahmad Caver', 'Al Horford', 'Alec Burks', 'Aleem Ford',
       'Aleksej Pokusevski', 'Alex Caruso', 'Alex Len',
       'Alfonzo McKinnie', 'Alize Johnson', 'Alperen Şengün',
       'Amir Coffey', 'Andre Drummond', 'Andre Iguodala',
       'Andrew Wiggins', 'Anfernee Simons', 'Anthony Davis',
       'Anthony Edwards', 'Anthony Gill', 'Anthony Lamb', 'Armoni Brooks',
       'Arnoldas Kulboka', 'Austin Reaves', 'Austin Rivers',
       'Avery Bradley', 'Ayo Dosunmu', 'B.J. Johnson', 'Bam Adebayo',
       'Ben McLemore', 'Bismack Biyombo', 'Blake Griffin',
       'Boban Marjanović', 'Bobby Portis', 'Bogdan Bogdanović',
       'Bojan Bogdanović', 'Bol Bol', 'Bones Hyland', 'Brad Wanamaker',
       'Bradley Beal', 'Brandon Boston Jr.', 'Brandon Clarke',
       'Brandon Goodwin', 'Brandon Ingram', 'Brandon Knight',
       'Brandon Williams', 

In [16]:
bet_df.dtypes

Date                  datetime64[ns]
Name                          object
Team                          object
Position                      object
Opp                           object
Home/Away                     object
Line                         float64
Moneyline                      int64
Projection                   float64
Diff                         float64
Pick                         float64
Result                         int64
pick_over_under               object
seven_day_accuracy           float64
successful_pick                int64
dtype: object

In [17]:
merged_df_1.dtypes

Player                 object
Age                    object
Pos                    object
Tm                     object
Home/Away_x            object
Opp                    object
Game Result            object
Date           datetime64[ns]
GS                      int64
MP_x                    int64
FG_x                    int64
FGA_x                   int64
FG%_x                 float64
2P_x                    int64
2PA_x                   int64
2P%_x                 float64
3P_x                    int64
3PA_x                   int64
3P%_x                 float64
FT_x                    int64
FTA_x                   int64
FT%_x                 float64
TS%                   float64
ORB                     int64
DRB                     int64
TRB                     int64
AST                     int64
STL                     int64
BLK                     int64
TOV                     int64
PF                      int64
PTS_x                   int64
GmSc                  float64
BPM       

In [18]:
bet_df

Unnamed: 0,Date,Name,Team,Position,Opp,Home/Away,Line,Moneyline,Projection,Diff,Pick,Result,pick_over_under,seven_day_accuracy,successful_pick
0,2021-10-21,Bogdan,ATL,G,DAL,vs.,15.5,-115,16.57,1.07,16.0,11,o,0.47,0
1,2021-10-23,Bogdan,ATL,G,CLE,@,14.5,-115,15.29,0.79,14.5,5,o,0.45,0
2,2021-10-25,Bogdan,ATL,G,DET,vs.,15.5,-115,12.85,-2.65,13.5,14,u,0.45,0
3,2021-10-28,Bogdan,ATL,G,WAS,@,12.5,-105,16.72,4.22,12.5,14,o,0.42,1
4,2021-10-30,Bogdan,ATL,G,PHI,@,12.5,-113,15.71,3.21,12.5,13,o,0.45,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4621,2021-12-04,Zeke Nnaji,DEN,F,NYK,@,5.5,-105,2.91,-2.59,5.5,21,u,0.55,0
4622,2021-12-06,Zeke Nnaji,DEN,F,CHI,@,7.5,115,3.35,-4.15,7.5,3,u,0.51,1
4623,2021-10-28,Ziaire Williams,MEM,G,GSW,@,5.5,-130,4.33,-1.17,5.5,4,u,0.42,1
4624,2022-01-17,Ziaire Williams,MEM,G,CHI,vs.,8.5,-105,6.11,-2.39,8.5,2,u,0.58,1


In [21]:
mat1 = []
mat2 = []


list1 = bet_df['Name'].tolist()
list2 = merged_df_1['Player'].tolist()

threshold=75

for i in list1:
    mat1.append(process.extract(i, list2, limit=2))
bet_df['Matches'] = mat1
bet_df

Unnamed: 0,Date,Name,Team,Position,Opp,Home/Away,Line,Moneyline,Projection,Diff,Pick,Result,pick_over_under,seven_day_accuracy,successful_pick,Matches
0,2021-10-21,Bogdan,ATL,G,DAL,vs.,15.5,-115,16.57,1.07,16.0,11,o,0.47,0,"[(Bogdan Bogdanović, 90), (Bogdan Bogdanović, ..."
1,2021-10-23,Bogdan,ATL,G,CLE,@,14.5,-115,15.29,0.79,14.5,5,o,0.45,0,"[(Bogdan Bogdanović, 90), (Bogdan Bogdanović, ..."
2,2021-10-25,Bogdan,ATL,G,DET,vs.,15.5,-115,12.85,-2.65,13.5,14,u,0.45,0,"[(Bogdan Bogdanović, 90), (Bogdan Bogdanović, ..."
3,2021-10-28,Bogdan,ATL,G,WAS,@,12.5,-105,16.72,4.22,12.5,14,o,0.42,1,"[(Bogdan Bogdanović, 90), (Bogdan Bogdanović, ..."
4,2021-10-30,Bogdan,ATL,G,PHI,@,12.5,-113,15.71,3.21,12.5,13,o,0.45,1,"[(Bogdan Bogdanović, 90), (Bogdan Bogdanović, ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4621,2021-12-04,Zeke Nnaji,DEN,F,NYK,@,5.5,-105,2.91,-2.59,5.5,21,u,0.55,0,"[(Zeke Nnaji, 100), (Zeke Nnaji, 100)]"
4622,2021-12-06,Zeke Nnaji,DEN,F,CHI,@,7.5,115,3.35,-4.15,7.5,3,u,0.51,1,"[(Zeke Nnaji, 100), (Zeke Nnaji, 100)]"
4623,2021-10-28,Ziaire Williams,MEM,G,GSW,@,5.5,-130,4.33,-1.17,5.5,4,u,0.42,1,"[(Ziaire Williams, 100), (Ziaire Williams, 100)]"
4624,2022-01-17,Ziaire Williams,MEM,G,CHI,vs.,8.5,-105,6.11,-2.39,8.5,2,u,0.58,1,"[(Ziaire Williams, 100), (Ziaire Williams, 100)]"


In [22]:
bet_df.to_csv('fuzzy_matches_bet_df.csv')