# SQL Server Database

In [1]:
import sqlalchemy
from sqlalchemy import text
from functools import partial
import pandas as pd
from sql_connector import DB
import re

## Create Engine

In [2]:
nba = DB(db_name='NBA')
nba.test_connection()

2023-08-11 22:36:57,705 INFO sqlalchemy.engine.Engine SELECT CAST(SERVERPROPERTY('ProductVersion') AS VARCHAR)
2023-08-11 22:36:57,706 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-08-11 22:36:57,708 INFO sqlalchemy.engine.Engine SELECT schema_name()
2023-08-11 22:36:57,709 INFO sqlalchemy.engine.Engine [generated in 0.00059s] ()
2023-08-11 22:36:57,713 INFO sqlalchemy.engine.Engine SELECT CAST('test max support' AS NVARCHAR(max))
2023-08-11 22:36:57,713 INFO sqlalchemy.engine.Engine [generated in 0.00069s] ()
2023-08-11 22:36:57,716 INFO sqlalchemy.engine.Engine SELECT 1 FROM fn_listextendedproperty(default, default, default, default, default, default, default)
2023-08-11 22:36:57,716 INFO sqlalchemy.engine.Engine [generated in 0.00048s] ()
2023-08-11 22:36:57,721 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-08-11 22:36:57,721 INFO sqlalchemy.engine.Engine SELECT DB_NAME()
2023-08-11 22:36:57,722 INFO sqlalchemy.engine.Engine [generated in 0.00123s] ()
2023-08-11 22:36:57,726

True

## Create Table

### Columns to Create

In [4]:
player_shotLocations = pd.read_csv(r"D:\lianz\Desktop\Python\personal_projects\nba_airflow\data\player_shotLocations.csv", skipinitialspace=True, index_col=0)
games_played = pd.read_csv(r"D:\lianz\Desktop\Python\personal_projects\nba_airflow\data\games_played.csv", parse_dates=['DateTime'])
players = pd.read_csv(r"D:\lianz\Desktop\Python\personal_projects\nba_airflow\data\players.csv", parse_dates=['Birth Date'])
play_by_play = pd.read_parquet(r"D:\lianz\Desktop\Python\personal_projects\nba_airflow\data\play_by_play.parquet")

In [7]:
# Create function to create table in database
def create_table(engine, table_name, df):
    

IndentationError: expected an indented block (3255611815.py, line 3)

### Execute SQL to Create Table

In [None]:
create_team_table = text("""
CREATE TABLE teams(
team_id INT PRIMARY KEY NOT NULL,
team_name VARCHAR(20),
team_abbreviation CHAR(3)
)
""")

create_player_table = text("""
CREATE TABLE players(
player_id INT PRIMARY KEY,
player_name VARCHAR(30),
from INT,
to INT,
position VARCHAR(5),
height VARCHAR(5),
weight INT,
birth_date datetime,
college VARCHAR(255),
) 
""")

create_team_arena = text("""
CREATE TABLE arena_history(
arena_id INT PRIMARY KEY,
arena_name VARCHAR(30),
team_year INT,
team_id INT FOREIGN KEY REFERENCES teams(team_id)
)
""")

create_player_team_table = text("""
CREATE TABLE player_team_history(
id INT NOT NULL IDENTITY(1,1),
player_id INT FOREIGN KEY REFERENCES players(player_id),
player_age INT,
team_id INT FOREIGN KEY REFERENCES teams(team_id),
year VARCHAR(10)
)
""")

create_games_played = text("""
CREATE TABLE games_played(
game_id CHAR(18) PRIMARY KEY,
game_date datetime,
visitor_team_id INT FOREIGN KEY REFERENCES teams(team_id), 
visitor_pts INT NOT NULL,
home_team_id INT FOREIGN KEY REFERENCES teams(team_id),
home_pts INT NOT NULL,
overtime VARCHAR(5),
attendance INT,
arena_id INT
)
""")

with nba.engine.begin() as conn:
    conn.execute(create_team_table)
    conn.execute(create_player_table)
    conn.execute(create_team_arena)
    conn.execute(create_player_team_table)
    conn.execute(create_games_played)

## Add Columns to Table

In [None]:
add_arena_col = text("""
ALTER TABLE teams
ADD arena_name VARCHAR(30);
""")

with nba.engine.begin() as conn:
    conn.execute(add_arena_col)

## Insert Table

In [None]:
games_played = games_played.dropna(subset=['Arena'])
games_played['DateStr'].apply(lambda x: len(str(x)))

In [None]:
from sqlalchemy import DateTime, INT, VARCHAR

games_played.set_index('game_id').to_sql(name='games_played',
                    con=nba.engine,
                    if_exists='append',
                    dtype={'DateTime': DateTime,
                           'Visitor': VARCHAR(50),
                           'Visitor PTS': INT,
                           'Home': VARCHAR(50),
                           'Home PTS': INT,
                           'OT': VARCHAR(10),
                           'Attendance': INT, 
                           'Arena': VARCHAR(50), 
                           'DateStr': VARCHAR(12),
                           'Visitor_short': VARCHAR(3), 
                           'Home_short': VARCHAR(3),
                           'game_id': VARCHAR(18)})

In [None]:
teams_df = games_played.rename(columns={'Home':'team_name', 'Home_short':'team_abbreviation',})[['DateTime','team_name','team_abbreviation','Arena']].copy()
teams_df['year'] = teams_df['DateTime'].apply(lambda x: str(x.year))
teams_df.drop(columns=['DateTime'], inplace=True)
teams_df = teams_df.dropna(subset=['Arena']).drop_duplicates()
teams_df['team_id'] = teams_df['team_abbreviation'] + teams_df['year']

### Execute SQL Statement to INSERT

In [None]:
player_shotLocations[['PLAYER_ID', 'PLAYER_NAME','NICKNAME']].drop_duplicates('PLAYER_ID').to_sql('players',nba.engine, index=False)

In [None]:
insert_table = text("""
INSERT INTO teams (team_id, team_name)
VALUES (:player_id, :player_name)
""")

with nba.engine.begin() as conn:
    conn.execute(insert_table, [{"player_id": 1, "player_name": 1}, {"player_id": 2, "player_name": 4}],)

# Drop Table

In [45]:
# Create function to drop table, can take in a list of tables
def drop_table(table_list):
    for table in table_list:
        table.drop(engine)
        print(f"Table {table} dropped")



In [37]:
drop_table = text("""
DROP TABLE player_team_history;
DROP TABLE games_played;
DROP TABLE arena_history;
DROP TABLE players;
DROP TABLE teams;

""")

with nba.engine.begin() as conn:
    conn.execute(drop_table)

2023-08-11 22:30:43,797 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-08-11 22:30:43,798 INFO sqlalchemy.engine.Engine 
DROP TABLE player_team_history;
DROP TABLE games_played;
DROP TABLE arena_history;
DROP TABLE players;
DROP TABLE teams;


2023-08-11 22:30:43,798 INFO sqlalchemy.engine.Engine [generated in 0.00077s] ()
2023-08-11 22:30:43,831 INFO sqlalchemy.engine.Engine ROLLBACK


ProgrammingError: (pyodbc.ProgrammingError) ('42S02', "[42S02] [Microsoft][ODBC Driver 17 for SQL Server][SQL Server]Cannot drop the table 'player_team_history', because it does not exist or you do not have permission. (3701) (SQLExecDirectW)")
[SQL: 
DROP TABLE player_team_history;
DROP TABLE games_played;
DROP TABLE arena_history;
DROP TABLE players;
DROP TABLE teams;

]
(Background on this error at: https://sqlalche.me/e/20/f405)

# Drop Column from Table

In [None]:
column_to_drop = '[index]'
table_name = 'play_by_play'
drop_column = text(f"""
ALTER TABLE {table_name}
DROP COLUMN {column_to_drop}
""")

with nba.engine.begin() as conn:
    conn.execute(drop_column)
    print(f'Column {column_to_drop} has been dropped from table {table_name}')

# Access Data from Table

In [8]:
with nba.engine.connect() as conn:
    pbp = conn.execute(text("SELECT DISTINCT player_name FROM play_by_play"))
    rows = pbp.fetchall()

pbp_df = pd.DataFrame(rows, columns=pbp.keys(),)

2023-08-09 00:27:20,753 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-08-09 00:27:20,754 INFO sqlalchemy.engine.Engine SELECT DISTINCT player_name FROM play_by_play
2023-08-09 00:27:20,755 INFO sqlalchemy.engine.Engine [generated in 0.00143s] ()
2023-08-09 00:27:21,951 INFO sqlalchemy.engine.Engine ROLLBACK


In [9]:
special_names = [re.findall(r"[^a-z^A-Z^\s^'^-]", i) for i in pbp_df['player_name'] if re.findall(r"[^a-z^A-Z^\s^'^-]", i)]

In [12]:
chars = []

for i in special_names:
    chars.extend(i)

"".join(list(set(chars)))

'öÖèãáëžÁïéíýšêòçŽúŠÓäßôóü'

In [None]:
pbp_df['player_name'].apply(lambda x: len(x)).max()

In [13]:
def get_data(database, game_id, player_name=None,):
    with database.engine.connect() as conn:
        pbp = conn.execute(
            text(f"SELECT * FROM play_by_play WHERE game_id = '{game_id}'"))
        rows = pbp.fetchall()
    pbp_df = pd.DataFrame(rows, columns=pbp.keys())
    return pbp_df

get_data(nba, '202304301300MIANYK')

2023-08-09 00:28:16,873 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-08-09 00:28:16,874 INFO sqlalchemy.engine.Engine SELECT * FROM play_by_play WHERE game_id = '202304301300MIANYK'
2023-08-09 00:28:16,875 INFO sqlalchemy.engine.Engine [generated in 0.00146s] ()
2023-08-09 00:28:17,134 INFO sqlalchemy.engine.Engine ROLLBACK


Unnamed: 0,index,player_name,time_left,team_name,score_status,x_shot_pos,y_shot_pos,quarter,shot_status,full_text,datetime,game_id,player_id
0,0,Gabe Vincent,11:41.0,Miami,Miami now leads 2-0,77,405,1st quarter,make,"1st quarter, 11:41.0 remaining<br>Gabe Vincent...",2023-04-30 13:00:00,202304301300MIANYK,680
1,1,Gabe Vincent,11:20.0,Miami,Miami now leads 5-0,358,223,1st quarter,make,"1st quarter, 11:20.0 remaining<br>Gabe Vincent...",2023-04-30 13:00:00,202304301300MIANYK,680
2,2,Gabe Vincent,10:14.0,Miami,Miami leads 5-2,469,451,1st quarter,miss,"1st quarter, 10:14.0 remaining<br>Gabe Vincent...",2023-04-30 13:00:00,202304301300MIANYK,680
3,3,Jimmy Butler,9:51.0,Miami,Miami leads 5-2,283,417,1st quarter,miss,"1st quarter, 9:51.0 remaining<br>Jimmy Butler ...",2023-04-30 13:00:00,202304301300MIANYK,1034
4,4,Bam Adebayo,9:49.0,Miami,Miami leads 5-2,240,439,1st quarter,miss,"1st quarter, 9:49.0 remaining<br>Bam Adebayo m...",2023-04-30 13:00:00,202304301300MIANYK,141
...,...,...,...,...,...,...,...,...,...,...,...,...,...
166,166,Josh Hart,0:34.0,New York,New York trails 95-106,333,200,4th quarter,miss,"4th quarter, 0:34.0 remaining<br>Josh Hart mis...",2023-04-30 13:00:00,202304301300MIANYK,1090
167,167,Jalen Brunson,0:30.0,New York,New York trails 95-106,112,222,4th quarter,miss,"4th quarter, 0:30.0 remaining<br>Jalen Brunson...",2023-04-30 13:00:00,202304301300MIANYK,846
168,168,Josh Hart,0:27.0,New York,New York now trails 97-106,240,439,4th quarter,make,"4th quarter, 0:27.0 remaining<br>Josh Hart mad...",2023-04-30 13:00:00,202304301300MIANYK,1090
169,169,Jalen Brunson,0:17.0,New York,New York now trails 99-107,212,359,4th quarter,make,"4th quarter, 0:17.0 remaining<br>Jalen Brunson...",2023-04-30 13:00:00,202304301300MIANYK,846
