# Tabla Draft

In [1]:
import numpy as np
import pandas as pd


# Carga de draft_history

In [2]:
draft_history = pd.read_csv('draft_history.csv')

In [3]:
np.shape(draft_history)

(7990, 14)

### Vista preeliminar de los datos

In [4]:
draft_history.head()

Unnamed: 0,person_id,player_name,season,round_number,round_pick,overall_pick,draft_type,team_id,team_city,team_name,team_abbreviation,organization,organization_type,player_profile_flag
0,79299,Clifton McNeeley,1947,1,1,1,Draft,1610610031,Pittsburgh,Ironmen,PIT,Texas-El Paso,College/University,0
1,78109,Glen Selbo,1947,1,2,2,Draft,1610610035,Toronto,Huskies,HUS,Wisconsin,College/University,1
2,76649,Eddie Ehlers,1947,1,3,3,Draft,1610612738,Boston,Celtics,BOS,Purdue,College/University,1
3,79302,Walt Dropo,1947,1,4,4,Draft,1610610032,Providence,Steamrollers,PRO,Connecticut,College/University,0
4,77048,Dick Holub,1947,1,5,5,Draft,1610612752,New York,Knicks,NYK,Long Island-Brooklyn,College/University,1


### Valores nulos

In [5]:
draft_history.isnull().sum()

person_id               0
player_name             0
season                  0
round_number            0
round_pick              0
overall_pick            0
draft_type              0
team_id                 0
team_city               0
team_name               0
team_abbreviation       0
organization           19
organization_type      19
player_profile_flag     0
dtype: int64

### Tipo de datos de las columnas

In [6]:
draft_history.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7990 entries, 0 to 7989
Data columns (total 14 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   person_id            7990 non-null   int64 
 1   player_name          7990 non-null   object
 2   season               7990 non-null   int64 
 3   round_number         7990 non-null   int64 
 4   round_pick           7990 non-null   int64 
 5   overall_pick         7990 non-null   int64 
 6   draft_type           7990 non-null   object
 7   team_id              7990 non-null   int64 
 8   team_city            7990 non-null   object
 9   team_name            7990 non-null   object
 10  team_abbreviation    7990 non-null   object
 11  organization         7971 non-null   object
 12  organization_type    7971 non-null   object
 13  player_profile_flag  7990 non-null   int64 
dtypes: int64(7), object(7)
memory usage: 874.0+ KB


In [7]:
season_unique = draft_history['season'].unique()
print(season_unique)

[1947 1948 1949 1950 1951 1952 1953 1954 1956 1957 1958 1959 1960 1961
 1962 1963 1964 1965 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976
 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990
 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004
 2005 2006 2007 2008 2009 2010 2011 2012 2013 2015 2016 2017 2018 2019
 2020 2021 2022 2023]


### Filtrado por season a partir de 2003 (jugadores activos)

In [8]:
# Filtro el dataframe a partir de 2003 y lo guardo en un nuevo dataframe.
draft_history_season=draft_history[draft_history['season'] >= 2003]

In [9]:
draft_history_season.head()

Unnamed: 0,person_id,player_name,season,round_number,round_pick,overall_pick,draft_type,team_id,team_city,team_name,team_abbreviation,organization,organization_type,player_profile_flag
6797,2544,LeBron James,2003,1,1,1,Draft,1610612739,Cleveland,Cavaliers,CLE,Saint Vincent-Saint Mary,High School,1
6798,2545,Darko Milicic,2003,1,2,2,Draft,1610612765,Detroit,Pistons,DET,KK Vrsac (Serbia),Other Team/Club,1
6799,2546,Carmelo Anthony,2003,1,3,3,Draft,1610612743,Denver,Nuggets,DEN,Syracuse,College/University,1
6800,2547,Chris Bosh,2003,1,4,4,Draft,1610612761,Toronto,Raptors,TOR,Georgia Tech,College/University,1
6801,2548,Dwyane Wade,2003,1,5,5,Draft,1610612748,Miami,Heat,MIA,Marquette,College/University,1


### Reviso los valores nulos

In [10]:
draft_history_season.isnull().sum()

person_id              0
player_name            0
season                 0
round_number           0
round_pick             0
overall_pick           0
draft_type             0
team_id                0
team_city              0
team_name              0
team_abbreviation      0
organization           2
organization_type      2
player_profile_flag    0
dtype: int64

In [11]:
draft_history_season = draft_history_season.fillna(0)

### N° de filas y columnas despues del filtro de season

In [12]:
np.shape(draft_history_season)

(1193, 14)

### Explicacion de las columnas:


round_number: Indica el número de la ronda en la que un jugador fue seleccionado durante el Draft. El Draft de la NBA tiene varias rondas, y cada ronda tiene un conjunto de selecciones.

round_pick: Se refiere al número específico del pick (elección) dentro de una ronda en la que un jugador fue seleccionado. Por ejemplo, el "round_pick" 5 en la primera ronda significa que el jugador fue el quinto en ser elegido en esa ronda.

overall_pick: Este es el número total de la elección en el Draft, contando todas las rondas. Por ejemplo, el "overall_pick" 15 significa que el jugador fue el 15º elegido en el Draft, sin importar en qué ronda.

draft_type: Especifica el tipo de Draft. Puede referirse al Draft general, como el Draft de la NBA, o a un Draft específico como el Draft de la NBA de 2024.

organization: Indica el equipo de la NBA que seleccionó al jugador en el Draft. Esto muestra qué equipo tiene los derechos sobre el jugador seleccionado.

organization_type: Se refiere al tipo de organización que está realizando la selección. En el contexto del Draft de la NBA, esto generalmente se refiere a los equipos de la NBA.

player_profile_flag: Es una bandera o indicador que puede mostrar si hay información adicional disponible sobre el perfil del jugador. Esto podría incluir detalles como si el jugador ha sido seleccionado en el Draft, si tiene alguna nota especial o si su perfil está completo.

## Eliminacion de columnas innecesarias


In [13]:
draft_history_season_columns_drop = ['draft_type', 'player_profile_flag']
draft_history_season_clear = draft_history_season.drop(columns=draft_history_season_columns_drop)

### Seteo de corchetes para evitar problemas debido del uso del / dentro de los valores de la columna organization_type

In [14]:
draft_history_season_clear['organization_type'] = draft_history_season_clear['organization_type'].apply(lambda x: f"[{x}]")

In [15]:
draft_history_season_clear.rename(columns={'person_id': 'draft_id'}, inplace=True)

In [16]:
draft_history_season_clear.rename(columns={'season': 'draft_season'}, inplace=True)

In [17]:
draft_history_season_clear.head()

Unnamed: 0,draft_id,player_name,draft_season,round_number,round_pick,overall_pick,team_id,team_city,team_name,team_abbreviation,organization,organization_type
6797,2544,LeBron James,2003,1,1,1,1610612739,Cleveland,Cavaliers,CLE,Saint Vincent-Saint Mary,[High School]
6798,2545,Darko Milicic,2003,1,2,2,1610612765,Detroit,Pistons,DET,KK Vrsac (Serbia),[Other Team/Club]
6799,2546,Carmelo Anthony,2003,1,3,3,1610612743,Denver,Nuggets,DEN,Syracuse,[College/University]
6800,2547,Chris Bosh,2003,1,4,4,1610612761,Toronto,Raptors,TOR,Georgia Tech,[College/University]
6801,2548,Dwyane Wade,2003,1,5,5,1610612748,Miami,Heat,MIA,Marquette,[College/University]


# Carga de draft_combine_stats

In [18]:
draft_stats = pd.read_csv('draft_combine_stats.csv')

In [19]:
np.shape(draft_stats)

(1202, 47)

### Vista preeliminar de los datos

In [20]:
draft_stats.head()

Unnamed: 0,season,player_id,first_name,last_name,player_name,position,height_wo_shoes,height_wo_shoes_ft_in,height_w_shoes,height_w_shoes_ft_in,...,spot_nba_break_right,spot_nba_corner_right,off_drib_fifteen_break_left,off_drib_fifteen_top_key,off_drib_fifteen_break_right,off_drib_college_break_left,off_drib_college_top_key,off_drib_college_break_right,on_move_fifteen,on_move_college
0,2001,12033,Adam,Allenspach,Adam Allenspach,C,83.5,6' 11.5'',,,...,,,,,,,,,,
1,2001,2240,Gilbert,Arenas,Gilbert Arenas,SG,74.25,6' 2.25'',,,...,,,,,,,,,,
2,2001,2220,Brandon,Armstrong,Brandon Armstrong,SG,75.5,6' 3.5'',,,...,,,,,,,,,,
3,2001,2203,Shane,Battier,Shane Battier,SF-PF,80.25,6' 8.25'',,,...,,,,,,,,,,
4,2001,12034,Cookie,Belcher,Cookie Belcher,SG-PG,75.0,6' 3'',,,...,,,,,,,,,,


### Tipo de datos del dataframe

In [21]:
draft_stats.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1202 entries, 0 to 1201
Data columns (total 47 columns):
 #   Column                        Non-Null Count  Dtype  
---  ------                        --------------  -----  
 0   season                        1202 non-null   int64  
 1   player_id                     1202 non-null   int64  
 2   first_name                    1202 non-null   object 
 3   last_name                     1202 non-null   object 
 4   player_name                   1202 non-null   object 
 5   position                      1197 non-null   object 
 6   height_wo_shoes               1153 non-null   float64
 7   height_wo_shoes_ft_in         1153 non-null   object 
 8   height_w_shoes                1008 non-null   float64
 9   height_w_shoes_ft_in          1008 non-null   object 
 10  weight                        1152 non-null   float64
 11  wingspan                      1153 non-null   float64
 12  wingspan_ft_in                1153 non-null   object 
 13  sta

### Filtro por season a partir del 2003 (jugadores activos)

In [22]:
# Filtro el datafrme a partir del 2003 y lo guardo en un nuevo dataframe.
draft_stats_season=draft_stats[draft_stats['season'] >= 2003]

In [23]:
draft_stats_season.head()

Unnamed: 0,season,player_id,first_name,last_name,player_name,position,height_wo_shoes,height_wo_shoes_ft_in,height_w_shoes,height_w_shoes_ft_in,...,spot_nba_break_right,spot_nba_corner_right,off_drib_fifteen_break_left,off_drib_fifteen_top_key,off_drib_fifteen_break_right,off_drib_college_break_left,off_drib_college_top_key,off_drib_college_break_right,on_move_fifteen,on_move_college
78,2004,2754,Tony,Allen,Tony Allen,SG,75.5,6' 3.5'',76.25,6' 4.25'',...,,,,,,,,,,
79,2004,2772,Trevor,Ariza,Trevor Ariza,SG-SF,79.0,6' 7'',80.25,6' 8.25'',...,,,,,,,,,,
80,2004,2857,Andre,Barrett,Andre Barrett,PG,68.75,5' 8.75'',69.75,5' 9.75'',...,,,,,,,,,,
81,2004,101138,Brandon,Bass,Brandon Bass,PF,78.25,6' 6.25'',79.25,6' 7.25'',...,,,,,,,,,,
82,2004,2740,Andris,Biedrins,Andris Biedrins,C,,,,,...,,,,,,,,,,


In [24]:
np.shape(draft_stats_season)

(1124, 47)

### Visualizar los valores nulos

In [25]:
draft_stats_season.isnull().sum()

season                             0
player_id                          0
first_name                         0
last_name                          0
player_name                        0
position                           5
height_wo_shoes                   49
height_wo_shoes_ft_in             49
height_w_shoes                   116
height_w_shoes_ft_in             116
weight                            50
wingspan                          49
wingspan_ft_in                    49
standing_reach                    50
standing_reach_ft_in              50
body_fat_pct                     197
hand_length                      405
hand_width                       405
standing_vertical_leap           184
max_vertical_leap                184
lane_agility_time                192
modified_lane_agility_time       713
three_quarter_sprint             189
bench_press                      393
spot_fifteen_corner_left        1050
spot_fifteen_break_left         1048
spot_fifteen_top_key            1048
s

In [26]:
player_sin_position = draft_stats_season[draft_stats_season['position'].isnull()]
player_sin_position.head()

Unnamed: 0,season,player_id,first_name,last_name,player_name,position,height_wo_shoes,height_wo_shoes_ft_in,height_w_shoes,height_w_shoes_ft_in,...,spot_nba_break_right,spot_nba_corner_right,off_drib_fifteen_break_left,off_drib_fifteen_top_key,off_drib_fifteen_break_right,off_drib_college_break_left,off_drib_college_top_key,off_drib_college_break_right,on_move_fifteen,on_move_college
189,2005,101122,Danny,Granger,Danny Granger,,79.5,6' 7.5'',80.5,6' 8.5'',...,,,,,,,,,,
314,2007,201146,Jianlian,Yi,Jianlian Yi,,,,,,...,,,,,,,,,,
948,2019,1629631,De'Andre,Hunter,De'Andre Hunter,,,,,,...,,,,,,,,,,
967,2019,1629643,Chuma,Okeke,Chuma Okeke,,,,,,...,,,,,,,,,,
984,2019,1629681,Killian,Tillie,Killian Tillie,,,,,,...,,,,,,,,,,


### Tratado de nulos
    Se decide trabajar hasta la columna three_quarter_corner_left ya que las demas columnas superan el 50% de valores nulos

In [27]:
columnas_a_rellenar_con_0 = [
    'position', 'height_wo_shoes', 'height_wo_shoes_ft_in', 'height_w_shoes',
    'height_w_shoes_ft_in', 'weight', 'wingspan', 'wingspan_ft_in',
    'standing_reach', 'standing_reach_ft_in', 'body_fat_pct',
    'hand_length', 'hand_width', 'standing_vertical_leap',
    'max_vertical_leap', 'lane_agility_time', 'modified_lane_agility_time',
    'three_quarter_sprint'
]

# Rellenar valores nulos con ceros para las columnas restantes
for columna in columnas_a_rellenar_con_0:
    draft_stats_season[columna] = draft_stats_season[columna].fillna(0)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  draft_stats_season[columna] = draft_stats_season[columna].fillna(0)


### Visualizacion del dataframe

In [28]:
draft_stats_season.head()

Unnamed: 0,season,player_id,first_name,last_name,player_name,position,height_wo_shoes,height_wo_shoes_ft_in,height_w_shoes,height_w_shoes_ft_in,...,spot_nba_break_right,spot_nba_corner_right,off_drib_fifteen_break_left,off_drib_fifteen_top_key,off_drib_fifteen_break_right,off_drib_college_break_left,off_drib_college_top_key,off_drib_college_break_right,on_move_fifteen,on_move_college
78,2004,2754,Tony,Allen,Tony Allen,SG,75.5,6' 3.5'',76.25,6' 4.25'',...,,,,,,,,,,
79,2004,2772,Trevor,Ariza,Trevor Ariza,SG-SF,79.0,6' 7'',80.25,6' 8.25'',...,,,,,,,,,,
80,2004,2857,Andre,Barrett,Andre Barrett,PG,68.75,5' 8.75'',69.75,5' 9.75'',...,,,,,,,,,,
81,2004,101138,Brandon,Bass,Brandon Bass,PF,78.25,6' 6.25'',79.25,6' 7.25'',...,,,,,,,,,,
82,2004,2740,Andris,Biedrins,Andris Biedrins,C,0.0,0,0.0,0,...,,,,,,,,,,


### Eliminacion de las columnas con NA

In [29]:
draft_stats_season_clear = draft_stats_season.dropna(axis=1)

### Visualizar el dataframe limpio

In [30]:
draft_stats_season_clear.head()

Unnamed: 0,season,player_id,first_name,last_name,player_name,position,height_wo_shoes,height_wo_shoes_ft_in,height_w_shoes,height_w_shoes_ft_in,...,standing_reach,standing_reach_ft_in,body_fat_pct,hand_length,hand_width,standing_vertical_leap,max_vertical_leap,lane_agility_time,modified_lane_agility_time,three_quarter_sprint
78,2004,2754,Tony,Allen,Tony Allen,SG,75.5,6' 3.5'',76.25,6' 4.25'',...,102.0,8' 6'',6.7,0.0,0.0,31.5,36.5,10.7,0.0,3.19
79,2004,2772,Trevor,Ariza,Trevor Ariza,SG-SF,79.0,6' 7'',80.25,6' 8.25'',...,107.5,8' 11.5'',8.0,0.0,0.0,29.5,32.0,11.63,0.0,3.29
80,2004,2857,Andre,Barrett,Andre Barrett,PG,68.75,5' 8.75'',69.75,5' 9.75'',...,92.0,7' 8'',6.7,0.0,0.0,29.0,34.5,10.63,0.0,3.08
81,2004,101138,Brandon,Bass,Brandon Bass,PF,78.25,6' 6.25'',79.25,6' 7.25'',...,106.5,8' 10.5'',9.3,0.0,0.0,31.5,33.0,11.61,0.0,3.24
82,2004,2740,Andris,Biedrins,Andris Biedrins,C,0.0,0,0.0,0,...,0.0,0,9.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0


### Visualizar filas y columnas

In [31]:
np.shape(draft_stats_season_clear)

(1124, 23)

### Revisar valores nulos

In [32]:
draft_stats_season_clear.isnull().sum()

season                        0
player_id                     0
first_name                    0
last_name                     0
player_name                   0
position                      0
height_wo_shoes               0
height_wo_shoes_ft_in         0
height_w_shoes                0
height_w_shoes_ft_in          0
weight                        0
wingspan                      0
wingspan_ft_in                0
standing_reach                0
standing_reach_ft_in          0
body_fat_pct                  0
hand_length                   0
hand_width                    0
standing_vertical_leap        0
max_vertical_leap             0
lane_agility_time             0
modified_lane_agility_time    0
three_quarter_sprint          0
dtype: int64

In [33]:
# Borrar duplicados basados en la columna 'player_id'
draft_stats_season_clear_sin_duplicados = draft_stats_season_clear.drop_duplicates(subset=['player_id'])

# Mostrar las primeras filas del DataFrame sin duplicados
print("Primeras filas del DataFrame sin duplicados:")
print(draft_stats_season_clear_sin_duplicados.head())

Primeras filas del DataFrame sin duplicados:
    season  player_id first_name last_name      player_name position  \
78    2004       2754       Tony     Allen       Tony Allen       SG   
79    2004       2772     Trevor     Ariza     Trevor Ariza    SG-SF   
80    2004       2857      Andre   Barrett    Andre Barrett       PG   
81    2004     101138    Brandon      Bass     Brandon Bass       PF   
82    2004       2740     Andris  Biedrins  Andris Biedrins        C   

    height_wo_shoes height_wo_shoes_ft_in  height_w_shoes  \
78            75.50              6' 3.5''           76.25   
79            79.00                6' 7''           80.25   
80            68.75             5' 8.75''           69.75   
81            78.25             6' 6.25''           79.25   
82             0.00                     0            0.00   

   height_w_shoes_ft_in  ...  standing_reach  standing_reach_ft_in  \
78            6' 4.25''  ...           102.0                8' 6''   
79            6

In [34]:
draft_stats_season_clear.rename(columns={'player_id': 'draft_id'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  draft_stats_season_clear.rename(columns={'player_id': 'draft_id'}, inplace=True)


In [35]:
draft_stats_season_clear.rename(columns={'season': 'draft_season'}, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  draft_stats_season_clear.rename(columns={'season': 'draft_season'}, inplace=True)


### Merge entre draft_history y draft_combine_stats

In [36]:
draft_stats_season_clear_columnas_merge = draft_stats_season_clear[[ 'draft_id',
    'position',
    'height_wo_shoes',
    'height_wo_shoes_ft_in',
    'height_w_shoes',
    'height_w_shoes_ft_in',
    'weight',
    'wingspan',
    'wingspan_ft_in',
    'standing_reach',
    'standing_reach_ft_in',
    'body_fat_pct',
    'hand_length',
    'hand_width',
    'standing_vertical_leap',
    'max_vertical_leap',
    'lane_agility_time',
    'modified_lane_agility_time',
    'three_quarter_sprint'
]]

### Merge entre draft_history y draft_stats

In [37]:
draft_completo = pd.merge(draft_history_season_clear, draft_stats_season_clear_columnas_merge, on='draft_id', how='inner')

### Ver el nuevo dataframe

In [38]:
draft_completo.head()

Unnamed: 0,draft_id,player_name,draft_season,round_number,round_pick,overall_pick,team_id,team_city,team_name,team_abbreviation,...,standing_reach,standing_reach_ft_in,body_fat_pct,hand_length,hand_width,standing_vertical_leap,max_vertical_leap,lane_agility_time,modified_lane_agility_time,three_quarter_sprint
0,2730,Dwight Howard,2004,1,1,1,1610612753,Orlando,Magic,ORL,...,111.5,9' 3.5'',10.5,0.0,0.0,30.5,35.5,11.21,0.0,3.14
1,2731,Emeka Okafor,2004,1,2,2,1610612766,Charlotte,Bobcats,CHA,...,110.5,9' 2.5'',5.3,0.0,0.0,31.5,34.0,12.32,0.0,3.15
2,2732,Ben Gordon,2004,1,3,3,1610612741,Chicago,Bulls,CHI,...,99.0,8' 3'',5.3,0.0,0.0,31.5,37.5,11.28,0.0,3.19
3,2733,Shaun Livingston,2004,1,4,4,1610612746,Los Angeles,Clippers,LAC,...,105.5,8' 9.5'',6.7,0.0,0.0,26.5,30.0,10.72,0.0,3.23
4,2734,Devin Harris,2004,1,5,5,1610612764,Washington,Wizards,WAS,...,98.5,8' 2.5'',5.3,0.0,0.0,30.5,37.0,11.03,0.0,3.19


### Ver filas y columnas

In [39]:
np.shape(draft_completo)

(682, 30)

### Chequeo de duplicados

In [40]:
duplicados = draft_completo.duplicated(keep=False)

# Filtra las filas duplicadas
datos_duplicados = draft_completo[duplicados]
print(datos_duplicados)

Empty DataFrame
Columns: [draft_id, player_name, draft_season, round_number, round_pick, overall_pick, team_id, team_city, team_name, team_abbreviation, organization, organization_type, position, height_wo_shoes, height_wo_shoes_ft_in, height_w_shoes, height_w_shoes_ft_in, weight, wingspan, wingspan_ft_in, standing_reach, standing_reach_ft_in, body_fat_pct, hand_length, hand_width, standing_vertical_leap, max_vertical_leap, lane_agility_time, modified_lane_agility_time, three_quarter_sprint]
Index: []

[0 rows x 30 columns]


In [41]:
# Verificar duplicados basados en la columna 'player_id'
duplicados_player_id = draft_completo.duplicated(subset=['draft_id'])

# Mostrar las filas duplicadas basadas en 'player_id'
draft_completo[duplicados_player_id]

Unnamed: 0,draft_id,player_name,draft_season,round_number,round_pick,overall_pick,team_id,team_city,team_name,team_abbreviation,...,standing_reach,standing_reach_ft_in,body_fat_pct,hand_length,hand_width,standing_vertical_leap,max_vertical_leap,lane_agility_time,modified_lane_agility_time,three_quarter_sprint
456,1629021,Moritz Wagner,2018,1,25,25,1610612747,Los Angeles,Lakers,LAL,...,108.0,9' 0'',6.75,9.0,10.75,27.0,34.0,11.48,3.07,3.18
468,1628992,Justin Jackson,2018,2,13,43,1610612743,Denver,Nuggets,DEN,...,105.0,8' 9'',8.9,9.0,9.5,0.0,0.0,0.0,0.0,0.0
470,1628977,Hamidou Diallo,2018,2,15,45,1610612751,Brooklyn,Nets,BKN,...,103.0,8' 7'',4.45,8.75,9.0,32.5,40.5,10.53,3.14,3.1
473,1629004,Svi Mykhailiuk,2018,2,17,47,1610612747,Los Angeles,Lakers,LAL,...,100.0,8' 4'',8.45,8.25,9.5,30.5,37.0,11.25,3.18,3.15
496,1629023,P.J. Washington,2019,1,12,12,1610612766,Charlotte,Hornets,CHA,...,106.5,8' 10.5'',8.6,9.0,10.25,0.0,0.0,0.0,0.0,0.0
516,1628981,Bruno Fernando,2019,2,4,34,1610612755,Philadelphia,76ers,PHI,...,110.0,9' 2'',5.4,9.25,10.0,29.5,33.5,11.29,3.05,3.21
518,1628998,Cody Martin,2019,2,6,36,1610612766,Charlotte,Hornets,CHA,...,102.5,8' 6.5'',5.0,8.25,9.0,28.0,35.5,10.44,2.99,3.32
534,1628986,Jaylen Hands,2019,2,26,56,1610612746,LA,Clippers,LAC,...,97.0,8' 1'',5.1,8.0,8.25,33.0,41.5,10.73,3.03,3.12
545,1628962,Udoka Azubuike,2020,1,27,27,1610612762,Utah,Jazz,UTA,...,109.0,9'1.00'',9.61,9.25,10.0,37.0,41.0,11.82,3.61,3.23
555,1629670,Jordan Nwora,2020,2,15,45,1610612749,Milwaukee,Bucks,MIL,...,104.0,8'8.00'',11.68,8.5,9.25,29.0,35.0,11.29,2.66,3.43


In [42]:
draft_completo.drop_duplicates(subset=['draft_id'], inplace=True)

In [43]:
# Verificar duplicados basados en la columna 'player_id'
duplicados_player_id = draft_completo.duplicated(subset=['draft_id'])

# Mostrar las filas duplicadas basadas en 'player_id'
draft_completo[duplicados_player_id]

Unnamed: 0,draft_id,player_name,draft_season,round_number,round_pick,overall_pick,team_id,team_city,team_name,team_abbreviation,...,standing_reach,standing_reach_ft_in,body_fat_pct,hand_length,hand_width,standing_vertical_leap,max_vertical_leap,lane_agility_time,modified_lane_agility_time,three_quarter_sprint


In [44]:
np.shape(draft_completo)

(666, 30)

### Revision de valores nulos luego del merge

In [45]:
draft_completo.isnull().sum()

draft_id                      0
player_name                   0
draft_season                  0
round_number                  0
round_pick                    0
overall_pick                  0
team_id                       0
team_city                     0
team_name                     0
team_abbreviation             0
organization                  0
organization_type             0
position                      0
height_wo_shoes               0
height_wo_shoes_ft_in         0
height_w_shoes                0
height_w_shoes_ft_in          0
weight                        0
wingspan                      0
wingspan_ft_in                0
standing_reach                0
standing_reach_ft_in          0
body_fat_pct                  0
hand_length                   0
hand_width                    0
standing_vertical_leap        0
max_vertical_leap             0
lane_agility_time             0
modified_lane_agility_time    0
three_quarter_sprint          0
dtype: int64

### Exportamos los datos

In [46]:
# Guardar el DataFrame en un archivo CSV
draft_completo.to_csv('draft_completo.csv', index=False)