# Tabla Team

In [39]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Carga de team

In [40]:
team = pd.read_csv('team.csv')

In [41]:
np.shape(team)

(30, 7)

### Vista preeliminar de los datos

In [42]:
team.head()

Unnamed: 0,id,full_name,abbreviation,nickname,city,state,year_founded
0,1610612737,Atlanta Hawks,ATL,Hawks,Atlanta,Atlanta,1949.0
1,1610612738,Boston Celtics,BOS,Celtics,Boston,Massachusetts,1946.0
2,1610612739,Cleveland Cavaliers,CLE,Cavaliers,Cleveland,Ohio,1970.0
3,1610612740,New Orleans Pelicans,NOP,Pelicans,New Orleans,Louisiana,2002.0
4,1610612741,Chicago Bulls,CHI,Bulls,Chicago,Illinois,1966.0


### Tipo de datos del dataframe

In [43]:
team.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   id            30 non-null     int64  
 1   full_name     30 non-null     object 
 2   abbreviation  30 non-null     object 
 3   nickname      30 non-null     object 
 4   city          30 non-null     object 
 5   state         30 non-null     object 
 6   year_founded  30 non-null     float64
dtypes: float64(1), int64(1), object(5)
memory usage: 1.8+ KB


### Conversion de la columna year_founded a INT

In [44]:
team['year_founded'] = team['year_founded'].astype(int)

In [45]:
team.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 30 entries, 0 to 29
Data columns (total 7 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   id            30 non-null     int64 
 1   full_name     30 non-null     object
 2   abbreviation  30 non-null     object
 3   nickname      30 non-null     object
 4   city          30 non-null     object
 5   state         30 non-null     object
 6   year_founded  30 non-null     int64 
dtypes: int64(2), object(5)
memory usage: 1.8+ KB


### Visualizar valores nulos

In [46]:
team.isnull().sum()

id              0
full_name       0
abbreviation    0
nickname        0
city            0
state           0
year_founded    0
dtype: int64

# Carga de team_details

In [47]:
team_details = pd.read_csv('team_details.csv')

In [48]:
np.shape(team_details)

(25, 14)

### Visualizacion preeliminar de los datos

In [49]:
team_details.head()

Unnamed: 0,team_id,abbreviation,nickname,yearfounded,city,arena,arenacapacity,owner,generalmanager,headcoach,dleagueaffiliation,facebook,instagram,twitter
0,1610612737,ATL,Hawks,1949.0,Atlanta,State Farm Arena,18729.0,Tony Ressler,Travis Schlenk,Quin Snyder,College Park Skyhawks,https://www.facebook.com/hawks,https://instagram.com/atlhawks,https://twitter.com/ATLHawks
1,1610612741,CHI,Bulls,1966.0,Chicago,United Center,21711.0,Michael Reinsdorf,Arturas Karnisovas,Billy Donovan,Windy City Bulls,https://www.facebook.com/chicagobulls,https://instagram.com/chicagobulls,https://twitter.com/chicagobulls
2,1610612742,DAL,Mavericks,1980.0,Dallas,American Airlines Center,19200.0,Mark Cuban,Nico Harrison,Jason Kidd,Texas Legends,https://www.facebook.com/dallasmavs,https://instagram.com/dallasmavs,https://twitter.com/dallasmavs
3,1610612743,DEN,Nuggets,1976.0,Denver,Ball Arena,,Stan Kroenke,Calvin Booth,Michael Malone,Grand Rapids Gold,https://www.facebook.com/DenverNuggets,https://instagram.com/nuggets,https://twitter.com/nuggets
4,1610612744,GSW,Warriors,1946.0,Golden State,Chase Center,,Joe Lacob,Bob Myers,Steve Kerr,Santa Cruz Warriors,https://www.facebook.com/warriors,https://instagram.com/warriors,https://twitter.com/warriors


### 
La "D-League" (actualmente conocida como la NBA G League) es la liga de desarrollo de la NBA, creada para dar a los jugadores jóvenes y a los talentos en desarrollo una plataforma para mejorar sus habilidades y tener la oportunidad de ser llamados a la NBA. 
La "D-League affiliation" se refiere a la relación o asociación entre un equipo de la NBA y un equipo de la D-League/G League.

Los equipos de la NBA tienen afiliaciones con equipos de la D-League/G League para que los jugadores que no están en el roster principal puedan jugar en la liga de desarrollo y así tener más oportunidades para demostrar su talento y mejorar su juego. La afiliación también permite que los jugadores se reúnan con el equipo de la NBA para prácticas y juegos cuando sea necesario.

### Tipo de datos del dataframe

In [50]:
team_details.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 25 entries, 0 to 24
Data columns (total 14 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   team_id             25 non-null     int64  
 1   abbreviation        25 non-null     object 
 2   nickname            25 non-null     object 
 3   yearfounded         25 non-null     float64
 4   city                25 non-null     object 
 5   arena               25 non-null     object 
 6   arenacapacity       16 non-null     float64
 7   owner               25 non-null     object 
 8   generalmanager      25 non-null     object 
 9   headcoach           24 non-null     object 
 10  dleagueaffiliation  25 non-null     object 
 11  facebook            25 non-null     object 
 12  instagram           25 non-null     object 
 13  twitter             25 non-null     object 
dtypes: float64(2), int64(1), object(11)
memory usage: 2.9+ KB


### Transformacion del tipo de dato de la columna yearfounded a INT

In [51]:
team_details['yearfounded'] = team_details['yearfounded'].astype(int)

### Visualizar valores nulos

In [52]:
team_details.isnull().sum()

team_id               0
abbreviation          0
nickname              0
yearfounded           0
city                  0
arena                 0
arenacapacity         9
owner                 0
generalmanager        0
headcoach             1
dleagueaffiliation    0
facebook              0
instagram             0
twitter               0
dtype: int64

### Tratado de valores nulos

In [53]:
team_details['arenacapacity'] = team_details['arenacapacity'].fillna(0)

### Transformacion del tipo de dato de la columna arenacapacity a INT

In [54]:
team_details['arenacapacity'] = team_details['arenacapacity'].astype(int)

In [55]:
team_details.head()

Unnamed: 0,team_id,abbreviation,nickname,yearfounded,city,arena,arenacapacity,owner,generalmanager,headcoach,dleagueaffiliation,facebook,instagram,twitter
0,1610612737,ATL,Hawks,1949,Atlanta,State Farm Arena,18729,Tony Ressler,Travis Schlenk,Quin Snyder,College Park Skyhawks,https://www.facebook.com/hawks,https://instagram.com/atlhawks,https://twitter.com/ATLHawks
1,1610612741,CHI,Bulls,1966,Chicago,United Center,21711,Michael Reinsdorf,Arturas Karnisovas,Billy Donovan,Windy City Bulls,https://www.facebook.com/chicagobulls,https://instagram.com/chicagobulls,https://twitter.com/chicagobulls
2,1610612742,DAL,Mavericks,1980,Dallas,American Airlines Center,19200,Mark Cuban,Nico Harrison,Jason Kidd,Texas Legends,https://www.facebook.com/dallasmavs,https://instagram.com/dallasmavs,https://twitter.com/dallasmavs
3,1610612743,DEN,Nuggets,1976,Denver,Ball Arena,0,Stan Kroenke,Calvin Booth,Michael Malone,Grand Rapids Gold,https://www.facebook.com/DenverNuggets,https://instagram.com/nuggets,https://twitter.com/nuggets
4,1610612744,GSW,Warriors,1946,Golden State,Chase Center,0,Joe Lacob,Bob Myers,Steve Kerr,Santa Cruz Warriors,https://www.facebook.com/warriors,https://instagram.com/warriors,https://twitter.com/warriors


### Eliminacion de columnas innecesarias

In [56]:
columnas_a_eliminar = ['owner', 'generalmanager', 'dleagueaffiliation', 'facebook', 'instagram', 'twitter']
team_details_clear = team_details.drop(columns=columnas_a_eliminar)

In [57]:
team_details_clear.head()

Unnamed: 0,team_id,abbreviation,nickname,yearfounded,city,arena,arenacapacity,headcoach
0,1610612737,ATL,Hawks,1949,Atlanta,State Farm Arena,18729,Quin Snyder
1,1610612741,CHI,Bulls,1966,Chicago,United Center,21711,Billy Donovan
2,1610612742,DAL,Mavericks,1980,Dallas,American Airlines Center,19200,Jason Kidd
3,1610612743,DEN,Nuggets,1976,Denver,Ball Arena,0,Michael Malone
4,1610612744,GSW,Warriors,1946,Golden State,Chase Center,0,Steve Kerr


### Verificar el total de equipos (30 equipos tiene la NBA):

In [58]:
nickname_team = team_details_clear['nickname'].unique()
print(nickname_team)

['Hawks' 'Bulls' 'Mavericks' 'Nuggets' 'Warriors' 'Rockets' 'Clippers'
 'Lakers' 'Heat' 'Bucks' 'Timberwolves' 'Nets' 'Pacers' '76ers' 'Suns'
 'Trail Blazers' 'Kings' 'Spurs' 'Thunder' 'Raptors' 'Jazz' 'Grizzlies'
 'Wizards' 'Pistons' 'Hornets']


### Hay 25 equipos, faltan 5 mas uno extra que sera TOT, para cuando algún jugador tenga más de un equipo en la temporada.

### Carga de equipos faltantes

In [59]:
# Datos extraidos de la pagina oficial de la NBA.
equipos_faltantes = {
    'team_id': [1610612738, 1610612739, 1610612740, 1610612752, 1610612753, 1610612754],
    'abbreviation': ['BOS', 'CLE', 'NOP', 'NYK', 'ORL', 'TOT'],
    'nickname': ['Celtics', 'Cavaliers', 'Pelicans', 'Knicks', 'Magic', 'Total Team'],
    'yearfounded': [1946, 1970, 2002, 1946, 1989, 0000],
    'city': ['Boston', 'Cleveland', 'New Orleans', 'New York', 'Orlando', 'Total Team'],
    'arena': ['TD Garden', 'Rocket Mortgage FieldHouse', 'Smoothie King Center', 'Madison Square Garden', 'Amway Center', 'Total Team'],
    'arenacapacity': [19080, 19431, 16500, 19690, 18000, 0000],
    'headcoach': ['Joe Mazzulla', 'Kenny Atkinson', 'Willie Green', 'Tom Thibodeau', 'Jamahl Mosley', 'Total Team']
}

# Crear un DataFrame con los equipos faltantes
df_faltantes = pd.DataFrame(equipos_faltantes)

# Agregar los equipos faltantes al DataFrame original
team_details_clear = pd.concat([team_details_clear, df_faltantes], ignore_index=True)

In [60]:
team_details_clear.head()

Unnamed: 0,team_id,abbreviation,nickname,yearfounded,city,arena,arenacapacity,headcoach
0,1610612737,ATL,Hawks,1949,Atlanta,State Farm Arena,18729,Quin Snyder
1,1610612741,CHI,Bulls,1966,Chicago,United Center,21711,Billy Donovan
2,1610612742,DAL,Mavericks,1980,Dallas,American Airlines Center,19200,Jason Kidd
3,1610612743,DEN,Nuggets,1976,Denver,Ball Arena,0,Michael Malone
4,1610612744,GSW,Warriors,1946,Golden State,Chase Center,0,Steve Kerr


### Aun hay equipos que faltan los datos de la arenacapacity

Se procede a cargar los datos faltantes

In [61]:
# Actualizar la capacidad de arena para equipos específicos
actualizar_capacidades = {
    'Nuggets': 19000,
    'Warriors': 18064,
    'Timberwolves': 19061,
    'Nets': 17732, 
    '76ers': 20341,
    'Suns': 17071,
    'Thunder': 18203,
    'Raptors': 19800,
    'Jazz': 18306,
    'Pistons': 20332
}

# Iterar sobre los equipos y actualizar la capacidad de arena
for equipo, capacidad in actualizar_capacidades.items():
    team_details_clear.loc[team_details_clear['nickname'] == equipo, 'arenacapacity'] = capacidad

# Mostrar el DataFrame actualizado
team_details_clear.head()

Unnamed: 0,team_id,abbreviation,nickname,yearfounded,city,arena,arenacapacity,headcoach
0,1610612737,ATL,Hawks,1949,Atlanta,State Farm Arena,18729,Quin Snyder
1,1610612741,CHI,Bulls,1966,Chicago,United Center,21711,Billy Donovan
2,1610612742,DAL,Mavericks,1980,Dallas,American Airlines Center,19200,Jason Kidd
3,1610612743,DEN,Nuggets,1976,Denver,Ball Arena,19000,Michael Malone
4,1610612744,GSW,Warriors,1946,Golden State,Chase Center,18064,Steve Kerr


### Ver valores nulos

In [62]:
team_details_clear.isnull().sum()

team_id          0
abbreviation     0
nickname         0
yearfounded      0
city             0
arena            0
arenacapacity    0
headcoach        1
dtype: int64

### Completar dato faltante en headcoach

In [63]:
team_details_clear.loc[team_details_clear['nickname'] == 'Raptors', 'headcoach'] = 'Darko Rajakovic'

### Rename de team_id a id de team_details_clear para realizar el merge con team

In [64]:
team_details_clear.rename(columns={'team_id': 'id'}, inplace=True)

In [65]:
team_details_clear.head()

Unnamed: 0,id,abbreviation,nickname,yearfounded,city,arena,arenacapacity,headcoach
0,1610612737,ATL,Hawks,1949,Atlanta,State Farm Arena,18729,Quin Snyder
1,1610612741,CHI,Bulls,1966,Chicago,United Center,21711,Billy Donovan
2,1610612742,DAL,Mavericks,1980,Dallas,American Airlines Center,19200,Jason Kidd
3,1610612743,DEN,Nuggets,1976,Denver,Ball Arena,19000,Michael Malone
4,1610612744,GSW,Warriors,1946,Golden State,Chase Center,18064,Steve Kerr


### Merge entre team y team_details

In [66]:
team_details_clear_columnas_merge = team_details_clear[['id', 'arena', 'arenacapacity', 'headcoach']]

In [67]:
team_completo = pd.merge(team, team_details_clear_columnas_merge, on='id', how='left')

In [68]:
team_completo = team_completo.rename(columns={'id': 'team_id'})
team_completo = team_completo.rename(columns={'abbreviation': 'team_abbreviation'})

In [69]:
team_completo.head()

Unnamed: 0,team_id,full_name,team_abbreviation,nickname,city,state,year_founded,arena,arenacapacity,headcoach
0,1610612737,Atlanta Hawks,ATL,Hawks,Atlanta,Atlanta,1949,State Farm Arena,18729,Quin Snyder
1,1610612738,Boston Celtics,BOS,Celtics,Boston,Massachusetts,1946,TD Garden,19080,Joe Mazzulla
2,1610612739,Cleveland Cavaliers,CLE,Cavaliers,Cleveland,Ohio,1970,Rocket Mortgage FieldHouse,19431,Kenny Atkinson
3,1610612740,New Orleans Pelicans,NOP,Pelicans,New Orleans,Louisiana,2002,Smoothie King Center,16500,Willie Green
4,1610612741,Chicago Bulls,CHI,Bulls,Chicago,Illinois,1966,United Center,21711,Billy Donovan


In [70]:
np.shape(team_completo)

(31, 10)

In [71]:
# Encuentra y muestra las filas duplicadas basadas en la columna 'team_id'
duplicados_team_id = team_completo[team_completo.duplicated(subset=['team_id'])]
duplicados_team_id

Unnamed: 0,team_id,full_name,team_abbreviation,nickname,city,state,year_founded,arena,arenacapacity,headcoach
18,1610612754,Indiana Pacers,IND,Pacers,Indiana,Indiana,1976,Total Team,0,Total Team


In [72]:
# Elimina las filas duplicadas basadas en la columna 'team_id' en el mismo dataframe
team_completo.drop_duplicates(subset=['team_id'], inplace=True)

In [73]:
# Muestra el dataframe actualizado sin duplicados
print("Dataframe sin duplicados:")
team_completo.head()


Dataframe sin duplicados:


Unnamed: 0,team_id,full_name,team_abbreviation,nickname,city,state,year_founded,arena,arenacapacity,headcoach
0,1610612737,Atlanta Hawks,ATL,Hawks,Atlanta,Atlanta,1949,State Farm Arena,18729,Quin Snyder
1,1610612738,Boston Celtics,BOS,Celtics,Boston,Massachusetts,1946,TD Garden,19080,Joe Mazzulla
2,1610612739,Cleveland Cavaliers,CLE,Cavaliers,Cleveland,Ohio,1970,Rocket Mortgage FieldHouse,19431,Kenny Atkinson
3,1610612740,New Orleans Pelicans,NOP,Pelicans,New Orleans,Louisiana,2002,Smoothie King Center,16500,Willie Green
4,1610612741,Chicago Bulls,CHI,Bulls,Chicago,Illinois,1966,United Center,21711,Billy Donovan


In [74]:
np.shape(team_completo)

(30, 10)

### Guardar el dataframe team_completo

In [75]:
team_completo.to_csv('team_completo_filtrado.csv', index=False)