<a href="https://colab.research.google.com/github/deancuva/Careers/blob/main/Labs%5CPandas_and_CSV_Hands_On.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Loading the Data (CSV Files)**

We'll start by loading the two CSV files into pandas DataFrames.

In [1]:
import pandas as pd

regular_season_url = 'https://raw.githubusercontent.com/deancuva/DS2002/main/2023-2024%20NBA%20Player%20Stats%20-%20Regular.csv'
playoffs_url = 'https://raw.githubusercontent.com/deancuva/DS2002/main/2023-2024%20NBA%20Player%20Stats%20-%20Playoffs.csv'

# Both files are parsed with the same settings: ';' as the field separator
# and Latin-1 decoding.
csv_options = {'sep': ';', 'encoding': 'latin1'}
regular_season_df = pd.read_csv(regular_season_url, **csv_options)
playoffs_df = pd.read_csv(playoffs_url, **csv_options)

# Preview the first rows of each table under its own heading.
previews = (
    ("Regular Season Stats:", regular_season_df),
    ("\nPlayoff Stats:", playoffs_df),
)
for heading, frame in previews:
    print(heading)
    print(frame.head())



Regular Season Stats:
   Rk            Player   Pos  Age   Tm   G  GS    MP   FG   FGA  ...    FT%  \
0   1  Precious Achiuwa  PF-C   24  TOT  74  18  21.9  3.2   6.3  ...  0.616   
1   1  Precious Achiuwa     C   24  TOR  25   0  17.5  3.1   6.8  ...  0.571   
2   1  Precious Achiuwa    PF   24  NYK  49  18  24.2  3.2   6.1  ...  0.643   
3   2       Bam Adebayo     C   26  MIA  71  71  34.0  7.5  14.3  ...  0.755   
4   3      Ochai Agbaji    SG   23  TOT  78  28  21.0  2.3   5.6  ...  0.661   

   ORB  DRB   TRB  AST  STL  BLK  TOV   PF   PTS  
0  2.6  4.0   6.6  1.3  0.6  0.9  1.1  1.9   7.6  
1  2.0  3.4   5.4  1.8  0.6  0.5  1.2  1.6   7.7  
2  2.9  4.3   7.2  1.1  0.6  1.1  1.1  2.1   7.6  
3  2.2  8.1  10.4  3.9  1.1  0.9  2.3  2.2  19.3  
4  0.9  1.8   2.8  1.1  0.6  0.6  0.8  1.5   5.8  

[5 rows x 30 columns]

Playoff Stats:
   Rk                    Player Pos  Age   Tm   G  GS    MP   FG   FGA  ...  \
0   1          Precious Achiuwa  PF   24  NYK   9   2  20.4  2.3   4.8  .

In [None]:
# The bare last expression is what the notebook renders as the cell result.
print("\nPlayoff Stats:")
playoffs_preview = playoffs_df.head()
playoffs_preview

**Exploring the Data**

In [None]:
# Print the structural summary (DataFrame.info) of each table under its heading.
info_targets = (
    ("Regular Season Data Info:", regular_season_df),
    ("\nPlayoff Data Info:", playoffs_df),
)
for heading, frame in info_targets:
    print(heading)
    frame.info()




In [None]:
# Count the missing entries in each regular-season column.
print("\nMissing Values in Regular Season Data:")
regular_missing_counts = regular_season_df.isnull().sum()
regular_missing_counts


In [None]:

# Count the missing entries in each playoff column.
print("\nMissing Values in Playoff Data:")
playoff_missing_counts = playoffs_df.isnull().sum()
print(playoff_missing_counts)


In [None]:

# Descriptive statistics for the regular-season table, rendered as the cell result.
print("\nStatistical Summary (Regular Season):")
regular_summary = regular_season_df.describe()
regular_summary

In [None]:

# Descriptive statistics for the playoff table, rendered as the cell result.
print("\nStatistical Summary (Playoffs):")
playoff_summary = playoffs_df.describe()
playoff_summary

**Cleaning some Data**

In [None]:
# Readable replacements for the terse source headers; later cells refer to
# the columns by these names.
readable_names = {'Tm': 'Team', 'PTS': 'Points', 'TRB': 'Rebounds', 'AST': 'Assists'}

# Drop incomplete rows, then apply the friendlier column names in one chain.
# NOTE(review): dropna() with its defaults removes a row if *any* column is
# missing -- confirm that is the intended level of strictness.
regular_season_df = regular_season_df.dropna().rename(columns=readable_names)
playoffs_df = playoffs_df.dropna().rename(columns=readable_names)

# Show the resulting headers for both tables.
column_reports = (
    ("Updated Regular Season Columns:", regular_season_df),
    ("\nUpdated Playoff Columns:", playoffs_df),
)
for heading, frame in column_reports:
    print(heading)
    print(frame.columns)


**Data Analysis & Manipulation**

In [None]:
# A notebook cell only renders its *last* bare expression, so every table in
# this cell is shown with an explicit display() call (display is available by
# default in IPython/Jupyter/Colab kernels).
#
# NOTE(review): the regular-season preview shows 'TOT' rows in the team column
# for a player who also has per-team rows. Such players can therefore appear
# more than once in the rankings, and 'TOT' forms its own group in the
# regular-season team averages -- consider filtering those rows if that is
# not wanted.

# Top 10 rows by 'Points' in the regular season
# (NOTE(review): values look like per-game averages -- confirm).
top_scorers_regular = regular_season_df[['Player', 'Points']].sort_values(by='Points', ascending=False).head(10)
print("Top 10 Regular Season Scorers:")
display(top_scorers_regular)

# Top 10 rows by 'Points' in the playoffs
top_scorers_playoffs = playoffs_df[['Player', 'Points']].sort_values(by='Points', ascending=False).head(10)
print("Top 10 Playoff Scorers:")
display(top_scorers_playoffs)

# Group by 'Team' and average each player's Points/Rebounds/Assists.
# reset_index() turns 'Team' back into a regular column so it can be used as
# a merge key and plot axis later.
team_avg_stats_regular = regular_season_df.groupby('Team')[['Points', 'Rebounds', 'Assists']].mean().reset_index()
print("\nAverage Team Stats (Regular Season):")
display(team_avg_stats_regular)

# Same aggregation for the playoffs
team_avg_stats_playoffs = playoffs_df.groupby('Team')[['Points', 'Rebounds', 'Assists']].mean().reset_index()
print("\nAverage Team Stats (Playoffs):")
display(team_avg_stats_playoffs)


In [None]:
# Import visualization libraries
import matplotlib.pyplot as plt
import seaborn as sns

# Set the style for the plots
sns.set(style="whitegrid")

# Horizontal bar chart of the top 10 regular season scorers
plt.figure(figsize=(10,6))
sns.barplot(x='Points', y='Player', data=top_scorers_regular)
plt.title('Top 10 Regular Season Scorers')
plt.show()

# Horizontal bar chart of the top 10 playoff scorers
plt.figure(figsize=(10,6))
sns.barplot(x='Points', y='Player', data=top_scorers_playoffs)
plt.title('Top 10 Playoff Scorers')
plt.show()

# Compare average team points between regular season and playoffs.
# The merge uses the default inner join, so only teams present in both tables
# are kept; the suffixes tell the two sets of stat columns apart.
team_avg_stats = pd.merge(team_avg_stats_regular, team_avg_stats_playoffs, on='Team', suffixes=('_Regular', '_Playoffs'))

# Reshape to long form (one row per team and season) so seaborn can draw the
# two seasons side by side via `hue`. Drawing two separate barplots on the
# same axes would put both bars at the same position, and the one drawn last
# would cover the other.
season_labels = {'Points_Regular': 'Regular Season', 'Points_Playoffs': 'Playoffs'}
team_points_long = team_avg_stats.melt(
    id_vars='Team',
    value_vars=list(season_labels),
    var_name='Season',
    value_name='Average Points',
)
team_points_long['Season'] = team_points_long['Season'].map(season_labels)

plt.figure(figsize=(12,8))
sns.barplot(
    x='Average Points',
    y='Team',
    hue='Season',
    data=team_points_long,
    palette={'Regular Season': 'b', 'Playoffs': 'r'},
)
plt.title('Average Points per Team: Regular Season vs Playoffs')
# No explicit plt.legend(): seaborn adds the legend for the `hue` levels itself.
plt.show()


**Exercises**

**Exercise 1: Top 10 Rebounders**

Write code to display the top 10 players with the most rebounds in both the regular season and playoffs.


**Exercise 2: Assists Leader by Team**

Group the data by team and find which player on each team has the most assists, for both the regular season and the playoffs.

**Exercise 3: Visualize Rebounds per Team** (optional)

Create a visualization that compares the average rebounds per team for the regular season and playoffs. Which team saw the biggest increase or decrease?