In [None]:
import pandas as pd

In [None]:
# Preview the raw file; the frame is displayed but not stored in a variable here.
pd.read_csv("nba.csv")
# pandas converts integer columns that contain NaN to floating point,
# because NaN itself is a float value.

# Shared Methods and Attributes

In [None]:
nba = pd.read_csv("nba.csv")

In [None]:
# Only the last expression of a cell is rendered, so just the tail(2) result shows;
# run head(7) on its own to see the first seven rows.
nba.head(7)
nba.tail(2)

In [None]:
nba.index

In [None]:
nba.values

In [None]:
nba.shape

In [None]:
nba.dtypes

In [None]:
nba.columns

In [None]:
# .axes bundles the row index and the column index together in one list
nba.axes

In [None]:
nba.info()

In [None]:
# Count how many columns share each dtype.
# DataFrame.get_dtype_counts() was deprecated in pandas 0.25 and removed in 1.0,
# so derive the same tally from .dtypes instead.
nba.dtypes.value_counts()

# Differences between Shared methods

In [None]:
rev = pd.read_csv("revenue.csv", index_col="Date")
rev

In [None]:
s = pd.Series([1,2,3])
s

In [None]:
s.sum()

In [None]:
# Only the last expression is rendered; the first two are kept for comparison.
rev.sum() # default axis = 0: one total per column
rev.sum(axis = 0) # or axis = "index"
rev.sum(axis = 1) # or axis = "columns": one total per row

# Select One Column from a `DataFrame`

In [None]:
# extract a single column as a Series via attribute access
# only works when the column name is a valid Python identifier (no spaces) and does
# not clash with an existing DataFrame attribute, so it can't be relied on to always work.
nba.Name
nba.Number
nba.Salary

# ending on an assignment means this cell renders no output
output = None

In [None]:
# extract a single column as a Series with bracket notation
# works for any column name, including names with spaces
nba["Name"]
nba["Number"]
nba["Salary"]

In [None]:
type(nba["Name"])

In [None]:
nba["Name"].head()

# Select Two or More Columns from a `DataFrame`

In [None]:
nba.head(3)

In [None]:
# selecting with a list of names creates a new DataFrame; the list sets the column order.
# only the last expression of the cell is rendered.
nba[["Team","Name"]].head(3)
nba[["Number","College"]]
nba[["Salary","Team","Name"]].tail()

In [None]:
select = ["Salary","Team","Name"]
nba[select]

# Add New Column to `DataFrame`

In [None]:
nba.head(3)

In [None]:
# reading nba["Sport"] before this point would raise a KeyError (no such column yet);
# assigning to it creates the column and repeats the scalar on every row
nba["Sport"] = "Basketball"
nba.head(3)

In [None]:
nba["League"] = "National Basketball Association"
nba.head(3)

In [None]:
#reset 
nba = pd.read_csv("nba.csv")
nba.head()

In [None]:
# place the new column at position 3 (0-based) instead of appending it at the end;
# .insert() modifies nba in place, so re-running this cell raises because "Sport" already exists
nba.insert(3, column = "Sport", value = "Basketball")

In [None]:
nba.head()

In [None]:
# Insert the league name at position 7 (0-based); .insert() modifies nba in place.
# Capitalised to match the "National Basketball Association" spelling used when the
# column is created by plain assignment.
nba.insert(7, column = "League", value = "National Basketball Association")

In [None]:
nba.head()

# Broadcasting Operations

In [None]:
#reset 
nba = pd.read_csv("nba.csv")
nba.head()

In [None]:
# Each arithmetic method has an operator twin; both broadcast the scalar to every row
# and return a new Series without changing nba.
nba["Age"].add(5)
nba["Age"] + 5

nba["Salary"].sub(5000000)
nba["Salary"] - 5000000

# 0.453592 looks like the pound-to-kilogram factor — assumes Weight is in pounds, confirm
nba["Weight"].mul(0.453592)
nba["Weight"] * 0.453592
# assigning the result is what actually stores it as a new column
nba["Weight in Kilograms"] = nba["Weight"] * 0.453592

In [None]:
nba.head()

In [None]:
# .div() and / are equivalent; only the assignment on the last line keeps the result
nba["Salary"].div(1000000)
nba["Salary"] / 1000000
nba["Salary in Millions"] = nba["Salary"] / 1000000

In [None]:
nba.head()

# A Review of the `.value_counts()` Method

In [None]:
nba = pd.read_csv("nba.csv")
nba.head()

In [None]:
# value_counts() orders by frequency, most common first,
# so .head(1) is the most frequent value and .tail() holds the rarest ones.
# only the last expression of the cell is rendered.
nba["Team"].value_counts()
nba["Position"].value_counts().head(1)
nba["Weight"].value_counts().tail()
nba["Salary"].value_counts()

# Drop Rows with Null Values

In [None]:
nba = pd.read_csv("nba.csv")
nba

In [None]:
# removes any row that has at least one NULL value; returns a new frame, nba is unchanged.
nba.dropna()
# removes only the rows where every value is NULL; inplace = True modifies nba itself.
nba.dropna(how = "all", inplace = True)

In [None]:
nba

In [None]:
#drop any columns that have a NULL value
nba.dropna(axis = 1) # or axis = "columns"

In [None]:
# only remove a row if there is a NULL value in the "Salary" column
nba.dropna(subset = ["Salary"])

# Fill in Null Values with the `.fillna()` Method

In [None]:
nba = pd.read_csv("nba.csv")
nba

In [None]:
# problem with filling every column with 0: text columns get a numeric 0 as well
nba.fillna(0)

In [None]:
# Assign the filled column back instead of calling fillna(inplace = True) on nba["Salary"]:
# the in-place call acts on the Series returned by the lookup, which under pandas
# Copy-on-Write is not linked to nba, so the DataFrame would keep its NaN values.
nba["Salary"] = nba["Salary"].fillna(0)
nba.head()

In [None]:
# Assign the filled column back instead of calling fillna(inplace = True) on nba["College"]:
# the in-place call acts on the Series returned by the lookup, which under pandas
# Copy-on-Write is not linked to nba, so the DataFrame would keep its NaN values.
nba["College"] = nba["College"].fillna("No College")
nba.head()

# The `astype()` Method

In [None]:
# .astype("int") requires the Series to have no NULL values, so clean them up first.
nba = pd.read_csv("nba.csv").dropna(how = "all")
# Assign the filled columns back rather than using fillna(inplace = True) on a column
# lookup: under pandas Copy-on-Write the in-place form leaves nba untouched, and the
# integer conversion of "Salary" would then fail on the remaining NaN values.
nba["Salary"] = nba["Salary"].fillna(0)
nba["College"] = nba["College"].fillna("None")
nba

In [None]:
# the "object" dtype is how string columns show up here
# nba.dtypes is not rendered (not the last expression); info() prints its own report
nba.dtypes
nba.info() # also reports non-null counts and memory usage

In [None]:
nba["Salary"] = nba["Salary"].astype("int")

In [None]:
nba.head()

In [None]:
# Cast both columns to integers in one call by mapping column name -> target dtype.
nba = nba.astype({"Number": "int", "Age": "int"})
nba.head()

In [None]:
# common dtypes: float, int, object, category; a column with only a few distinct
# values (e.g. m / f) is a good candidate for category
nba["Position"].nunique()

In [None]:
nba["Position"] = nba["Position"].astype("category")
nba.head()

In [None]:
# the nunique() result is not rendered (only the cell's last expression is);
# 30 is still low enough for categories
nba["Team"].nunique()
nba["Team"] = nba["Team"].astype("category")
nba.head()

* Rerun `.info()` to see the reduction in memory usage.

# Sort a `DataFrame` with the `.sort_values()` Method, Part I

In [None]:
nba = pd.read_csv("nba.csv")
nba

In [None]:
# the first two sorts return new frames that are discarded; only the Salary sort,
# with inplace = True, reorders nba itself (highest salary first)
nba.sort_values("Name", ascending = False)
nba.sort_values("Age", ascending = False)
nba.sort_values("Salary", ascending = False, inplace = True)
nba.head()

In [None]:
# ascending is the default and NaN values are placed last by default,
# so .tail() shows the end of the sort order
nba.sort_values("Salary").tail()
# na_position = "first" moves the NaN salaries to the top instead
nba.sort_values("Salary", na_position = "first")

# Sort a `DataFrame` with the `.sort_values()` Method, Part II

In [None]:
nba = pd.read_csv("nba.csv")
nba

In [None]:
# sort by Team ascending, then break ties within each team by Name descending;
# the ascending list lines up one-to-one with the column list
nba.sort_values(["Team","Name"], ascending = [True, False], inplace = True)
nba.head()

# Sort a `DataFrame` with the `.sort_index()` Method

In [None]:
nba = pd.read_csv("nba.csv")
nba

In [None]:
# reorder the rows by Number, then Salary, then Name so the index labels are no longer in order
nba.sort_values(["Number","Salary","Name"], inplace = True)
nba.tail()

In [None]:
# sort_index orders the rows by their index labels; descending here, modifying nba in place
nba.sort_index(ascending = False, inplace = True)
nba.head()

# Rank Values with the `.rank()` Method

In [None]:
# Load the data, drop rows that are entirely empty, and make Salary an integer
# column (missing salaries become 0) in a single method chain.
nba = (
    pd.read_csv("nba.csv")
    .dropna(how = "all")
    .assign(Salary = lambda frame: frame["Salary"].fillna(0).astype("int"))
)
nba

In [None]:
# rank 1 = highest salary because ascending = False
# NOTE(review): the default rank method averages tied positions (e.g. 2.5), and
# .astype("int") then truncates the fraction — confirm that is the intended tie handling
nba["Salary Rank"] = nba["Salary"].rank(ascending = False).astype("int")
nba.head()

In [None]:
nba.sort_values(by = "Salary", ascending = False)