# Shared Methods and Attributes between Series and DataFrames

In [None]:
# A pandas Series is a one-dimensional data structure: a single column of data.
# A DataFrame is a two-dimensional data structure, like a regular table.
import pandas as pd
# Load the NBA dataset into a DataFrame.
# NOTE: this is an absolute, machine-specific Windows path; adjust it to your own checkout.
nba = pd.read_csv("C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\nba.csv")

In [None]:
# The .head() method shows the first 5 rows by default, just like on a pandas Series.
# Pass an integer (e.g. nba.head(3)) to change the number of rows.
nba.head()

In [None]:
# The .tail() method shows the last 5 rows by default, just like on a pandas Series.
# Pass an integer (e.g. nba.tail(3)) to change the number of rows.
nba.tail()

In [None]:
# The .index attribute returns the row labels of the DataFrame.
# read_csv was called without index_col, so this is the default RangeIndex.
nba.index

In [None]:
# The .values attribute returns the DataFrame's data as a NumPy array
# (row and column labels are not included).
nba.values

In [None]:
# The .shape attribute gives the dimensions of the DataFrame as a tuple: (rows, columns).
nba.shape

In [None]:
# The .dtypes attribute returns a Series holding the data type (dtype) of each column.
nba.dtypes

In [None]:
# Because .dtypes is itself a Series, .value_counts() can be chained on it
# to count how many columns share each data type.
nba.dtypes.value_counts()

In [None]:
# DataFrame exclusive attribute .columns returns an Index object holding the column names
# (not a plain Python list; use list(nba.columns) if a list is needed).
nba.columns

In [None]:
# The .axes attribute returns a list with both sets of labels:
# the row index first, then the column index.
nba.axes

In [None]:
# The .info() method prints a big-picture summary of the DataFrame:
# index range, column names, non-null counts, dtypes and memory usage.
nba.info()

## Difference between Shared Methods

In [None]:
# Load the revenue dataset, using its "Date" column as the row index.
# NOTE: this is an absolute, machine-specific Windows path; adjust it to your own checkout.
rev = pd.read_csv(
    "C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\revenue.csv",
    index_col="Date",
)

# Preview the first three rows.
rev.head(3)

In [None]:
# Calling .sum() on a DataFrame returns a Series with the sum of each column
# (the default axis adds the values down the rows).
rev.sum()

In [None]:
# The .sum() method on a DataFrame also has an axis parameter.
# axis="columns" (same as axis=1) sums horizontally, giving one total per row.
rev.sum(axis = "columns")

## Select One Column from a DataFrame

In [None]:
# Re-read the CSV so this section starts from an unmodified DataFrame.
nba = pd.read_csv("C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\nba.csv")
# Preview the first three rows.
nba.head(3)

In [None]:
# You can extract a column simply by using the column name as an attribute.
# Note: this does not work when the column name contains a space, and it is
# unreliable when the name matches an existing DataFrame attribute or method.
nba.Name

In [None]:
# Bracket notation works for any column name.
# A single name returns one column; a list of names returns several columns.
nba["Name"]

In [None]:
# A single column extracted from a DataFrame is a pandas Series.
# type() confirms this.
type(nba["Name"])

In [None]:
# Because the extracted column is a Series, Series methods such as .head() can be chained on it.
nba["Name"].head(3)

## Select Two or More Columns from a DataFrame

In [None]:
# Re-read the CSV so this section starts from an unmodified DataFrame.
nba = pd.read_csv("C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\nba.csv")
# Preview the first three rows.
nba.head(3)

In [None]:
# Pass a list of column names inside the brackets to select several columns.
# The result is a DataFrame, not a Series.
nba[["Name", "Team"]].head(3)

In [None]:
# The columns come back in the order given in the list, so reordering the list
# reorders the columns in the result.
nba[["Team", "Name"]].head(3)

In [None]:
# Select three columns and show the last 5 rows (the .tail() default).
nba[["Salary", "Team", "Name"]].tail()

In [None]:
# A cleaner way to do the above: store the list of column names in a variable
# first, then pass that variable inside the brackets.
select = ["Salary", "Team", "Name"]
nba[select].tail()

## Add New Column to DataFrame

In [None]:
# Re-read the CSV so this section starts from an unmodified DataFrame.
nba = pd.read_csv("C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\nba.csv")
# Preview the first three rows.
nba.head(3)

In [None]:
# You can add a column to a pandas DataFrame by assigning values to a new column name
# in brackets. A single scalar is repeated for every row, and the new column is
# appended at the right-hand end of the DataFrame.
# Be careful: if the column name already exists, its values are overwritten!
nba["Sport"] = "Basketball"
nba.head(3)

In [None]:
# Add a second constant-valued column the same way.
nba["League"] = "National Basketball Association"
nba.head(3)

In [None]:
# Re-read the CSV to discard the columns added above before demonstrating .insert().
nba = pd.read_csv("C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\nba.csv")
# Preview the first three rows.
nba.head(3)

In [None]:
# Another way to do this is the .insert() method, which lets you choose the
# integer position (0-based) at which the new column is placed.
# .insert() modifies the DataFrame in place; by default it raises a ValueError
# if the column name already exists, so re-running this cell will fail.
nba.insert(3, column = "Sport", value = "Basketball")
nba.head(3)

In [None]:
# Insert a second column at position 7 (0-based), again in place.
nba.insert(7, column = "League", value = "National Basketball Association")
nba.head(3)

## Broadcasting Operations

In [None]:
# Re-read the CSV so this section starts from an unmodified DataFrame.
nba = pd.read_csv("C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\nba.csv")
# Preview the first three rows.
nba.head(3)

In [None]:
# Adding 5 to the age of every player in the DataFrame.
# The result is a new Series; nba itself is unchanged because nothing is assigned back.
nba["Age"].add(5) # Can also do nba["Age"] + 5

In [None]:
# Subtracting 5 million from the salary of every player in the DataFrame.
# The result is a new Series; nba itself is unchanged because nothing is assigned back.
nba["Salary"].sub(5000000) # Can also do nba["Salary"] - 5000000

In [None]:
# Multiplying each weight by 0.453592 to convert from pounds to kilograms.
# The result is a new Series; nba itself is unchanged because nothing is assigned back.
nba["Weight"].mul(0.453592) # Can also do nba["Weight"] * 0.453592

In [None]:
# Store the converted weights in a new column so the result is kept in the DataFrame.
nba["Weight in Kilograms"] = nba["Weight"] * 0.453592
nba.head(3)

In [None]:
# Dividing each NBA player's salary by 1000000 to express it in millions,
# and storing the result in a new column.
# Equivalent operator form: nba["Salary"] / 1000000
nba["Salary in Millions"] = nba["Salary"].div(1000000)
nba.head(3)

# A Review of the .value_counts() Method

In [None]:
# Re-read the CSV so this section starts from an unmodified DataFrame.
nba = pd.read_csv("C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\nba.csv")
# Preview the first three rows.
nba.head(3)

In [None]:
# .value_counts() counts how often each unique value occurs in the column.
# Results are sorted from most to least frequent, and NaN is excluded by default.
nba["Position"].value_counts()

# Drop Rows with Null Values

In [None]:
# Re-read the CSV so this section starts from an unmodified DataFrame.
nba = pd.read_csv("C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\nba.csv")
# Preview the first three rows.
nba.head(3)

In [None]:
# Look at the last three rows before dropping anything, for comparison with the result below.
nba.tail(3)

In [None]:
# The .dropna() method drops rows that contain NaN values.
# By default (how="any") a row is removed if any of its values is NaN.
# inplace=True modifies nba directly instead of returning a new DataFrame.
nba.dropna(how = "all", inplace = True) # how="all" removes only rows in which every value is NaN
nba.tail(3)

In [None]:
# To remove columns that contain NaN, use .dropna() with axis set to 1 (or "columns").
# Without inplace=True this returns a new DataFrame and leaves nba unchanged.
nba.dropna(axis = 1)

In [None]:
# Dropping rows that contain a NaN value in the Salary column only.
# The subset parameter takes a list of the columns to check for NaN.
# Without inplace=True this returns a new DataFrame and leaves nba unchanged.
nba.dropna(subset = ["Salary"])

# Fill in Null Values with the .fillna() Method

In [None]:
# Re-read the CSV so this section starts from an unmodified DataFrame.
nba = pd.read_csv("C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\nba.csv")
# Preview the first three rows.
nba.head(3)

In [None]:
# Be careful using this: .fillna(0) on the whole DataFrame puts the number 0 into
# every column that has missing values, including text columns, which leaves
# those columns with mixed types.
# The result is a new DataFrame; nba itself is not modified.
nba.fillna(0)

In [None]:
# To avoid the above problem, call .fillna() on a single Series within the DataFrame
# and use a fill value of the right type for that column.
# The result is assigned back instead of using inplace=True on the selected column:
# nba["Salary"].fillna(0, inplace=True) is a chained in-place call that emits a
# FutureWarning in pandas 2.x and does not update nba when Copy-on-Write is enabled.
nba["Salary"] = nba["Salary"].fillna(0)
nba.head()

In [None]:
# Fill missing colleges with a text placeholder.
# The result is assigned back rather than using inplace=True on the selected column,
# which is a chained in-place call that does not update nba under Copy-on-Write.
nba["College"] = nba["College"].fillna("No College")
nba.head()

# The .astype() Method

In [None]:
# Re-read the CSV so this section starts from an unmodified DataFrame.
nba = pd.read_csv("C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\nba.csv")
# Preview the last three rows.
nba.tail(3)

In [None]:
# Replace missing values first: a column that still contains NaN cannot be
# converted to an integer dtype with .astype().
# The results are assigned back rather than using inplace=True on the selected column,
# which is a chained in-place call that does not update nba under Copy-on-Write.
nba["Salary"] = nba["Salary"].fillna(0)
nba["College"] = nba["College"].fillna("None")
nba.head(6)

In [None]:
# Check the current data type of each column before converting anything.
nba.dtypes

In [None]:
# .info() also reports non-null counts and memory usage, which is useful for
# comparing the DataFrame before and after a type conversion.
nba.info()

In [None]:
# The .astype() method converts a Series from one data type to another.
# It returns a new Series, so the result must be assigned back to the column.
# The conversion to int raises an error if the column still contains NaN values.
nba["Salary"] = nba["Salary"].astype("int")

In [None]:
# Show the first rows of the Salary column; the dtype is printed at the bottom of the output.
nba["Salary"].head()

In [None]:
# .nunique() returns the number of distinct values in the column (NaN excluded by default).
# A column with few distinct values is a good candidate for the "category" dtype.
nba["Position"].nunique()

In [None]:
# Preview the column converted to the "category" dtype.
# The result is not assigned back, so nba["Position"] keeps its original dtype.
nba["Position"].astype("category")

In [None]:
# Count the distinct team names to judge whether "category" is a sensible dtype for this column.
nba["Team"].nunique()

In [None]:
# Convert the Team column to the "category" dtype and assign it back so the change is kept.
nba["Team"] = nba["Team"].astype("category")
nba.head()

# Sort a DataFrame with the .sort_values() Method, Part I

In [None]:
# Re-read the CSV so this section starts from an unmodified DataFrame.
nba = pd.read_csv("C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\nba.csv")
# Preview the first three rows.
nba.head(3)

In [None]:
# Sort the rows by the Name column. The default order is ascending and NaN values
# are placed last. A new DataFrame is returned; nba itself is not modified.
nba.sort_values("Name")

# Sort a DataFrame with the .sort_values() Method, Part II

In [None]:
# Re-read the CSV so this section starts from an unmodified DataFrame.
nba = pd.read_csv("C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\nba.csv")
# Preview the first three rows.
nba.head(3)

In [None]:
# Pass a list of columns to sort by several keys: rows are sorted by Team first,
# and rows with the same Team are then sorted by Name.
nba.sort_values(["Team", "Name"])


In [None]:
# The ascending parameter also accepts a list with one boolean per sort column:
# here Team is sorted ascending and Name descending within each team.
nba.sort_values(["Team", "Name"], ascending = [True, False])

# Sort a DataFrame with the .sort_index() Method

In [None]:
# Re-read the CSV so this section starts from an unmodified DataFrame.
nba = pd.read_csv("C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\nba.csv")
# Preview the first three rows.
nba.head(3)

In [None]:
# Sort by Number, then Salary, then Name. inplace=True reorders nba itself,
# so the index labels are no longer in their original order afterwards.
nba.sort_values(["Number", "Salary", "Name"], inplace = True)
nba.tail(3)

In [None]:
# The .sort_index() method sorts the rows by their index labels instead of by column values.
# ascending=False puts the largest index label first; inplace=True modifies nba directly.
nba.sort_index(ascending = False, inplace = True)
nba.head(3)

# Rank Values with the .rank() Method

In [None]:
# Load the dataset and drop the rows in which every value is NaN.
nba = pd.read_csv("C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\nba.csv")
nba = nba.dropna(how="all")

# Replace the remaining missing salaries with 0 so the column can be stored as integers.
nba["Salary"] = nba["Salary"].fillna(0).astype("int")
nba.head(3)

In [None]:
# By default .rank() ranks in ascending order, so the smallest salary gets rank 1.
# ascending=False reverses this, so the largest salary gets rank 1.
# Tied values receive the average of their ranks by default, which can be fractional;
# .astype("int") then truncates those to whole numbers.
nba["Salary Rank"] = nba["Salary"].rank(ascending = False).astype("int")
nba.head(3)

In [None]:
# Sort by Salary from highest to lowest to check the ranking: the best-paid players come first.
# A new DataFrame is returned; nba itself is not modified.
nba.sort_values(by = "Salary", ascending = False)