# Importing Pandas and Dataset

In [None]:
import pandas as pd

In [None]:
# Read chicago.csv from its local path into a DataFrame.
chicago = pd.read_csv(
    "C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\chicago.csv"
)

# Preview the first three rows.
chicago.head(3)

In [None]:
# Print a summary of the DataFrame: column names, non-null counts, dtypes and memory usage.
chicago.info()

In [None]:
# Count the distinct values in the "Department" column.
chicago["Department"].nunique()

In [None]:
# Count the distinct values in every column of the DataFrame.
chicago.nunique()

In [None]:
# Store "Department" with the category dtype; the other columns keep
# their existing dtypes.
chicago = chicago.astype({"Department": "category"})

# Preview the first three rows.
chicago.head(3)

In [None]:
# Print the summary again now that "Department" is a category column,
# so the dtypes and memory usage can be compared with the earlier output.
chicago.info()

# Common String Methods - <code>.lower()</code>, <code>.upper()</code>, <code>.title()</code>, and <code>.len()</code>

In [None]:
# Reload the dataset from disk and cast "Department" to the category dtype
# in a single chain.
chicago = pd.read_csv(
    "C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\chicago.csv"
).astype({"Department": "category"})

# Preview the first three rows.
chicago.head(3)

In [None]:
# The .str accessor exposes vectorized string methods on a Series; here
# .str.title() title-cases both text columns in one assignment.
chicago[["Name", "Position Title"]] = chicago[["Name", "Position Title"]].apply(
    lambda column: column.str.title()
)

# Preview the first three rows.
chicago.head(3)

In [None]:
# .str.len() returns the number of characters in each value of the Series.
chicago["Department"].str.len()

# The <code>.str.replace()</code> Method

In [None]:
# Reload the dataset, drop rows in which every column is missing, and
# cast "Department" to the category dtype.
chicago = (
    pd.read_csv("C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\chicago.csv")
    .dropna(how="all")
    .astype({"Department": "category"})
)

# Preview the first three rows.
chicago.head(3)

In [None]:
# Expand the abbreviation "MGMNT" to "MANAGEMENT" in every department name.
chicago["Department"] = chicago["Department"].str.replace(
    pat="MGMNT",
    repl="MANAGEMENT",
)

# Show the first five values (the default for .head()).
chicago["Department"].head(5)

In [None]:
# Remove the "$" symbol so the salary strings can be converted to floats.
# regex=False forces "$" to be treated as a literal character: as a regular
# expression it is the end-of-string anchor, and the default value of the
# `regex` parameter differs between pandas versions.
chicago["Employee Annual Salary"] = (
    chicago["Employee Annual Salary"]
    .str.replace("$", "", regex=False)
    .astype(float)
)

In [None]:
# A notebook cell only displays the value of its last expression, so the
# scalar statistics are printed explicitly instead of being discarded.
# Assumes "Employee Annual Salary" has already been converted to float.
print("Sum: ", chicago["Employee Annual Salary"].sum())
print("Mean:", chicago["Employee Annual Salary"].mean())
print("Std: ", chicago["Employee Annual Salary"].std())

# The ten largest salaries remain the cell's displayed result.
chicago["Employee Annual Salary"].nlargest(10)

# Filter with String Methods

In [None]:
# Reload the dataset, drop rows in which every column is missing, and
# cast "Department" to the category dtype.
chicago = (
    pd.read_csv("C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\chicago.csv")
    .dropna(how="all")
    .astype({"Department": "category"})
)

# Preview the last three rows.
chicago.tail(3)

In [None]:
# Select the rows whose "Position Title" contains "water", ignoring case
# by lower-casing the titles before the test.
mask = (
    chicago["Position Title"]
    .str.lower()
    .str.contains("water")
)
chicago.loc[mask]

In [None]:
# Select the rows whose "Position Title" starts with "water", ignoring case
# by lower-casing the titles before the test.
chicago.loc[
    chicago["Position Title"]
    .str.lower()
    .str.startswith("water")
]

In [None]:
# Returns a DataFrame where "Position Title" ends with "ist" (case-insensitive,
# because the titles are lower-cased before the test).
mask = chicago["Position Title"].str.lower().str.endswith("ist")
chicago[mask]

# More String Methods - <code>.strip()</code>, <code>.lstrip()</code>, and <code>.rstrip()</code>

In [None]:
# Reload the dataset, drop rows in which every column is missing, and
# cast "Department" to the category dtype.
chicago = (
    pd.read_csv("C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\chicago.csv")
    .dropna(how="all")
    .astype({"Department": "category"})
)

# Preview the last three rows.
chicago.tail(3)

In [None]:
# Trim whitespace from each end of every name: .str.lstrip() handles the
# left side and .str.rstrip() the right side.
chicago["Name"] = (
    chicago["Name"]
    .str.lstrip()
    .str.rstrip()
)

In [None]:
# .str.strip() removes leading and trailing whitespace from every position title.
chicago["Position Title"] = chicago["Position Title"].str.strip()

# String Methods on Index and Columns

In [None]:
# Reload the dataset with "Name" as the index, drop rows in which every
# column is missing, and cast "Department" to the category dtype.
chicago = (
    pd.read_csv(
        "C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\chicago.csv",
        index_col="Name",
    )
    .dropna(how="all")
    .astype({"Department": "category"})
)

# Preview the last three rows.
chicago.tail(3)

In [None]:
# String methods are also available on an Index through .str:
# first trim surrounding whitespace, then title-case the labels.
chicago.index = chicago.index.str.strip()
chicago.index = chicago.index.str.title()

# Preview the first three rows.
chicago.head(3)

In [None]:
# Upper-case every column label by mapping str.upper over the column names.
chicago = chicago.rename(columns=str.upper)

# Preview the first three rows.
chicago.head(3)

# Split Strings by Characters with the <code>.str.split()</code> Method

In [None]:
# Reload the dataset, drop rows in which every column is missing, and
# cast "Department" to the category dtype.
chicago = (
    pd.read_csv("C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\chicago.csv")
    .dropna(how="all")
    .astype({"Department": "category"})
)

# Preview the last three rows.
chicago.tail(3)

In [None]:
# Split every name on ",", keep the piece before the first comma,
# title-case it, and count how often each value occurs.
(
    chicago["Name"]
    .str.split(",")
    .str[0]
    .str.title()
    .value_counts()
)

In [None]:
# Split every position title on a single space, keep the first word,
# and count how often each word occurs.
(
    chicago["Position Title"]
    .str.split(" ")
    .str[0]
    .value_counts()
)

# More Practice with Splits

In [None]:
# Reload the dataset, drop rows in which every column is missing, and
# cast "Department" to the category dtype.
chicago = (
    pd.read_csv("C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\chicago.csv")
    .dropna(how="all")
    .astype({"Department": "category"})
)

# Preview the last three rows.
chicago.tail(3)

In [None]:
# Step by step: take the text after the comma, trim its surrounding
# whitespace, keep the first space-separated word, then show the five
# most frequent values.
(
    chicago["Name"]
    .str.split(",")
    .str[1]
    .str.strip()
    .str.split(" ")
    .str[0]
    .value_counts()
    .head()
)

# The <code>expand</code> and <code>n</code> Parameters of the <code>.str.split()</code> Method

In [None]:
# Reload the dataset, drop rows in which every column is missing, and
# cast "Department" to the category dtype.
chicago = (
    pd.read_csv("C:\\Users\\Maverick\\Documents\\git\\Data-Analysis-With-Pandas-And-Python\\datasets\\chicago.csv")
    .dropna(how="all")
    .astype({"Department": "category"})
)

# Preview the last three rows.
chicago.tail(3)

In [None]:
# expand=True makes .str.split() return a DataFrame with one column per
# piece instead of a Series of lists.
# NOTE(review): assumes names are stored as "LAST, FIRST", which is how the
# other cells in this notebook treat the text before and after the comma;
# the piece before the comma is therefore the last name. Confirm against the data.
# n=1 splits only on the first comma so exactly two columns are produced, and
# each piece is stripped so padding around the comma is not kept.
chicago[["Last Name", "First Name"]] = (
    chicago["Name"]
    .str.split(",", n=1, expand=True)
    .apply(lambda part: part.str.strip())
)

# Preview the first three rows.
chicago.head(3)

In [None]:
# n caps the number of splits: with n=1 each title is cut at its first
# space only, giving the first word and everything after it.
chicago[["First Title Word", "Remaining Words"]] = chicago["Position Title"].str.split(
    pat=" ",
    n=1,
    expand=True,
)

# Preview the first three rows.
chicago.head(3)