# Pandas: Intermediate (Part 3)

## Intro to NA Values

In [1]:
import pandas as pd
import numpy as np

In [None]:
# Load the sales table; index_col = 0 uses the first CSV column as the row index.
sales = pd.read_csv("sales.csv", index_col = 0)

In [None]:
# Display the DataFrame as loaded.
sales

In [None]:
# Baseline: column dtypes and non-null counts before any value is blanked out.
sales.info()

In [None]:
# Label-based lookup of a single cell: row "Steven", column "Thu".
sales.loc["Steven", "Thu"]

In [None]:
# Position-based assignment: overwrite the cell in the second row / second column with None.
sales.iloc[1,1] = None

In [None]:
# Inspect how the None assigned above is represented in the frame.
sales

In [None]:
# Same experiment with NumPy's NaN, this time in the third row / third column.
sales.iloc[2,2] = np.nan

In [None]:
# Compare the two blanked-out cells.
sales

In [None]:
# Re-run info() to compare non-null counts and dtypes with the baseline above.
# NOTE(review): presumably the two affected columns were integer and are now float — confirm against the data.
sales.info()

## Handling NA Values / Missing Values

In [None]:
import pandas as pd

In [None]:
# Load the Titanic passenger data (no index column specified).
titanic = pd.read_csv("titanic.csv")

In [None]:
# First rows, as a quick look at the columns.
titanic.head()

In [None]:
# Column dtypes and non-null counts; columns with fewer non-null entries contain missing values.
titanic.info()

In [None]:
# isna() yields a boolean frame; summing it counts the missing values per column.
titanic.isna().sum()

In [None]:
# Complement of the above: number of non-missing values per column.
titanic.notna().sum()

In [None]:
# Boolean-mask selection: rows where `embarked` is missing.
titanic.loc[titanic.embarked.isna()]

In [None]:
# (rows, columns) before anything is dropped.
titanic.shape

In [None]:
# Default dropna(): drop every row containing at least one missing value.
# The result is a new frame; `titanic` itself is not modified here.
titanic.dropna()

In [None]:
# Shape of that result, for comparison with titanic.shape.
titanic.dropna().shape

In [None]:
# how = "all": drop only rows in which every value is missing.
titanic.dropna(how = "all").shape

In [None]:
# axis = 1, how = "any": drop columns that contain at least one missing value.
titanic.dropna(axis = 1, how = "any").shape

In [None]:
# thresh = 500: keep only columns with at least 500 non-missing values.
titanic.dropna(axis = 1, thresh = 500).shape

In [None]:
# Same column filter applied in place: `titanic` is modified from here on.
titanic.dropna(axis = 1, thresh = 500, inplace = True)

In [None]:
# Check which columns remain after the in-place drop.
titanic.info()

In [None]:
titanic.loc[titanic.age.isna()]

In [None]:
mean_age = titanic.age.mean()
mean_age

In [None]:
titanic.age.fillna(value = mean_age, inplace = True)

In [None]:
titanic.age

In [None]:
titanic.info()

## Exporting DataFrames to csv

In [None]:
# Preview the frame that is about to be exported.
# NOTE(review): presumably this is the `titanic` frame left by the preceding cleaning cells — depends on notebook execution order.
titanic.head()

In [None]:
# Write the DataFrame to clean_df.csv; index = False leaves the row index out of the file.
titanic.to_csv("clean_df.csv", index = False)

In [None]:
# Read the file back to check the export.
pd.read_csv("clean_df.csv")

## Summary Statistics and Accumulations

In [None]:
import pandas as pd

In [None]:
# Reload the raw data; this rebinds `titanic`, replacing any frame modified by earlier cells.
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head()

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
titanic.describe()

In [None]:
# Number of non-missing values per row; axis = "columns" and axis = 1 (next cell) are equivalent spellings.
titanic.count(axis = "columns")

In [None]:
titanic.count(axis = 1)

In [None]:
# Earlier form of the call, kept for reference; the next cell is the current form.
#titanic.mean(axis = 1) # old

In [None]:
# Row-wise mean restricted to the numeric columns.
titanic.mean(axis = 1, numeric_only=True) # new

In [None]:
# Earlier form of the call, kept for reference; the next cell is the current form.
#titanic.sum(axis = 0) # old

In [None]:
# Column-wise sum restricted to the numeric columns.
titanic.sum(axis = 0, numeric_only=True) # new

In [None]:
titanic.head()

In [None]:
# Running total of `fare` down the rows.
titanic.fare.cumsum(axis = 0)

In [None]:
# Earlier form of the call, kept for reference; the next cell is the current form.
#titanic.corr() # old

In [None]:
# Pairwise correlation matrix of the numeric columns (Pearson by default).
titanic.corr(numeric_only=True) # new

In [None]:
# Correlation between two individual columns via Series.corr.
titanic.survived.corr(titanic.pclass)

## The agg() method

In [None]:
import pandas as pd

In [None]:
# Reload the raw data so this section starts from an unmodified frame.
titanic = pd.read_csv("titanic.csv")

In [None]:
titanic.head()

In [None]:
# Reference output: describe() reports a fixed set of statistics for the numeric columns.
titanic.describe()

In [None]:
# Earlier form of the call, kept for reference; the next cell is the current form.
#titanic.mean() # old

In [None]:
# Column means restricted to the numeric columns.
titanic.mean(numeric_only=True) # new

In [None]:
# Earlier form of the call, kept for reference; the next two cells are the current forms.
#titanic.agg("mean") # old

In [None]:
# agg() forwards the extra keyword argument to the named aggregation.
titanic.agg("mean", numeric_only=True) # new but not recommended

In [None]:
# Select the numeric columns first, then aggregate — no extra keyword needed.
titanic.select_dtypes("number").agg("mean") # new and best practice

In [None]:
# A list of function names yields one result row per aggregation.
titanic.select_dtypes("number").agg(["mean", "std"])

In [None]:
titanic.select_dtypes("number").agg(["mean", "std", "min", "max", "median"])

In [None]:
# A dict maps each column to its own aggregation(s): mean of `survived`, min and max of `age`.
titanic.select_dtypes("number").agg({"survived": "mean", "age":["min", "max"]})