# Pandas Basics: An Introduction

## Reading Data

In [None]:
import pandas as pd

In [None]:
# reading data 
pd.read_excel("../data/LungCapData.xls")

In [None]:
# store in a variable 
df = pd.read_excel("../data/LungCapData.xls")

In [None]:
# show entire dataframe 
df

## Pandas Display Options and the methods `head()` & `tail()`

In [None]:
# show entire dataframe 
df

In [None]:
# print dataframe 
print(df)

In [None]:
# max rows 
pd.options.display.max_rows

In [None]:
# min rows 
pd.options.display.min_rows

In [None]:
# examine first few rows(default 5 rows)
df.head()

In [None]:
# examine first few rows(n = 10)
df.head(10)

In [None]:
# examine first few rows(n = 2), passing n by keyword
df.head(n = 2)

In [None]:
# examine last few rows(default = 5)
df.tail()

In [None]:
# examine last few rows(n = 10)
df.tail(10) 

In [None]:
# examine last few rows(n = 10), passing n by keyword
df.tail(n = 10) 

### Sampling 

In [None]:
# randomly shows 10 rows 
df.sample(10)

### First Data Inspection

In [None]:
# examine dataframe 
df

In [None]:
# information about dataframe 
df.info()

In [None]:
# summary statistics of entire dataset 
df.describe()

In [None]:
# include categorical data 
df.describe(include = "object")

## Python Built-in Functions & DataFrame Attributes and Methods

### DataFrames and Python Built-in Functions

In [None]:
type(df)

In [None]:
len(df)

In [None]:
round(df, 0)

In [None]:
# iterating a DataFrame yields its column labels, so the builtin min() returns
# the smallest column label, not the smallest value (use df.min() for values)
min(df)

In [None]:
# iterating a DataFrame yields its column labels, so the builtin max() returns
# the largest column label, not the largest value (use df.max() for values)
max(df)

## DataFrame Attributes

In [None]:
df.shape

In [None]:
df.size

In [None]:
df.index

In [None]:
df.columns

## DataFrame Methods

In [None]:
df.head(n = 2)

In [None]:
df.info()

In [None]:
df.min()

## Method Chaining

In [None]:
# chaining methods: column means -> sorted ascending -> the two smallest
# numeric_only=True skips the object (text) columns; without it pandas >= 2.0
# raises TypeError when it tries to average strings
df.mean(numeric_only=True).sort_values().head(2)

## Selecting Columns

In [None]:
df.head() 

In [None]:
df["Age"]

In [None]:
type(df["Age"])

In [None]:
# select multiple columns 
df[["Age", "Gender"]]

In [None]:
# type 
type(df[["Age", "Gender"]])

In [None]:
df[["Gender", "Age"]]

In [None]:
df[["Gender", "Age", "LungCap"]]

In [None]:
type(df[["Age"]])

## Selecting one Column with "dot notation"

In [None]:
df.Age 

In [None]:
df['Age']

In [None]:
df.Age.equals(df["Age"])

In [None]:
df.Gender

### Position-based Indexing and Slicing with iloc[]

In [None]:
import pandas as pd

In [None]:
df = pd.read_csv("../data/covid19.csv", index_col = "Country/Region")

In [None]:
df

In [None]:
df.info()

### Selecting Rows with iloc[]

In [None]:
df.iloc[0]

In [None]:
type(df.iloc[0])

In [None]:
df.iloc[1]

In [None]:
df.iloc[-1]

In [None]:
df.iloc[[1, 2, 3]]

In [None]:
df.iloc[1:4]

In [None]:
df.iloc[:5]

In [None]:
df.iloc[-5:]

In [None]:
df.iloc[:]

In [None]:
# NOTE(review): iloc raises IndexError for out-of-bounds positions, so this
# needs at least 5468 rows in covid19.csv — confirm
df.iloc[[2, 45, 5467]]

### Indexing/Slicing Rows and Columns with iloc[]

In [None]:
df.head(10)

In [None]:
df.iloc[0, 4]

In [None]:
df.iloc[0, :3]

In [None]:
df.iloc[0, [0, 2]]

In [None]:
df.iloc[34:39, [0, 2, 5]]

### Selecting Columns with iloc[]

In [None]:
df.iloc[:, 4].equals(df.Confirmed)

In [None]:
df["Province/State"]

### Label-based Indexing and Slicing with loc[] 

### Selecting Rows with loc[]

In [None]:
df.columns

In [None]:
df.loc["Mainland China"] 

In [None]:
len(df.loc['Mainland China'])

### Indexing/Slicing Rows and Columns with loc[]

In [None]:
df = pd.read_csv('../data/covid19.csv', index_col="Country/Region")
df.head() 

In [None]:
df.loc["Mainland China", ["Confirmed", "Deaths"]].head() 

In [None]:
df.loc[["Mainland China", "Bangladesh"], ["Deaths", "Recovered"]].head() 

In [None]:
df.loc[:, ["Confirmed", "Deaths"]].head() 

### Indexing and Slicing with reindex()

In [None]:
import pandas as pd

In [None]:
df = pd.read_csv("../data/covid19.csv")

In [None]:
df

In [None]:
# df.loc[[0, 5, 30000, 40000], ["Confirmed", "Deaths", "Recovered"]]  # .loc raises KeyError if any listed label is missing

In [None]:
# reindex() does not raise for labels that are absent from the index/columns;
# it returns them as rows/columns filled with NaN
df.reindex(index = [0, 5, 30000, 40000], columns =  ["Confirmed", "Deaths", "Recovered"])

In [None]:
df = pd.read_csv("../data/covid19.csv", index_col = "Country/Region")

In [None]:
df.reindex(columns = ["Confirmed", "Deaths"])

## Summary and Outlook

### Importing from CSV and first Inspection

In [None]:
import pandas as pd

In [None]:
df = pd.read_csv("../data/covid19.csv", index_col = "Country/Region")
df.head() 

In [None]:
df.info()

### Selecting one Column

In [None]:
df.Confirmed

In [None]:
df["Deaths"]

### Selecting multiple Columns

In [None]:
df[["Confirmed", "Deaths"]].head() 

In [None]:
df.loc[:, ["Confirmed", "Deaths"]].head() 

### Selecting positional rows

In [None]:
df.iloc[10:21]

### Selecting labeled rows

In [None]:
df.loc["Mainland China"]

### Putting it all together

In [None]:
df[["Confirmed", "Deaths", "Recovered"]].loc["Mainland China"]

In [None]:
df[["Deaths", "Recovered"]].loc["Bangladesh"]

In [None]:
df[["Confirmed", "Deaths", "Recovered"]].loc[["Mainland China", "Bangladesh"]]

### Outlook: Pandas Objects

In [None]:
df

In [None]:
type(df)

In [None]:
df["Last Update"]

In [None]:
type(df["Last Update"])

In [None]:
df.columns

In [None]:
type(df.columns)

In [None]:
df.index

In [None]:
type(df.index)

## Advanced Indexing and Slicing (optional)

In [None]:
import pandas as pd

In [None]:
df = pd.read_csv("../data/covid19.csv")

In [None]:
df.head() 

### Case 1: Getting the first 5 rows and rows 354 and 765

In [None]:
# positions 0-4 followed by the two extra positions 354 and 765
rows = [*range(5), 354, 765]
rows

In [None]:
df.iloc[rows]

### Case 2: Getting the first three columns and the columns "Confirmed" and "Deaths"

In [None]:
df.columns[:3].to_list() + ["Confirmed", "Deaths"]

In [None]:
col = df.columns[:3].to_list() + ["Confirmed", "Deaths"]
col

In [None]:
df.loc[:, col]

### Case 3: Combining Position- and label-based Indexing: Rows at Positions 200 and 300 and columns "Confirmed" and "Deaths"

In [None]:
df

In [None]:
# rows by position (200 and 300) translated to their index labels, columns by
# label; a bare .loc[[200, 300]] is label-based and only matches positions
# while the index is the default 0..n-1 range (assumes unique index labels)
df.loc[df.index[[200, 300]], ["Confirmed", "Deaths"]]