In [4]:
import pandas as pd

In [7]:
# Sample records, one per person: [name, age, country, score, continent].
data = [
    ["Mark", 55, "Italy", 4.5, "Europe"],
    ["John", 33, "USA", 6.7, "America"],
    ["Tim", 41, "USA", 3.9, "America"],
    ["Jenny", 12, "Germany", 9.0, "Europe"],
]

In [8]:
# Build the frame from the records, with explicit column labels and
# explicit (deliberately unsorted) integer row labels.
df = pd.DataFrame(
    data=data,
    columns=["name", "age", "country", "score", "continent"],
    index=[1001, 1000, 1002, 1003],
)

In [9]:
# Display the whole frame (fine here: it only has four rows)
df

Unnamed: 0,name,age,country,score,continent
1001,Mark,55,Italy,4.5,Europe
1000,John,33,USA,6.7,America
1002,Tim,41,USA,3.9,America
1003,Jenny,12,Germany,9.0,Europe


# Columns

In [11]:
# Show the column labels (returned as an Index object)
df.columns

Index(['name', 'age', 'country', 'score', 'continent'], dtype='object')

In [12]:
# Name the columns axis: this labels the column Index itself, it does not rename any column
df.columns.name = "properties"

In [13]:
# Rename columns with an old-label -> new-label mapping.
# rename() returns a new DataFrame; df itself keeps its original labels.
df.rename({"name": "First Name", "age": "Age"}, axis="columns")

properties,First Name,Age,country,score,continent
1001,Mark,55,Italy,4.5,Europe
1000,John,33,USA,6.7,America
1002,Tim,41,USA,3.9,America
1003,Jenny,12,Germany,9.0,Europe


In [18]:
# Remove rows (by index label) and columns (by name) in a single call.
# The result is a new DataFrame; df is not modified.
df.drop(
    index=[1000, 1003],
    columns=["name", "country"],
)

properties,age,score,continent
1001,55,4.5,Europe
1002,41,3.9,America


In [19]:
# Swap rows and columns: the row labels become the column labels and vice versa
df.transpose()

Unnamed: 0_level_0,1001,1000,1002,1003
properties,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
name,Mark,John,Tim,Jenny
age,55,33,41,12
country,Italy,USA,USA,Germany
score,4.5,6.7,3.9,9.0
continent,Europe,America,America,Europe


In [20]:
# Reorder the columns: keep every row (:) and list the column labels in the desired order
df.loc[:, ["continent", "country", "name", "age", "score"]]

properties,continent,country,name,age,score
1001,Europe,Italy,Mark,55,4.5
1000,America,USA,John,33,6.7
1002,America,USA,Tim,41,3.9
1003,Europe,Germany,Jenny,12,9.0


# Selecting Data

In [22]:
# Label-based selection: scalars for both the row and the column return a single value
df.loc[1001, "name"]

'Mark'

In [23]:
# A scalar on one axis and a list on the other returns a Series
# (here: a list of row labels and a single column label)
df.loc[[1001, 1002], "age"]

1001    55
1002    41
Name: age, dtype: int64

In [24]:
# Selecting multiple rows and columns returns a DataFrame.
# Label slices include the end label and follow the index order (not numeric order),
# so :1002 yields rows 1001, 1000 and 1002.
df.loc[:1002, ["name", "country"]]

properties,name,country
1001,Mark,Italy
1000,John,USA
1002,Tim,USA


In [27]:
# Shortcut for selecting a single column by label (returns a Series)
df["name"]

1001     Mark
1000     John
1002      Tim
1003    Jenny
Name: name, dtype: object

## Selecting by position

In [28]:
# Using integer location to return a scalar
df.iloc[0,0] # first row, first column

'Mark'

In [29]:
# A list of row positions with a single column position returns a Series
df.iloc[[0,2],1]

1001    55
1002    41
Name: age, dtype: int64

In [30]:
# A slice of row positions with a list of column positions returns a DataFrame.
# Position slices exclude the end position: :3 gives the rows at positions 0, 1 and 2.
df.iloc[:3, [0,2]]

properties,name,country
1001,Mark,Italy
1000,John,USA
1002,Tim,USA


## Selecting by boolean indexing

In [31]:
# Build a boolean mask: True for rows where age is above 40 AND country is USA.
# Each comparison needs its own parentheses because & binds tighter than > and ==.
tf = (df["age"] > 40) & (df["country"] == "USA")

In [32]:
# Inspect the mask: one boolean per row, labelled with the same index as df
tf

1001    False
1000    False
1002     True
1003    False
dtype: bool

In [33]:
# Keep only the rows where the mask is True, with all columns (:)
df.loc[tf, :]

properties,name,age,country,score,continent
1002,Tim,41,USA,3.9,America
