In [1]:
import numpy as np
import pandas as pd

# Load a five-row sample of the weather data. Only these rows are used in the
# rest of the notebook, so let the parser stop after five rows instead of
# reading the whole file and discarding the remainder with .head().
# Note: column dtypes are then inferred from these five rows only.
df = pd.read_csv("weather.csv", nrows=5)
df

Unnamed: 0,MONTH,DAY,TIME,TEMP,PRESSURE
0,1,1,1,6.8,10207
1,1,1,2,5.8,10214
2,1,1,3,5.7,10220
3,1,1,4,6.0,10225
4,1,1,5,4.5,10230


In [2]:
# Bracket indexing with a column name returns that column as a Series.
df["TEMP"]

0    6.8
1    5.8
2    5.7
3    6.0
4    4.5
Name: TEMP, dtype: float64

In [3]:
# Chained lookup: pick the TEMP column first, then the entry labelled 2 in it.
df["TEMP"][2]

5.7

In [4]:
# Transpose: the column names become the row index and the row labels 0-4
# become the columns.
dft = df.T

In [5]:
# Display the transposed frame (all values are rendered as floats here).
dft

Unnamed: 0,0,1,2,3,4
MONTH,1.0,1.0,1.0,1.0,1.0
DAY,1.0,1.0,1.0,1.0,1.0
TIME,1.0,2.0,3.0,4.0,5.0
TEMP,6.8,5.8,5.7,6.0,4.5
PRESSURE,10207.0,10214.0,10220.0,10225.0,10230.0


In [6]:
# Column labels of the original frame: an Index of strings.
df.columns

Index(['MONTH', 'DAY', 'TIME', 'TEMP', 'PRESSURE'], dtype='object')

In [7]:
# After transposing, the columns are the former row labels (RangeIndex 0..4).
dft.columns

RangeIndex(start=0, stop=5, step=1)

In [8]:
# Element-wise missing-value check: True wherever a value is present.
df.notna()

Unnamed: 0,MONTH,DAY,TIME,TEMP,PRESSURE
0,True,True,True,True,True
1,True,True,True,True,True
2,True,True,True,True,True
3,True,True,True,True,True
4,True,True,True,True,True


In [9]:
# Small frame whose index contains the label 4 three times, used to show how
# label lookups behave with duplicate index labels.
# The data is repeated explicitly once per index label so that its shape
# matches the index, rather than relying on pandas stretching a single row.
row_labels = [4, 3, 4, 4, 2]
t = pd.DataFrame([["John", "Bob", "Anne"]] * len(row_labels), index=row_labels)
t

Unnamed: 0,0,1,2
4,John,Bob,Anne
3,John,Bob,Anne
4,John,Bob,Anne
4,John,Bob,Anne
2,John,Bob,Anne


In [10]:
# Label 4 appears three times in the index, so this lookup returns a Series
# with every matching row instead of a single scalar.
t[0][4]

4    John
4    John
4    John
Name: 0, dtype: object

# Indexing with lists and slices

In [11]:
# Show the weather sample again as a reference for the examples below.
df

Unnamed: 0,MONTH,DAY,TIME,TEMP,PRESSURE
0,1,1,1,6.8,10207
1,1,1,2,5.8,10214
2,1,1,3,5.7,10220
3,1,1,4,6.0,10225
4,1,1,5,4.5,10230


In [12]:
# A list of column names selects those columns, in the order listed.
df[["PRESSURE", "TEMP", "TIME"]]

Unnamed: 0,PRESSURE,TEMP,TIME
0,10207,6.8,1
1,10214,5.8,2
2,10220,5.7,3
3,10225,6.0,4
4,10230,4.5,5


In [13]:
# A list of row labels on a Series selects several entries at once.
df["TEMP"][[1,3]]

1    5.8
3    6.0
Name: TEMP, dtype: float64

In [14]:
# A slice inside [] selects rows, not columns. The stop value is excluded,
# so this yields rows 2 and 3.
df[2:4]

Unnamed: 0,MONTH,DAY,TIME,TEMP,PRESSURE
2,1,1,3,5.7,10220
3,1,1,4,6.0,10225


In [15]:
# Combine both forms: slice the rows first, then pick columns from the result.
df[2:4][["TEMP", "PRESSURE"]]

Unnamed: 0,TEMP,PRESSURE
2,5.7,10220
3,6.0,10225


In [16]:
# Show the transposed frame again as a reference for the next examples.
dft

Unnamed: 0,0,1,2,3,4
MONTH,1.0,1.0,1.0,1.0,1.0
DAY,1.0,1.0,1.0,1.0,1.0
TIME,1.0,2.0,3.0,4.0,5.0
TEMP,6.8,5.8,5.7,6.0,4.5
PRESSURE,10207.0,10214.0,10220.0,10225.0,10230.0


In [17]:
# Integer slices are positional even though the index holds strings:
# this keeps the rows from position 3 onward (TEMP and PRESSURE).
dft[3:]

Unnamed: 0,0,1,2,3,4
TEMP,6.8,5.8,5.7,6.0,4.5
PRESSURE,10207.0,10214.0,10220.0,10225.0,10230.0


In [18]:
# The columns of dft are the integers 0-4, so a list of integers selects
# columns by label after the row slice.
dft[3:][[0,2,4]]

Unnamed: 0,0,2,4
TEMP,6.8,5.7,4.5
PRESSURE,10207.0,10220.0,10230.0


In [19]:
# Slicing a Series: the first three entries of the PRESSURE column.
df["PRESSURE"][:3]

0    10207
1    10214
2    10220
Name: PRESSURE, dtype: int64

In [20]:
# Slicing with index labels includes BOTH endpoints (PRESSURE is part of the
# result), unlike integer slices, which exclude the stop position.
dft["TIME":"PRESSURE"]

Unnamed: 0,0,1,2,3,4
TIME,1.0,2.0,3.0,4.0,5.0
TEMP,6.8,5.8,5.7,6.0,4.5
PRESSURE,10207.0,10214.0,10220.0,10225.0,10230.0


# loc & iloc

In [21]:
# Example table indexed by country name, used for the .loc / .iloc examples.
# San Marino has no percentage value. It is written as an explicit np.nan so
# that every row has one entry per column, instead of relying on pandas to
# pad a short row.
capitals = pd.DataFrame(
    [
        ["Ngerulmud", 391, 1.87],
        ["Vatican City", 826, 100],
        ["Yaren", 1100, 10.91],
        ["Funafuti", 4492, 45.48],
        ["City of San Marino", 4493, np.nan],
    ],
    index=["Palau", "Vatican City", "Nauru", "Tuvalu", "San Marino"],
    columns=["Capital", "Population", "Percentage"],
)

capitals

Unnamed: 0,Capital,Population,Percentage
Palau,Ngerulmud,391,1.87
Vatican City,Vatican City,826,100.0
Nauru,Yaren,1100,10.91
Tuvalu,Funafuti,4492,45.48
San Marino,City of San Marino,4493,


In [22]:
# .loc takes a row label and a column label and returns the single value.
capitals.loc["Nauru", "Population"]

1100

In [23]:
# The same value through chained indexing: column first, then row label.
capitals["Population"]["Nauru"]

1100

In [24]:
# Label slices work on both axes with .loc and include the end labels.
capitals.loc["Nauru":"San Marino", "Capital":"Percentage"]

Unnamed: 0,Capital,Population,Percentage
Nauru,Yaren,1100,10.91
Tuvalu,Funafuti,4492,45.48
San Marino,City of San Marino,4493,


In [25]:
# A list of row labels returns those rows in the order requested.
capitals.loc[["San Marino", "Vatican City"]]

Unnamed: 0,Capital,Population,Percentage
San Marino,City of San Marino,4493,
Vatican City,Vatican City,826,100.0


In [26]:
# .iloc is purely positional: rows at positions 1 and 3, first two columns.
capitals.iloc[[1,3], :2]

Unnamed: 0,Capital,Population
Vatican City,Vatican City,826
Tuvalu,Funafuti,4492


In [27]:
# Rows from position 3 onward, columns from position 1 onward.
capitals.iloc[3:, 1:]

Unnamed: 0,Population,Percentage
Tuvalu,4492,45.48
San Marino,4493,


In [28]:
# All rows of the column at position 2, returned as a Series.
capitals.iloc[:, 2]

Palau             1.87
Vatican City    100.00
Nauru            10.91
Tuvalu           45.48
San Marino         NaN
Name: Percentage, dtype: float64

In [29]:
# Show the full table again before the boolean-indexing examples.
capitals

Unnamed: 0,Capital,Population,Percentage
Palau,Ngerulmud,391,1.87
Vatican City,Vatican City,826,100.0
Nauru,Yaren,1100,10.91
Tuvalu,Funafuti,4492,45.48
San Marino,City of San Marino,4493,


In [30]:
# A boolean list with one flag per row keeps the rows flagged True.
capitals[[True, True, False, True, False]]

Unnamed: 0,Capital,Population,Percentage
Palau,Ngerulmud,391,1.87
Vatican City,Vatican City,826,100.0
Tuvalu,Funafuti,4492,45.48


In [31]:
# Comparing a column with a scalar gives a boolean Series; the missing
# San Marino value compares as False.
capitals["Percentage"] > 25

Palau           False
Vatican City     True
Nauru           False
Tuvalu           True
San Marino      False
Name: Percentage, dtype: bool

In [32]:
# Use the boolean Series as a row filter: only rows where it is True remain.
capitals[capitals["Percentage"] > 25]

Unnamed: 0,Capital,Population,Percentage
Vatican City,Vatican City,826,100.0
Tuvalu,Funafuti,4492,45.48


In [33]:
# Grades table: one row per student, one column per test.
# Built column by column; the index supplies the student names.
grades = pd.DataFrame(
    {
        "test_1": [6, 7, 6, 6, 5],
        "test_2": [4, 8, 7, 5, 2],
    },
    index=["Mary", "John", "Ann", "Pete", "Laura"],
)

grades

Unnamed: 0,test_1,test_2
Mary,6,4
John,7,8
Ann,6,7
Pete,6,5
Laura,5,2


In [34]:
# Element-wise comparison of two columns yields a boolean Series with one
# entry per row.
grades["test_2"] > grades["test_1"]

Mary     False
John      True
Ann       True
Pete     False
Laura    False
dtype: bool

In [35]:
# Keep the rows where the second test score did not exceed the first.
grades[grades["test_2"] <= grades["test_1"]]

Unnamed: 0,test_1,test_2
Mary,6,4
Pete,6,5
Laura,5,2


In [36]:
# grades.mean() gives one mean per column, so the comparison is a boolean
# mask over the columns. Passed to .loc on the column axis it keeps only
# test_1 (mean 6.0); test_2 (mean 5.2) is dropped.
grades.loc[:, grades.mean() > 5.5]

Unnamed: 0,test_1
Mary,6
John,7
Ann,6
Pete,6
Laura,5


In [37]:
# Show the grades table before it is modified in the following cells.
grades

Unnamed: 0,test_1,test_2
Mary,6,4
John,7,8
Ann,6,7
Pete,6,5
Laura,5,2


In [38]:
# Augmented assignment through .loc updates only the selected cells:
# +1 on test_2 for Laura and John.
grades.loc[["Laura", "John"], "test_2"] += 1
grades

Unnamed: 0,test_1,test_2
Mary,6,4
John,7,9
Ann,6,7
Pete,6,5
Laura,5,3


In [39]:
# Add 0.5 to every value of one column; test_1 is shown as floats afterwards.
grades["test_1"] += .5
grades

Unnamed: 0,test_1,test_2
Mary,6.5,4
John,7.5,9
Ann,6.5,7
Pete,6.5,5
Laura,5.5,3


In [40]:
# Add 2 to every value in Mary's row (a whole row selected by label).
grades.loc["Mary"] += 2
grades

Unnamed: 0,test_1,test_2
Mary,8.5,6.0
John,7.5,9.0
Ann,6.5,7.0
Pete,6.5,5.0
Laura,5.5,3.0


In [41]:
# Replace Pete's whole row with a list holding one value per column.
grades.loc["Pete"] = [7, 7]
grades

Unnamed: 0,test_1,test_2
Mary,8.5,6.0
John,7.5,9.0
Ann,6.5,7.0
Pete,7.0,7.0
Laura,5.5,3.0


In [42]:
# Replace every numeric grade with a "Pass"/"Fail" label using boolean
# DataFrame masks. Both masks are computed up front, while the frame is still
# numeric, because the comparisons would not work once strings are stored.
failing = grades < 6
passing = grades >= 6
# Cast to object first: writing strings into numeric columns relies on a
# silent dtype upcast that recent pandas versions deprecate.
grades = grades.astype(object)
grades[failing] = "Fail"
grades[passing] = "Pass"
grades

Unnamed: 0,test_1,test_2
Mary,Pass,Pass
John,Pass,Pass
Ann,Pass,Pass
Pete,Pass,Pass
Laura,Fail,Fail


In [43]:
# Rebuild the original numeric grades table (the previous cells replaced its
# contents), again one row per student and one column per test.
test_scores = {
    "test_1": [6, 7, 6, 6, 5],
    "test_2": [4, 8, 7, 5, 2],
}
grades = pd.DataFrame(test_scores, index=["Mary", "John", "Ann", "Pete", "Laura"])

grades

Unnamed: 0,test_1,test_2
Mary,6,4
John,7,8
Ann,6,7
Pete,6,5
Laura,5,2


In [44]:
# axis=1 averages across the columns, giving one mean per row.
grades.mean(axis=1)

Mary     5.0
John     7.5
Ann      6.5
Pete     5.5
Laura    3.5
dtype: float64

In [49]:
# Flag each row as passed when the mean of the two test scores is strictly
# above 6. The mean is restricted to the test columns so the cell can be
# re-run safely: once "passed" exists, a plain grades.mean(axis=1) would
# average the boolean flag in as well and change the result.
grades["passed"] = grades[["test_1", "test_2"]].mean(axis=1) > 6
grades

Unnamed: 0,test_1,test_2,passed
Mary,6,4,False
John,7,8,True
Ann,6,7,True
Pete,6,5,False
Laura,5,2,False


In [57]:
# NOTE(review): the saved output of this cell lists rows and columns in an
# order that no earlier visible cell produces, and the execution counts jump
# (44 -> 49 -> 57). Presumably it was run after the sorting cells below;
# confirm the notebook survives Restart Kernel -> Run All.
capitals

Unnamed: 0,Population,Percentage,Capital
Vatican City,826,100.0,Vatican City
Tuvalu,4492,45.48,Funafuti
San Marino,4493,,City of San Marino
Palau,391,1.87,Ngerulmud
Nauru,1100,10.91,Yaren


In [60]:
# Sort the rows by index label in descending order, then sort the columns
# alphabetically by name (axis=1).
# The result is rebound to the name rather than using inplace=True, so the
# two sorts chain into a single expression.
capitals = capitals.sort_index(ascending=False).sort_index(axis=1)

capitals

Unnamed: 0,Capital,Percentage,Population
Vatican City,Vatican City,100.0,826
Tuvalu,Funafuti,45.48,4492
San Marino,City of San Marino,,4493
Palau,Ngerulmud,1.87,391
Nauru,Yaren,10.91,1100


In [66]:
# Sort the rows by the Percentage column in ascending order; the row with
# the missing value ends up last. The sorted frame is rebound to the name
# rather than sorting with inplace=True.
capitals = capitals.sort_values(by="Percentage")
capitals

Unnamed: 0,Capital,Percentage,Population
Palau,Ngerulmud,1.87,391
Nauru,Yaren,10.91,1100
Tuvalu,Funafuti,45.48,4492
Vatican City,Vatican City,100.0,826
San Marino,City of San Marino,,4493


In [70]:
# Sort by test_1 first and break ties with test_2 (both ascending).
# The sorted frame is rebound to the name rather than sorting with
# inplace=True.
grades = grades.sort_values(by=["test_1", "test_2"])
grades

Unnamed: 0,test_1,test_2,passed
Laura,5,2,False
Mary,6,4,False
Pete,6,5,False
Ann,6,7,True
John,7,8,True
