In [1]:
import pandas as pd
import numpy as np

## Series

In [3]:
temperatures = [25, 28, 30, 26, 29, 31, 27]

In [5]:
series = pd.Series(temperatures)

In [6]:
series

0    25
1    28
2    30
3    26
4    29
5    31
6    27
dtype: int64

In [7]:
value = 10

In [8]:
series = pd.Series(value, index=[0, 1, 2, 3, 4])

In [9]:
series

0    10
1    10
2    10
3    10
4    10
dtype: int64

In [11]:
ages = [25, 30, 35, 28, 32]
index_labels = ['John', 'Jane', 'Mike', 'Emily', 'Alex']
series = pd.Series(ages, index=index_labels)
series

John     25
Jane     30
Mike     35
Emily    28
Alex     32
dtype: int64

## DataFrame

In [12]:
data = {
    'Name': ['John', 'Jane', 'Mike'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Paris', 'London']
}
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,City
0,John,25,New York
1,Jane,30,Paris
2,Mike,35,London


In [13]:
data = {
    'Name': ['John', 'Jane', 'Mike'],
    'Age': [25, 30, 35],
    'City': ['New York', 'Paris', 'London']
}
df = pd.DataFrame(data, index=['A', 'B', 'C'])
df

Unnamed: 0,Name,Age,City
A,John,25,New York
B,Jane,30,Paris
C,Mike,35,London


## Datatypes of Pandas

In [14]:
s = pd.Series(['apple', 'banana', 'cherry'])
s.dtype

dtype('O')

In [15]:
print(s.dtype)

object


In [17]:
df = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Charlie'],
    'gender': ['F', 'M', 'M'],
    'hobby': ['reading', 'gaming', 'cooking']
})

df.dtypes

name      object
gender    object
hobby     object
dtype: object

In [19]:
s = pd.Series([1, 2, 3, 4])

s.dtype

dtype('int64')

In [20]:
print(s.dtype)

int64


In [21]:
df = pd.DataFrame({
    'id': [1, 2, 3],
    'age': [25, 30, 35],
    'score': [80, 90, 100]
})

df.dtypes

id       int64
age      int64
score    int64
dtype: object

In [22]:
s = pd.Series([1.0, 2.5, 3.2])

s.dtype

dtype('float64')

In [23]:
print(s.dtype)

float64


In [26]:
df = pd.DataFrame({
    'price': [10.0, np.nan, 15.0],
    'discount': [0.1, np.nan, np.nan],
    'final_price': [9.0,np.nan, np.nan]
})

df.dtypes

price          float64
discount       float64
final_price    float64
dtype: object

In [27]:
s = pd.Series([True, False, True])

s.dtype

dtype('bool')

In [28]:
print(s.dtype)

bool


In [29]:
df = pd.DataFrame({
    'is_even': [True, False, True],
    'is_positive': [True, True, False],
    'is_prime': [False, True, False]
})

df.dtypes

is_even        bool
is_positive    bool
is_prime       bool
dtype: object

## Loading Data from Files and the Web for Pandas

In [30]:
df = pd.read_csv('housing.csv')

df.head()

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
0,Abbotsford,85 Turner St,2,h,1480000.0,S,Biggin,3/12/2016,2.5,3067.0,...,1.0,1.0,202.0,,,Yarra,-37.7996,144.9984,Northern Metropolitan,4019.0
1,Abbotsford,25 Bloomburg St,2,h,1035000.0,S,Biggin,4/02/2016,2.5,3067.0,...,1.0,0.0,156.0,79.0,1900.0,Yarra,-37.8079,144.9934,Northern Metropolitan,4019.0
2,Abbotsford,5 Charles St,3,h,1465000.0,SP,Biggin,4/03/2017,2.5,3067.0,...,2.0,0.0,134.0,150.0,1900.0,Yarra,-37.8093,144.9944,Northern Metropolitan,4019.0
3,Abbotsford,40 Federation La,3,h,850000.0,PI,Biggin,4/03/2017,2.5,3067.0,...,2.0,1.0,94.0,,,Yarra,-37.7969,144.9969,Northern Metropolitan,4019.0
4,Abbotsford,55a Park St,4,h,1600000.0,VB,Nelson,4/06/2016,2.5,3067.0,...,1.0,2.0,120.0,142.0,2014.0,Yarra,-37.8072,144.9941,Northern Metropolitan,4019.0


In [31]:
df = pd.read_csv('housing.csv', index_col='Date')

df.head()

Unnamed: 0_level_0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Distance,Postcode,Bedroom2,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
3/12/2016,Abbotsford,85 Turner St,2,h,1480000.0,S,Biggin,2.5,3067.0,2.0,1.0,1.0,202.0,,,Yarra,-37.7996,144.9984,Northern Metropolitan,4019.0
4/02/2016,Abbotsford,25 Bloomburg St,2,h,1035000.0,S,Biggin,2.5,3067.0,2.0,1.0,0.0,156.0,79.0,1900.0,Yarra,-37.8079,144.9934,Northern Metropolitan,4019.0
4/03/2017,Abbotsford,5 Charles St,3,h,1465000.0,SP,Biggin,2.5,3067.0,3.0,2.0,0.0,134.0,150.0,1900.0,Yarra,-37.8093,144.9944,Northern Metropolitan,4019.0
4/03/2017,Abbotsford,40 Federation La,3,h,850000.0,PI,Biggin,2.5,3067.0,3.0,2.0,1.0,94.0,,,Yarra,-37.7969,144.9969,Northern Metropolitan,4019.0
4/06/2016,Abbotsford,55a Park St,4,h,1600000.0,VB,Nelson,2.5,3067.0,3.0,1.0,2.0,120.0,142.0,2014.0,Yarra,-37.8072,144.9941,Northern Metropolitan,4019.0


In [33]:
df = pd.read_csv('housing.csv', usecols=['Rooms', 'Price'])

df.head()

Unnamed: 0,Rooms,Price
0,2,1480000.0
1,2,1035000.0
2,3,1465000.0
3,3,850000.0
4,4,1600000.0


In [4]:
df = pd.read_csv('housing.csv')

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 13580 entries, 0 to 13579
Data columns (total 21 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Suburb         13580 non-null  object 
 1   Address        13580 non-null  object 
 2   Rooms          13580 non-null  int64  
 3   Type           13580 non-null  object 
 4   Price          13580 non-null  float64
 5   Method         13580 non-null  object 
 6   SellerG        13580 non-null  object 
 7   Date           13580 non-null  object 
 8   Distance       13580 non-null  float64
 9   Postcode       13580 non-null  float64
 10  Bedroom2       13580 non-null  float64
 11  Bathroom       13580 non-null  float64
 12  Car            13518 non-null  float64
 13  Landsize       13580 non-null  float64
 14  BuildingArea   7130 non-null   float64
 15  YearBuilt      8205 non-null   float64
 16  CouncilArea    12211 non-null  object 
 17  Lattitude      13580 non-null  float64
 18  Longti

In [35]:
df.isnull().any()

Suburb           False
Address          False
Rooms            False
Type             False
Price            False
Method           False
SellerG          False
Date             False
Distance         False
Postcode         False
Bedroom2         False
Bathroom         False
Car               True
Landsize         False
BuildingArea      True
YearBuilt         True
CouncilArea       True
Lattitude        False
Longtitude       False
Regionname       False
Propertycount    False
dtype: bool

In [36]:
df.columns

Index(['Suburb', 'Address', 'Rooms', 'Type', 'Price', 'Method', 'SellerG',
       'Date', 'Distance', 'Postcode', 'Bedroom2', 'Bathroom', 'Car',
       'Landsize', 'BuildingArea', 'YearBuilt', 'CouncilArea', 'Lattitude',
       'Longtitude', 'Regionname', 'Propertycount'],
      dtype='object')

In [37]:
has_null_cols = df.columns[df.isnull().any()].tolist()
has_null_cols

['Car', 'BuildingArea', 'YearBuilt', 'CouncilArea']

In [41]:
df.isnull().any()[df.isnull().any() == True].index.tolist()

['Car', 'BuildingArea', 'YearBuilt', 'CouncilArea']

In [42]:
df.isnull().sum()[df.isnull().sum() > 0].index.tolist()

['Car', 'BuildingArea', 'YearBuilt', 'CouncilArea']

In [6]:
df[['Landsize', 'Type']].dtypes

Landsize    float64
Type         object
dtype: object

In [7]:
df = pd.read_csv('housing.csv', dtype={'Landsize': int, 'Type': 'category'})

In [8]:
df[['Landsize', 'Type']].dtypes

Landsize       int64
Type        category
dtype: object

In [9]:
df.head()

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
0,Abbotsford,85 Turner St,2,h,1480000.0,S,Biggin,3/12/2016,2.5,3067.0,...,1.0,1.0,202,,,Yarra,-37.7996,144.9984,Northern Metropolitan,4019.0
1,Abbotsford,25 Bloomburg St,2,h,1035000.0,S,Biggin,4/02/2016,2.5,3067.0,...,1.0,0.0,156,79.0,1900.0,Yarra,-37.8079,144.9934,Northern Metropolitan,4019.0
2,Abbotsford,5 Charles St,3,h,1465000.0,SP,Biggin,4/03/2017,2.5,3067.0,...,2.0,0.0,134,150.0,1900.0,Yarra,-37.8093,144.9944,Northern Metropolitan,4019.0
3,Abbotsford,40 Federation La,3,h,850000.0,PI,Biggin,4/03/2017,2.5,3067.0,...,2.0,1.0,94,,,Yarra,-37.7969,144.9969,Northern Metropolitan,4019.0
4,Abbotsford,55a Park St,4,h,1600000.0,VB,Nelson,4/06/2016,2.5,3067.0,...,1.0,2.0,120,142.0,2014.0,Yarra,-37.8072,144.9941,Northern Metropolitan,4019.0


In [10]:
df = pd.read_csv('housing.csv', skiprows=2)

In [11]:
df.head()

Unnamed: 0,Abbotsford,25 Bloomburg St,2,h,1035000.0,S,Biggin,4/02/2016,2.5,3067.0,...,1.0,0.0,156.0,79.0,1900.0,Yarra,-37.8079,144.9934,Northern Metropolitan,4019.0
0,Abbotsford,5 Charles St,3,h,1465000.0,SP,Biggin,4/03/2017,2.5,3067.0,...,2.0,0.0,134.0,150.0,1900.0,Yarra,-37.8093,144.9944,Northern Metropolitan,4019.0
1,Abbotsford,40 Federation La,3,h,850000.0,PI,Biggin,4/03/2017,2.5,3067.0,...,2.0,1.0,94.0,,,Yarra,-37.7969,144.9969,Northern Metropolitan,4019.0
2,Abbotsford,55a Park St,4,h,1600000.0,VB,Nelson,4/06/2016,2.5,3067.0,...,1.0,2.0,120.0,142.0,2014.0,Yarra,-37.8072,144.9941,Northern Metropolitan,4019.0
3,Abbotsford,129 Charles St,2,h,941000.0,S,Jellis,7/05/2016,2.5,3067.0,...,1.0,0.0,181.0,,,Yarra,-37.8041,144.9953,Northern Metropolitan,4019.0
4,Abbotsford,124 Yarra St,3,h,1876000.0,S,Nelson,7/05/2016,2.5,3067.0,...,2.0,0.0,245.0,210.0,1910.0,Yarra,-37.8024,144.9993,Northern Metropolitan,4019.0


In [12]:
df = pd.read_csv('housing.csv', nrows=2)

In [13]:
df.head()

Unnamed: 0,Suburb,Address,Rooms,Type,Price,Method,SellerG,Date,Distance,Postcode,...,Bathroom,Car,Landsize,BuildingArea,YearBuilt,CouncilArea,Lattitude,Longtitude,Regionname,Propertycount
0,Abbotsford,85 Turner St,2,h,1480000.0,S,Biggin,3/12/2016,2.5,3067.0,...,1.0,1.0,202.0,,,Yarra,-37.7996,144.9984,Northern Metropolitan,4019.0
1,Abbotsford,25 Bloomburg St,2,h,1035000.0,S,Biggin,4/02/2016,2.5,3067.0,...,1.0,0.0,156.0,79.0,1900.0,Yarra,-37.8079,144.9934,Northern Metropolitan,4019.0


In [14]:
data = [
    ["Alice", 30, "F"],
    ["Bob", 25, "M"],
    ["Charlie", 35, "M"],
    ["Diana", 28, "F"],
    ["Eric", 40, "M"],
    ["Fiona", 22, "F"],
    ["George", 31, "M"],
    ["Hannah", 27, "F"],
    ["Ian", 29, "M"],
    ["Jenny", 33, "F"],
    ["Kevin", 26, "M"],
    ["Linda", 38, "F"],
    ["Mark", 45, "M"],
    ["Nancy", 24, "F"],
    ["Oliver", 32, "M"],
    ["Paula", 36, "F"],
    ["Quinn", 23, "M"],
    ["Rachel", 29, "F"],
    ["Sam", 41, "M"],
    ["Tina", 34, "F"],
    ["Victor", 37, "M"],
    ["Wendy", 28, "F"],
    ["Xavier", 39, "M"],
    ["Yolanda", 31, "F"],
    ["Zach", 27, "M"],
    ["Emily", 21, "F"],
    ["Frank", 44, "M"],
    ["Grace", 26, "F"],
    ["Henry", 33, "M"],
    ["Iris", 29, "F"],
]

df = pd.DataFrame(data, columns=["Name", "Age", "Gender"])
df.to_excel("people.xlsx", index=False)


In [15]:
df = pd.read_excel('people.xlsx', sheet_name='female')

In [16]:
df.head()

Unnamed: 0,Name,Age,Gender
0,Alice,30,F
1,Diana,28,F
2,Fiona,22,F
3,Hannah,27,F
4,Jenny,33,F


In [2]:
df = pd.read_html('https://en.wikipedia.org/wiki/List_of_countries_by_population')

HTTPError: HTTP Error 403: Forbidden

✅ Why your code failed

Browser requests include headers like user-agent, cookies, etc. → Wikipedia allows.

pandas.read_html() default request looks like an automated bot. → Wikipedia blocks → 403 Forbidden.

✅ How to fix it

You need to supply a proper User-Agent header by downloading the page yourself (using requests) and then passing the HTML to read_html().

In [3]:
import requests

url = "https://en.wikipedia.org/wiki/List_of_countries_by_population"
headers = {"User-Agent": "Mozilla/5.0"}

response = requests.get(url, headers=headers)
response.raise_for_status()   # ensure no HTTP errors

dfs = pd.read_html(response.text)
print(len(dfs))
dfs[0].head()

3


  dfs = pd.read_html(response.text)


Unnamed: 0,Location,Population,% of world,Date,Source (official or from the United Nations),Notes
0,World,8232000000,100%,13 Jun 2025,UN projection[1][3],
1,India,1417492000,17.3%,1 Jul 2025,Official projection[4],[b]
2,China,1408280000,17.1%,31 Dec 2024,Official estimate[5],[c]
3,United States,340110988,4.1%,1 Jul 2024,Official estimate[6],[d]
4,Indonesia,284438782,3.5%,30 Jun 2025,National annual projection[7],
