# Introduction to Pandas: Series and DataFrames

In [None]:
!pip install numpy

Collecting numpy
  Downloading numpy-2.3.4-cp311-cp311-win_amd64.whl (13.1 MB)
     ---------------------------------------- 13.1/13.1 MB 8.3 MB/s eta 0:00:00
Installing collected packages: numpy
Successfully installed numpy-2.3.4



[notice] A new release of pip available: 22.3 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [4]:
!pip install matplotlib

Collecting matplotlib
  Downloading matplotlib-3.10.7-cp311-cp311-win_amd64.whl (8.1 MB)
     ---------------------------------------- 8.1/8.1 MB 8.1 MB/s eta 0:00:00
Collecting contourpy>=1.0.1
  Downloading contourpy-1.3.3-cp311-cp311-win_amd64.whl (225 kB)
     -------------------------------------- 225.2/225.2 kB 6.9 MB/s eta 0:00:00
Collecting cycler>=0.10
  Downloading cycler-0.12.1-py3-none-any.whl (8.3 kB)
Collecting fonttools>=4.22.0
  Downloading fonttools-4.60.1-cp311-cp311-win_amd64.whl (2.3 MB)
     ---------------------------------------- 2.3/2.3 MB 9.1 MB/s eta 0:00:00
Collecting kiwisolver>=1.3.1
  Downloading kiwisolver-1.4.9-cp311-cp311-win_amd64.whl (73 kB)
     ---------------------------------------- 73.8/73.8 kB ? eta 0:00:00
Collecting pillow>=8
  Downloading pillow-12.0.0-cp311-cp311-win_amd64.whl (7.0 MB)
     ---------------------------------------- 7.0/7.0 MB 9.9 MB/s eta 0:00:00
Collecting pyparsing>=3
  Downloading pyparsing-3.2.5-py3-none-any.whl (113 kB)



[notice] A new release of pip available: 22.3 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
!pip install pandas

Collecting pandas
  Downloading pandas-2.3.3-cp311-cp311-win_amd64.whl (11.3 MB)
     ---------------------------------------- 11.3/11.3 MB 8.7 MB/s eta 0:00:00
Collecting pytz>=2020.1
  Downloading pytz-2025.2-py2.py3-none-any.whl (509 kB)
     ------------------------------------- 509.2/509.2 kB 10.6 MB/s eta 0:00:00
Collecting tzdata>=2022.7
  Downloading tzdata-2025.2-py2.py3-none-any.whl (347 kB)
     ------------------------------------- 347.8/347.8 kB 10.9 MB/s eta 0:00:00
Installing collected packages: pytz, tzdata, pandas
Successfully installed pandas-2.3.3 pytz-2025.2 tzdata-2025.2



[notice] A new release of pip available: 22.3 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

Matplotlib is building the font cache; this may take a moment.


In [6]:
# Display option: None removes the cap on how many columns pandas prints,
# so wide DataFrames are shown in full instead of being truncated.
pd.options.display.max_columns = None

In [10]:
# 1. Series - a one-dimensional structure similar to an array or list
print("1. Working with Series")
print("-" * 50)

# Creating a Series from a list.
# The source list is called `values`, not `list`: binding the name `list`
# would shadow the built-in type for every later cell in the kernel
# (e.g. `list(df.columns)` would then raise "'list' object is not callable").
# NOTE: a kernel that already ran the old cell keeps the shadowed name
# until it is restarted.
values = [10, 20, 30, 40, 50]
s1 = pd.Series(values)
print("Series created from a list:")
print(s1)
print()


1. Working with Series
--------------------------------------------------
Series created from a list:
0    10
1    20
2    30
3    40
4    50
dtype: int64



In [9]:
# Series whose index is a set of letter labels instead of the default 0..n-1.
# The values 10, 20, ..., 50 are generated with a range; the labels come from
# splitting the string 'abcde' into single characters.
s2 = pd.Series(range(10, 60, 10), index=list('abcde'))
print("Series with custom indices:", s2, sep="\n")

Series with custom indices:
a    10
b    20
c    30
d    40
e    50
dtype: int64


In [11]:
# Series built from a mapping: the dictionary keys become the index labels
# and the dictionary values become the Series values.
dictionary = dict(a=10, b=20, c=30, d=40, e=50)
s3 = pd.Series(dictionary)
print("Series created from a dictionary:", s3, sep="\n")
print()

Series created from a dictionary:
a    10
b    20
c    30
d    40
e    50
dtype: int64



In [12]:
# Label-based access to Series elements via .loc
print("Accessing elements of the Series:")

# A single label yields a scalar; a list of labels yields a sub-Series.
single_value = s3.loc['a']
selected_values = s3.loc[['a', 'c', 'e']]

print("Element at index 'a':", single_value)
print("Elements at indices 'a', 'c' and 'e':", selected_values)
print()

Accessing elements of the Series:
Element at index 'a': 10
Elements at indices 'a', 'c' and 'e': a    10
c    30
e    50
dtype: int64



In [13]:
# Element-wise arithmetic on a Series: the scalar is applied to every value
# and the index labels are preserved.
print("Series Operations:")

shifted = s2.add(5)   # same as s2 + 5
doubled = s2.mul(2)   # same as s2 * 2

print("s2 + 5:", shifted, sep="\n")
print("\ns2 * 2:", doubled, sep="\n")
print()

Series Operations:
s2 + 5:
a    15
b    25
c    35
d    45
e    55
dtype: int64

s2 * 2:
a     20
b     40
c     60
d     80
e    100
dtype: int64



In [14]:
# 2. DataFrames - a two-dimensional structure similar to a table
# Section banner: title line followed by a 50-character rule.
print("2. Working with DataFrames", "-" * 50, sep="\n")

2. Working with DataFrames
--------------------------------------------------


In [None]:
# DataFrame built from a column-oriented mapping: each key is a column name
# and each list holds that column's values, one entry per row.
data = dict(
    Name=['Ana', 'Bruno', 'Carlos', 'Daniela', 'Eduardo'],
    Age=list(range(25, 50, 5)),             # 25, 30, 35, 40, 45
    City=['São Paulo', 'Rio de Janeiro', 'Belo Horizonte', 'Curitiba', 'Salvador'],
    Salary=list(range(5000, 10000, 1000)),  # 5000, 6000, ..., 9000
)

df1 = pd.DataFrame(data)
print("DataFrame created from a dictionary:", df1, sep="\n")
print()

DataFrame created from a dictionary:
      Name  Age            City  Salary
0      Ana   25       São Paulo    5000
1    Bruno   30  Rio de Janeiro    6000
2   Carlos   35  Belo Horizonte    7000
3  Daniela   40        Curitiba    8000
4  Eduardo   45        Salvador    9000



In [16]:
# Overview of df1: its shape first, then a series of (heading, view) pairs
# printed one after another.
print("DataFrame Information:")
print("Shape (rows, columns):", df1.shape)

overview_sections = [
    ("\nData Types:", df1.dtypes),
    ("\nFirst 3 rows:", df1.head(3)),
    ("\nLast 2 rows:", df1.tail(2)),
    ("\nStatistical summary of numeric columns:", df1.describe()),
]
for heading, view in overview_sections:
    print(heading)
    print(view)
print()

DataFrame Information:
Shape (rows, columns): (5, 4)

Data Types:
Name      object
Age        int64
City      object
Salary     int64
dtype: object

First 3 rows:
     Name  Age            City  Salary
0     Ana   25       São Paulo    5000
1   Bruno   30  Rio de Janeiro    6000
2  Carlos   35  Belo Horizonte    7000

Last 2 rows:
      Name  Age      City  Salary
3  Daniela   40  Curitiba    8000
4  Eduardo   45  Salvador    9000

Statistical summary of numeric columns:
             Age      Salary
count   5.000000     5.00000
mean   35.000000  7000.00000
std     7.905694  1581.13883
min    25.000000  5000.00000
25%    30.000000  6000.00000
50%    35.000000  7000.00000
75%    40.000000  8000.00000
max    45.000000  9000.00000



In [17]:
# Selecting columns, rows and single cells from df1
print("Accessing DataFrame Data:")

# Column selection by label: one label -> Series, list of labels -> DataFrame
name_column = df1.loc[:, 'Name']
name_and_age = df1.loc[:, ['Name', 'Age']]

# Row selection by position; the slice end (4) is exclusive
first_row = df1.iloc[0]
rows_one_to_three = df1.iloc[1:4]

# Single cell by position: the column position is looked up from its name
# instead of hard-coding 3 (equivalent to df1.loc[2, 'Salary'] here)
salary_position = df1.columns.get_loc('Salary')
salary_of_row_two = df1.iloc[2, salary_position]

print("Column 'Name':", name_column, sep="\n")
print("\nColumns 'Name' and 'Age':", name_and_age, sep="\n")
print("\nFirst row:", first_row, sep="\n")
print("\nRows 1 to 3:", rows_one_to_three, sep="\n")
print("\nValue in row 2, column 'Salary':", salary_of_row_two, sep="\n")
print()

Accessing DataFrame Data:
Column 'Name':
0        Ana
1      Bruno
2     Carlos
3    Daniela
4    Eduardo
Name: Name, dtype: object

Columns 'Name' and 'Age':
      Name  Age
0      Ana   25
1    Bruno   30
2   Carlos   35
3  Daniela   40
4  Eduardo   45

First row:
Name            Ana
Age              25
City      São Paulo
Salary         5000
Name: 0, dtype: object

Rows 1 to 3:
      Name  Age            City  Salary
1    Bruno   30  Rio de Janeiro    6000
2   Carlos   35  Belo Horizonte    7000
3  Daniela   40        Curitiba    8000

Value in row 2, column 'Salary':
7000



In [18]:
# Row filtering with boolean masks
print("Data Filtering:")

# Mask is True for rows whose Age is strictly greater than 30
is_older_than_30 = df1['Age'].gt(30)
# Mask is True for rows whose City is one of the listed values
is_in_selected_city = df1['City'].isin(['São Paulo', 'Salvador'])

print("People older than 30:", df1.loc[is_older_than_30], sep="\n")
print("\nPeople from São Paulo or Salvador:", df1.loc[is_in_selected_city], sep="\n")
print()

Data Filtering:
People older than 30:
      Name  Age            City  Salary
2   Carlos   35  Belo Horizonte    7000
3  Daniela   40        Curitiba    8000
4  Eduardo   45        Salvador    9000

People from São Paulo or Salvador:
      Name  Age       City  Salary
0      Ana   25  São Paulo    5000
4  Eduardo   45   Salvador    9000



In [19]:
# Adding derived columns to df1
print("Creating new columns:")

# Age buckets: below 30 -> 'Young', 30 to 39 -> 'Adult', 40 and above -> 'Senior'.
# np.select uses the first condition that matches, so the second condition
# only applies to ages that are not below 30.
ages = df1['Age']
df1['Age Range'] = np.select([ages < 30, ages < 40], ['Young', 'Adult'], default='Senior')

# Bonus is 10% of the salary
df1['Bonus'] = df1['Salary'].mul(0.1)

print(df1)
print()

Creating new columns:
      Name  Age            City  Salary Age Range  Bonus
0      Ana   25       São Paulo    5000     Young  500.0
1    Bruno   30  Rio de Janeiro    6000     Adult  600.0
2   Carlos   35  Belo Horizonte    7000     Adult  700.0
3  Daniela   40        Curitiba    8000    Senior  800.0
4  Eduardo   45        Salvador    9000    Senior  900.0



In [23]:
# Aggregations per 'Age Range' bucket; the groupby object is built once
# and reused for both the mean salary and the row count.
print("Data Grouping:")
by_age_range = df1.groupby('Age Range')

print("Average salary by age Range:", by_age_range['Salary'].mean(), sep="\n")
print("\nCount by age group:", by_age_range.size(), sep="\n")
print()

Data Grouping:
Average salary by age Range:
Age Range
Adult     6500.0
Senior    8500.0
Young     5000.0
Name: Salary, dtype: float64

Count by age group:
Age Range
Adult     2
Senior    2
Young     1
dtype: int64



In [None]:
import datetime

# 3. Reading and writing data
print("3. Reading and writing data")
print("-" * 50)

# Seeded generator: the demo frame is random, but a fixed seed makes
# Restart Kernel -> Run All produce the same values every time. The unseeded
# global np.random state used previously gave different data on each run.
rng = np.random.default_rng(42)

# Creating a DataFrame to save.
# Columns: A = 5 floats in [0, 1), B = 5 integers in [0, 10), C = letters,
# date = 5 consecutive days starting on a random day of 2025.
# The day is drawn from 1..27 so it is valid in every month.
df_exemple = pd.DataFrame({
    'A': rng.random(5),
    'B': rng.integers(0, 10, 5),
    'C': ['a', 'b', 'c', 'd', 'e'],
    'date': pd.date_range(
        datetime.datetime(2025, int(rng.integers(1, 13)), int(rng.integers(1, 28))),
        periods=5,
    ),
})

print("DataFrame for I/O demonstration:")

print(df_exemple)
print()

3. Reading and writing data
--------------------------------------------------
DataFrame for I/O demonstration:
          A  B  C       date
0  0.800095  6  a 2025-01-01
1  0.791641  6  b 2025-01-02
2  0.832168  2  c 2025-01-03
3  0.114190  5  d 2025-01-04
4  0.316914  1  e 2025-01-05

