# Pandas First Steps

## 1. Installing Pandas

In [1]:
pip install pandas

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


## 2. Pandas Basics

### 2.1 Importing pandas as pd

In [2]:
import pandas as pd

### 2.2 Creating a Series

In [4]:
# Build a Series from a plain list; the printed output shows the default
# integer index next to each value.
series = pd.Series(data=[1, 2, 8, 4])
print(series)

0    1
1    2
2    8
3    4
dtype: int64


### 2.3 Creating a DataFrame

In [5]:
# Column-oriented input: each key becomes a column, each list its values.
data = dict(
    Name=['Mario', 'Luigi', 'Peach'],
    Age=[25, 24, 24],
)

# Turn the mapping into a two-column DataFrame and display it.
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age
0,Mario,25
1,Luigi,24
2,Peach,24


In [6]:
# Print the head of the frame, a 20-dash divider, then the tail, one per line.
print(df.head(), "--" * 10, df.tail(), sep="\n")

    Name  Age
0  Mario   25
1  Luigi   24
2  Peach   24
--------------------
    Name  Age
0  Mario   25
1  Luigi   24
2  Peach   24


### 2.4 Accessing specific columns

In [7]:
# Select a single column by its label using bracket indexing.
df['Name']

0    Mario
1    Luigi
2    Peach
Name: Name, dtype: object

## 3. Data Import and Export

### 3.1 Reading data from a CSV file

In [8]:
# Load sample.csv into a DataFrame (rebinding df) and display the result.
df = pd.read_csv(filepath_or_buffer='sample.csv')
df

Unnamed: 0,Name,Age
0,Dave,28
1,Mary,30
2,Julia,12
3,Alice,19
4,Diana,10
5,Frank,49
6,Bob,40
7,Grace,31
8,Xena,45
9,Andy,36


### 3.2 Saving data to a CSV file

In [9]:
# Save the frame to output.csv. index=False leaves out the automatic row
# index, so the file contains only the data columns (Name, Age) and does not
# gain an extra unnamed column when it is read back with pd.read_csv.
df.to_csv('output.csv', index=False)

## 4. Basic Data Exploration

### 4.1 Top 5 rows and last 5 rows

In [10]:
# Top 5 rows, a 20-dash divider, then the last 5 rows, each on its own line.
print(df.head(), '--' * 10, df.tail(), sep='\n')


    Name  Age
0   Dave   28
1   Mary   30
2  Julia   12
3  Alice   19
4  Diana   10
--------------------
    Name  Age
5  Frank   49
6    Bob   40
7  Grace   31
8   Xena   45
9   Andy   36


### 4.2 Summary statistics

In [12]:
# Summary statistics for the frame; include="all" asks describe() to report on
# every column rather than only the numeric ones.
df.describe(include="all")

Unnamed: 0,Name,Age
count,10,10.0
unique,10,
top,Dave,
freq,1,
mean,,30.0
std,,13.216152
min,,10.0
25%,,21.25
50%,,30.5
75%,,39.0


### 4.3 Info about data types and missing values

In [13]:
# Print the index range, each column's non-null count and dtype, and the
# frame's memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    10 non-null     object
 1   Age     10 non-null     int64 
dtypes: int64(1), object(1)
memory usage: 288.0+ bytes


### 4.4 Checking missing values

In [14]:
# Flag every missing cell, then total the flags per column.
df.isna().sum()

Name    0
Age     0
dtype: int64

### 4.5 Checking unique values of a column

In [15]:
# Distinct values of the Name column, returned as an array.
df['Name'].unique()

array(['Dave', 'Mary', 'Julia', 'Alice', 'Diana', 'Frank', 'Bob', 'Grace',
       'Xena', 'Andy'], dtype=object)

### 4.6 Checking unique values of all columns

In [18]:
# Walk every column label and list the distinct values found in that column.
for column_name in df.columns:
    # The trailing empty string produces the blank line between columns.
    print(column_name, df[column_name].unique(), '', sep='\n')

Name
['Dave' 'Mary' 'Julia' 'Alice' 'Diana' 'Frank' 'Bob' 'Grace' 'Xena' 'Andy']

Age
[28 30 12 19 10 49 40 31 45 36]



## 5. Data Manipulation Basics

### 5.1 Adding a new column

In [19]:
# Attach a placeholder title to each of the 10 rows:
# 'Job name 1' through 'Job name 10', in row order.
df['Job title'] = [f'Job name {number}' for number in range(1, 11)]

df

Unnamed: 0,Name,Age,Job title
0,Dave,28,Job name 1
1,Mary,30,Job name 2
2,Julia,12,Job name 3
3,Alice,19,Job name 4
4,Diana,10,Job name 5
5,Frank,49,Job name 6
6,Bob,40,Job name 7
7,Grace,31,Job name 8
8,Xena,45,Job name 9
9,Andy,36,Job name 10


### 5.2 Filtering data

In [21]:
# Keep only the rows whose Age is strictly greater than 19.
older_people = df.loc[df['Age'].gt(19)]
older_people

Unnamed: 0,Name,Age,Job title
0,Dave,28,Job name 1
1,Mary,30,Job name 2
5,Frank,49,Job name 6
6,Bob,40,Job name 7
7,Grace,31,Job name 8
8,Xena,45,Job name 9
9,Andy,36,Job name 10


### 5.3 Sorting data

In [23]:
# Rebind df to a version ordered from the highest Age to the lowest.
df = df.sort_values('Age', ascending=False)
df

Unnamed: 0,Name,Age,Job title
5,Frank,49,Job name 6
8,Xena,45,Job name 9
6,Bob,40,Job name 7
9,Andy,36,Job name 10
7,Grace,31,Job name 8
1,Mary,30,Job name 2
0,Dave,28,Job name 1
3,Alice,19,Job name 4
2,Julia,12,Job name 3
4,Diana,10,Job name 5


### 5.4 Applying a function to a column

In [26]:
def age_plus_one(x):
    """Return ``x`` increased by a fixed offset of 23.

    NOTE(review): the function name suggests an increment of 1, but the
    value actually added is 23. Confirm which one is intended.
    """
    offset = 23
    return x + offset

# Call age_plus_one on every Age value and store the results in a new
# 'New Age' column, then display the updated frame.
df['New Age'] = df['Age'].apply(age_plus_one)
df

Unnamed: 0,Name,Age,Job title,New Age
5,Frank,49,Job name 6,72
8,Xena,45,Job name 9,68
6,Bob,40,Job name 7,63
9,Andy,36,Job name 10,59
7,Grace,31,Job name 8,54
1,Mary,30,Job name 2,53
0,Dave,28,Job name 1,51
3,Alice,19,Job name 4,42
2,Julia,12,Job name 3,35
4,Diana,10,Job name 5,33
