## Introduction to Pandas

#### Import Pandas

In [1]:
import pandas as pd

#### Creating Series and DataFrames

In [2]:
# Build a labelled one-dimensional Series: three integer values, each paired with a string label.
s = pd.Series(
    data=[10, 20, 30],
    index=list("abc"),  # expands to ["a", "b", "c"]
)
s  # the last expression in a cell is rendered as its output


a    10
b    20
c    30
dtype: int64

In [3]:
# Column-oriented records: each key is a column name and each list holds that column's values.
data = {
    "Name": ["Alice", "George"],
    "Age": [25, 23],
}
data

{'Name': ['Alice', 'George'], 'Age': [25, 23]}

In [4]:
# Turn the dict into a table: its keys become the column names and its lists become the column values.
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age
0,Alice,25
1,George,23


#### Viewing Data

In [5]:
# Show only the first row of the frame.
df.head(1)

Unnamed: 0,Name,Age
0,Alice,25


In [6]:
# Show only the last row of the frame.
df.tail(1)

Unnamed: 0,Name,Age
1,George,23


#### Describing Data

In [7]:
# Print a structural summary: index range, each column's non-null count and dtype, and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2 entries, 0 to 1
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    2 non-null      object
 1   Age     2 non-null      int64 
dtypes: int64(1), object(1)
memory usage: 164.0+ bytes


In [8]:
# Summary statistics (count, mean, std, min, quartiles, max); only the numeric Age column is reported.
df.describe()

Unnamed: 0,Age
count,2.0
mean,24.0
std,1.414214
min,23.0
25%,23.5
50%,24.0
75%,24.5
max,25.0


#### Filtering Data

In [9]:
# Indexing with a list of labels returns those columns, in the order they are listed.
df[["Age", "Name"]]

Unnamed: 0,Age,Name
0,25,Alice
1,23,George


In [10]:
# Boolean-mask filtering: the comparison yields one True/False per row, and only the True rows are kept.
df[df["Age"] > 24]


Unnamed: 0,Name,Age
0,Alice,25


#### DataFrame Indexing

In [11]:
# Position-based lookup: fetch the row at integer position 0, returned as a Series.
df.iloc[0]

Name    Alice
Age        25
Name: 0, dtype: object

In [12]:
# Position-based lookup on both axes: every row (:) of the column at position 0 (the Name column).
df.iloc[:,0]

0     Alice
1    George
Name: Name, dtype: object

In [13]:
# Label-based lookup: fetch the row whose index label is 0. With the default RangeIndex the
# labels coincide with the positions, so this matches df.iloc[0].
df.loc[0]

Name    Alice
Age        25
Name: 0, dtype: object

In [14]:
# Label-based lookup: fetch the row whose index label is 1.
df.loc[1]

Name    George
Age         23
Name: 1, dtype: object

#### Loading a Dataset
![Iris species](iris_species.png)

In [15]:
# Load the iris measurements from a CSV file; the relative path is resolved against the current
# working directory. NOTE: this rebinds `df`, replacing the small Name/Age frame used so far.
df = pd.read_csv('iris.csv')

#### Exploring the dataset

In [16]:
# List the column labels of the loaded frame.
df.columns

Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'],
      dtype='object')

In [17]:
# Preview the first five rows (head() defaults to n=5).
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [18]:
# Structural summary of the loaded frame: row count, per-column non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [19]:
# Preview the last five rows (tail() defaults to n=5).
df.tail()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica
149,5.9,3.0,5.1,1.8,virginica


In [20]:
# Summary statistics for the four numeric measurement columns; the text column species is left out.
df.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.054,3.758667,1.198667
std,0.828066,0.433594,1.76442,0.763161
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [21]:
# Count how many rows belong to each species (one count per distinct value in the column).
df['species'].value_counts()

species
setosa        50
versicolor    50
virginica     50
Name: count, dtype: int64

In [22]:
# Select only the sepal_length and species columns; indexing with a list of labels returns a DataFrame.
df[["sepal_length","species"]]

Unnamed: 0,sepal_length,species
0,5.1,setosa
1,4.9,setosa
2,4.7,setosa
3,4.6,setosa
4,5.0,setosa
...,...,...
145,6.7,virginica
146,6.3,virginica
147,6.5,virginica
148,6.2,virginica


In [23]:
# Keep the rows that satisfy both conditions. The masks are combined element-wise with &, and each
# comparison needs its own parentheses because & binds more tightly than > and ==.
df[
    (df["sepal_length"] > 5.5)
    & (df["species"] == "setosa")
]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
14,5.8,4.0,1.2,0.2,setosa
15,5.7,4.4,1.5,0.4,setosa
18,5.7,3.8,1.7,0.3,setosa
