# Day 12: Python

> <h3>Pandas</h3>

![d12.PNG](attachment:d12.PNG)

In [1]:
import numpy as np
import pandas as pd
from pandas import Series, DataFrame

***

### 1. Create a data frame (df) by using data as data and labels as row index. 
<br>

In [21]:
data = {'animal' : ['cat', 'cat', 'snake', 'dog', 'dog', 'cat', 'snake', 'cat', 'dog', 'dog'],
       'age' : [2.5, 3, 0.5, np.nan, 5,2,4.5, np.nan, 7, 3],
       'visit' : [1,3,2,3,2,3,1,1,2,1],
       'priority' : ['yes', 'yes', 'no', 'yes', 'no', 'no', 'no', 'yes', 'no','no']}

labels = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j']

df = pd.DataFrame(data, index = labels)
df

Unnamed: 0,animal,age,visit,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no
d,dog,,3,yes
e,dog,5.0,2,no
f,cat,2.0,3,no
g,snake,4.5,1,no
h,cat,,1,yes
i,dog,7.0,2,no
j,dog,3.0,1,no


***

### 2. Display a summary of the basic information about this df and its data. 
<br>

In [22]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 10 entries, a to j
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   animal    10 non-null     object 
 1   age       8 non-null      float64
 2   visit     10 non-null     int64  
 3   priority  10 non-null     object 
dtypes: float64(1), int64(1), object(2)
memory usage: 280.0+ bytes


Info says:

* There are <b><i>10<i></b> entries in the data set
* There is a column named  <b><i>animals<i></b> with 10 values
* There is a column named  <b><i>age<i></b> with 10 values
* There is a column named  <b><i>visit<i></b> with 10 values
* There is a column named  <b><i>priority<i></b> with 10 values
* Out of the four columns, one is <b><i>numeric<b><i><i></b><i></b>, one is  <b><i>float<i></b>, the other is  <b><i>non numeric<i></b>

***

### 3.	Select the data labelled "animal" and "age" in df. 
<br>

In [6]:
df.loc[:, ['animal', 'age']]

Unnamed: 0,animal,age
a,cat,2.5
b,cat,3.0
c,snake,0.5
d,dog,
e,dog,5.0
f,cat,2.0
g,snake,4.5
h,cat,
i,dog,7.0
j,dog,3.0


***

### 4. View the first three lines of this df. 
<br>

In [7]:
df.iloc[:3]

Unnamed: 0,animal,age,visit,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no


***

### 5.	Select the data labelled "animal" and "age" in df. 
<br>

In [8]:
df.loc[:, ['animal', 'age']]

Unnamed: 0,animal,age
a,cat,2.5
b,cat,3.0
c,snake,0.5
d,dog,
e,dog,5.0
f,cat,2.0
g,snake,4.5
h,cat,
i,dog,7.0
j,dog,3.0


***

### 6. Select the data with row index [3,4,8] and column header ['animal ','age'].
<br>

In [9]:
df.iloc[[3,4,8], 0:2]

Unnamed: 0,animal,age
d,dog,
e,dog,5.0
i,dog,7.0


***

### 7.	Select rows with visits greater than 3. 
<br>

In [10]:
visit_index = df['visit'] > 3
df[visit_index]

Unnamed: 0,animal,age,visit,priority


***

### 8. Select the row with age as the missing value. 
<br>

In [11]:
age_nan = df['age'].isnull()
df[age_nan]

Unnamed: 0,animal,age,visit,priority
d,dog,,3,yes
h,cat,,1,yes


***

### 9.	Select the row of which animal is cat and age is less than 3. 
<br>

In [12]:
import re
cat_three = (df['animal'].str.contains('cat', flags=re.I, regex=True)) & (df['age'] < 3)
df[cat_three]

Unnamed: 0,animal,age,visit,priority
a,cat,2.5,1,yes
f,cat,2.0,3,no


***

### 10.	Change the age of line f to 1.5. 
<br>

In [13]:
df.at['f','age'] = 1.5
df

Unnamed: 0,animal,age,visit,priority
a,cat,2.5,1,yes
b,cat,3.0,3,yes
c,snake,0.5,2,no
d,dog,,3,yes
e,dog,5.0,2,no
f,cat,1.5,3,no
g,snake,4.5,1,no
h,cat,,1,yes
i,dog,7.0,2,no
j,dog,3.0,1,no


***

### 11.	Calculates the data sum of the visits column.  
<br>

In [14]:
visits = df['visit'].sum()
visits

19

***

### 12.	Calculate the average age of each animal.  
<br>

In [15]:
avg_animals_age = df['age'].mean()
round(avg_animals_age)

3

***

### 13.	Calculate the number of animals. 
<br>

In [16]:
df.shape[0]

10

***

### 14.	Arrange the data by the descending order of age, and then by the ascending order of visits.
<br>

In [17]:
df.sort_values(by=['age', 'visit'], ascending=[0,1])

Unnamed: 0,animal,age,visit,priority
i,dog,7.0,2,no
e,dog,5.0,2,no
g,snake,4.5,1,no
j,dog,3.0,1,no
b,cat,3.0,3,yes
a,cat,2.5,1,yes
f,cat,1.5,3,no
c,snake,0.5,2,no
h,cat,,1,yes
d,dog,,3,yes


***

### 15.	Replace yes and no of priority columns with true and false. 
<br>

In [18]:
df.loc[df['priority'] == 'yes', 'priority'] = 'true'
df.loc[df['priority'] == 'no', 'priority'] = 'false'
df

Unnamed: 0,animal,age,visit,priority
a,cat,2.5,1,True
b,cat,3.0,3,True
c,snake,0.5,2,False
d,dog,,3,True
e,dog,5.0,2,False
f,cat,1.5,3,False
g,snake,4.5,1,False
h,cat,,1,True
i,dog,7.0,2,False
j,dog,3.0,1,False


***

### 16.	Replace snake in the animal column with python. 
<br>

In [19]:
df.loc[df['animal'].str.contains(r'snake', flags=re.I, regex=True), 'animal'] = 'python'
df

Unnamed: 0,animal,age,visit,priority
a,cat,2.5,1,True
b,cat,3.0,3,True
c,python,0.5,2,False
d,dog,,3,True
e,dog,5.0,2,False
f,cat,1.5,3,False
g,python,4.5,1,False
h,cat,,1,True
i,dog,7.0,2,False
j,dog,3.0,1,False
