### Pandas

The Pandas library is built on NumPy and provides easy-to-use
data structures and data analysis tools for the Python
programming language.

<img src="fig1.png"
    style="float: left;"></img>

In [1]:
import pandas as pd

#### Pandas Data Structures 
#### Series
A one-dimensional labeled array capable of holding any data type

<img src="fig2.png"
    style="float: left;"></img>

#### DataFrame
A two-dimensional labeled data structure with columns of potentially different types

<img src="fig3.png"
    style="float: left;"></img>

In [5]:
# Series: one-dimensional values, each paired with an explicit index label.
s = pd.Series({'a': 3, 'b': -5, 'c': 7, 'd': 4})

# Column-oriented input: every key becomes a column, every list its values.
data = {
    'Country': ['Belgium', 'India', 'Brazil'],
    'Capital': ['Brussels', 'New Delhi', 'Brasília'],
    'Population': [11190846, 1303171035, 207847528],
}

# `columns` pins the column order explicitly instead of relying on dict order.
column_order = ['Country', 'Capital', 'Population']
df = pd.DataFrame(data, columns=column_order)

#### Read and Write to CSV and Excel

pd.read_csv('file.csv', header=None, nrows=5)<br>
df.to_csv('myDataFrame.csv')<br>
pd.read_excel('file.xlsx')<br>
df.to_excel('myDataFrame.xlsx')

#### Selection

In [9]:
# Select by integer position: row 0, column 0. Passing lists (not scalars)
# keeps the result a 1x1 DataFrame rather than a bare value.
df.iloc[[0], [0]]

Unnamed: 0,Country
0,Belgium


In [10]:
# Select by label: index label 0 and the column named 'Country'. Passing lists
# keeps the result a DataFrame.
df.loc[[0], ['Country']]

Unnamed: 0,Country
0,Belgium


In [11]:
# Boolean indexing: keep only the rows whose population exceeds 1.2 billion.
df.loc[df['Population'] > 1_200_000_000]

Unnamed: 0,Country,Capital,Population
1,India,New Delhi,1303171035


#### Dropping

In [12]:
# Return a copy without the 'Country' column; `df` itself is left unchanged.
df.drop(columns='Country')

Unnamed: 0,Capital,Population
0,Brussels,11190846
1,New Delhi,1303171035
2,Brasília,207847528


#### Sort & Rank

In [13]:
# Sort rows by their index labels (already 0, 1, 2 here, so the order is unchanged).
df.sort_index()

Unnamed: 0,Country,Capital,Population
0,Belgium,Brussels,11190846
1,India,New Delhi,1303171035
2,Brazil,Brasília,207847528


In [14]:
# Sort rows by the values of the 'Country' column; the original index labels
# travel with their rows.
df.sort_values(by='Country')

Unnamed: 0,Country,Capital,Population
0,Belgium,Brussels,11190846
2,Brazil,Brasília,207847528
1,India,New Delhi,1303171035


In [15]:
# Rank the entries within each column independently (1.0 = smallest);
# string columns are ranked alphabetically.
df.rank()

Unnamed: 0,Country,Capital,Population
0,1.0,2.0,1.0
1,3.0,3.0,3.0
2,2.0,1.0,2.0


#### DataFrame Information

In [17]:
# Dimensions of the DataFrame as a (rows, columns) tuple.
df.shape

(3, 3)

In [18]:
# The row labels of the DataFrame.
df.index

RangeIndex(start=0, stop=3, step=1)

In [19]:
# The column labels of the DataFrame.
df.columns

Index(['Country', 'Capital', 'Population'], dtype='object')

In [20]:
# Print a concise summary: index range, per-column non-null counts and dtypes,
# and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Country     3 non-null      object
 1   Capital     3 non-null      object
 2   Population  3 non-null      int64 
dtypes: int64(1), object(2)
memory usage: 200.0+ bytes


In [21]:
# Number of non-NA values in each column.
df.count()

Country       3
Capital       3
Population    3
dtype: int64

In [22]:
# Column-wise sum. String columns are "summed" by concatenation, which is why
# the resulting Series has dtype object.
df.sum()

Country              BelgiumIndiaBrazil
Capital       BrusselsNew DelhiBrasília
Population                   1522209409
dtype: object

In [24]:
# Running (cumulative) sum down each column; strings accumulate by concatenation.
df.cumsum()

Unnamed: 0,Country,Capital,Population
0,Belgium,Brussels,11190846
1,BelgiumIndia,BrusselsNew Delhi,1314361881
2,BelgiumIndiaBrazil,BrusselsNew DelhiBrasília,1522209409


In [25]:
# Smallest value in each column (strings are compared lexicographically).
df.min()

Country        Belgium
Capital       Brasília
Population    11190846
dtype: object

In [26]:
# Largest value in each column (strings are compared lexicographically).
df.max()

Country            India
Capital        New Delhi
Population    1303171035
dtype: object

In [29]:
# Summary statistics (count, mean, std, min, quartiles, max). By default only
# numeric columns are summarised, so just 'Population' appears.
df.describe()

Unnamed: 0,Population
count,3.0
mean,507403100.0
std,696134600.0
min,11190850.0
25%,109519200.0
50%,207847500.0
75%,755509300.0
max,1303171000.0


In [30]:
# Mean of each numeric column. `numeric_only=True` is required on pandas >= 2.0:
# the default no longer silently skips the string columns ('Country', 'Capital')
# and raises TypeError instead.
df.mean(numeric_only=True)

Population    5.074031e+08
dtype: float64

In [31]:
# Median of each numeric column. `numeric_only=True` is required on pandas >= 2.0:
# the default no longer silently skips the string columns ('Country', 'Capital')
# and raises TypeError instead.
df.median(numeric_only=True)

Population    207847528.0
dtype: float64

#### Applying functions

In [33]:
def f(x):
    """Return ``x * 2``: doubles numbers and repeats strings/sequences twice."""
    return x * 2
# Apply f to the DataFrame column by column: numbers are doubled and strings
# are repeated twice.
df.apply(f)

Unnamed: 0,Country,Capital,Population
0,BelgiumBelgium,BrusselsBrussels,22381692
1,IndiaIndia,New DelhiNew Delhi,2606342070
2,BrazilBrazil,BrasíliaBrasília,415695056
