# Pandas

The pandas library is used for working with tabular data.
It lets us work with data that is already in table form, such as CSV
files or SQL database tables. We can also create tables of our own
and add or edit their rows and columns. Its two core data structures
are the `Series` and the `DataFrame`.

In [1]:
import pandas as pd
# Show the installed pandas version; the outputs in this notebook were produced with it.
pd.__version__

'0.25.1'

### Pandas Series

In [2]:
import numpy as np

In [17]:
# Shared inputs for the Series examples that follow.
labels = list('abcd')                  # index labels: 'a', 'b', 'c', 'd'
my_list = [10.0, 20.4, 30.3, 40.7]     # plain Python list of floats
arr = np.array(my_list)                # the same values as a NumPy array
dicts = dict(a=10, b=20, c=30, d=40)   # label -> value mapping

In [18]:
# Build a Series from a plain Python list; with no index given, the labels default to 0..n-1.
pd.Series(data=my_list)

0    10.0
1    20.4
2    30.3
3    40.7
dtype: float64

In [16]:
# A NumPy array can be used as the data source as well; labels again default to 0..n-1.
pd.Series(arr)

0    10.0
1    20.4
2    30.3
3    40.7
dtype: float64

In [13]:
# Pass index= to label each value with the matching entry of `labels` instead of 0..n-1.
pd.Series(data=my_list, index=labels)

a    10.0
b    20.4
c    30.3
d    40.7
dtype: float64

In [14]:
# A Series can hold strings too; here the string list becomes the data, not the index.
pd.Series(labels)

0    a
1    b
2    c
3    d
dtype: object

In [15]:
# From a dict, the keys become the index labels and the values become the data.
pd.Series(dicts)

a    10
b    20
c    30
d    40
dtype: int64

In [19]:
# Capital cities (values) labelled by the state each one belongs to (index).
ser1 = pd.Series(
    ['Minna', 'Makurdi', 'Abuja', 'Kaduna'],
    index=['Niger', 'Benue', 'FCT', 'Kaduna'],
)

In [20]:
# Display the Series: index labels on the left, values on the right.
ser1

Niger       Minna
Benue     Makurdi
FCT         Abuja
Kaduna     Kaduna
dtype: object

### Working with Series Attributes in Pandas

In [21]:
# Data type of the stored values; 'O' is the object dtype shown as `dtype: object` when the Series is displayed.
ser1.dtype

dtype('O')

In [22]:
# Number of elements in the Series.
ser1.size

4

In [26]:
# `name` is a writable attribute: attach a descriptive name to the Series...
ser1.name = "States and Capital"
# ...then read it back to confirm the assignment.
ser1.name

'States and Capital'

In [27]:
# Look up a single value by its index label.
ser1['Niger']

'Minna'

### Using Methods in Pandas

In [28]:
# Deposit amounts labelled by start date (month/day/year strings).
# The (date, amount) pairs are listed in the order they should appear in the
# Series; the dates are deliberately left unsorted.
start_date_deposits = pd.Series(dict([
    ('7/4/2014', 2000),
    ('1/2/2015', 2000),
    ('12/8/2012', 1000),
    ('2/20/2015', 2000),
    ('10/28/2013', 2000),
    ('4/19/2015', 2000),
    ('7/4/2016', 3000),
    ('4/24/2014', 2000),
    ('9/3/2015', 4000),
    ('7/25/2016', 2000),
    ('5/1/2014', 2000),
    ('3/29/2013', 2600),
    ('10/3/2014', 2000),
    ('9/18/2015', 2500),
]))

In [29]:
# Display the full Series to check the labels and values.
start_date_deposits

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
4/19/2015     2000
7/4/2016      3000
4/24/2014     2000
9/3/2015      4000
7/25/2016     2000
5/1/2014      2000
3/29/2013     2600
10/3/2014     2000
9/18/2015     2500
dtype: int64

In [30]:
# Total of all deposits.
start_date_deposits.sum()

31100

In [31]:
# Smallest deposit amount.
start_date_deposits.min()

1000

In [32]:
# Index label (the start date) at which the smallest deposit occurs.
start_date_deposits.idxmin()

'12/8/2012'

In [33]:
# Arithmetic mean of the deposits.
start_date_deposits.mean()

2221.4285714285716

In [34]:
# Most frequently occurring deposit amount; the result comes back as a Series rather than a scalar.
start_date_deposits.mode()

0    2000
dtype: int64

In [35]:
# Median deposit; note the result is a float even though the stored values are integers.
start_date_deposits.median()

2000.0

In [36]:
# First five entries: head() called with no argument.
start_date_deposits.head()

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
dtype: int64

In [37]:
# Last five entries: tail() called with no argument.
start_date_deposits.tail()

7/25/2016    2000
5/1/2014     2000
3/29/2013    2600
10/3/2014    2000
9/18/2015    2500
dtype: int64

In [41]:
# head(3) is itself a Series, so .index gives the labels of just the first three entries.
start_date_deposits.head(3).index

Index(['7/4/2014', '1/2/2015', '12/8/2012'], dtype='object')

In [40]:
# All index labels of the Series, in stored order.
start_date_deposits.index

Index(['7/4/2014', '1/2/2015', '12/8/2012', '2/20/2015', '10/28/2013',
       '4/19/2015', '7/4/2016', '4/24/2014', '9/3/2015', '7/25/2016',
       '5/1/2014', '3/29/2013', '10/3/2014', '9/18/2015'],
      dtype='object')

In [42]:
# Pass a count to head() to choose how many leading entries to show (here, 10).
start_date_deposits.head(10)

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
4/19/2015     2000
7/4/2016      3000
4/24/2014     2000
9/3/2015      4000
7/25/2016     2000
dtype: int64

## Introduction to DataFrames

#### #1: Construct a DataFrame from a dictionary of lists

In [44]:
# Each key becomes a column name and each list supplies that column's values,
# one entry per row. No index is given, so rows are labelled 0..3.
provisions = dict(
    ProductName=['Milo', 'Peak', 'St. Louise', 'Quaker Oat'],
    ProductPrice=[1230, 1300, 750, 980],
)
df = pd.DataFrame(data=provisions)
df

Unnamed: 0,ProductName,ProductPrice
0,Milo,1230
1,Peak,1300
2,St. Louise,750
3,Quaker Oat,980


In [47]:
# Same dict-of-lists construction, but with one more product and explicit
# row labels 'a'..'e' supplied through index= instead of the default 0..n-1.
provisions2 = dict(
    ProductName=['Milo', 'Peak', 'St. Louise', 'Quaker Oat', 'Corn Flakes'],
    ProductPrice=[1230, 1300, 750, 980, 1000],
)
df2 = pd.DataFrame(data=provisions2, index=list('abcde'))
df2

Unnamed: 0,ProductName,ProductPrice
a,Milo,1230
b,Peak,1300
c,St. Louise,750
d,Quaker Oat,980
e,Corn Flakes,1000


#### #3 Construct a DataFrame from a list of dictionaries

In [49]:
# Each dict describes one row: its keys are the column names and its values
# are that row's cells. Prices are stored as integers (not strings) so the
# ProductPrice column is numeric, consistent with the other examples, and
# supports arithmetic such as .sum() and .mean().
provision3 = [
    {'ProductName': 'Milo', 'ProductPrice': 1230},
    {'ProductName': 'Peak', 'ProductPrice': 1300},
    {'ProductName': 'Corn Flakes', 'ProductPrice': 1000},
]
df3 = pd.DataFrame(provision3)
df3

Unnamed: 0,ProductName,ProductPrice
0,Milo,1230
1,Peak,1300
2,Corn Flakes,1000


#### #4 Construct a DataFrame from a list of rows with explicit column names and index

In [50]:
# Row-oriented construction: each inner list is one row, `columns` names the
# fields in order, and `index` labels the rows 'A'..'D'.
df4 = pd.DataFrame(
    [
        ['Milo', 1230],
        ['Peak', 1300],
        ['Tissue', 200],
        ['Dettol', 300],
    ],
    index=['A', 'B', 'C', 'D'],
    columns=['Product Name', 'Product Price'],
)
df4

Unnamed: 0,Product Name,Product Price
A,Milo,1230
B,Peak,1300
C,Tissue,200
D,Dettol,300


In [51]:
# Dimensions of the DataFrame as a (rows, columns) tuple.
df4.shape

(4, 2)