## Installing and Running pandas

In [2]:
import pandas as pd
import numpy as np
# import mne as s # EEg signals library

In [5]:
# s.__version__

'1.0.2'

## Introduction to pandas Series

In [3]:
import pandas as pd

In [4]:
# A plain Python list of single-letter product labels.
products = list('ABCD')
products

['A', 'B', 'C', 'D']

In [5]:
type(products)

list

In [6]:
# Wrap the list in a Series; no index is given, so pandas numbers the rows itself.
product_categories = pd.Series(data=products)

In [7]:
product_categories

0    A
1    B
2    C
3    D
dtype: object

In [8]:
type(product_categories)

pandas.core.series.Series

In [9]:
type(pd.Series(products))

pandas.core.series.Series

In [8]:
# Build a Series straight from a list literal of integer rates.
daily_rates_dollars = pd.Series(data=[40, 45, 50, 60])
daily_rates_dollars

0    40
1    45
2    50
3    60
dtype: int64

In [9]:
print(daily_rates_dollars)

0    40
1    45
2    50
3    60
dtype: int64


In [3]:
import numpy as np

In [13]:
# One-dimensional integer array holding 10, 20, ..., 50.
array_a = np.arange(10, 60, 10)
array_a

array([10, 20, 30, 40, 50])

In [14]:
type(array_a)

numpy.ndarray

In [15]:
# A Series can also be built from a NumPy array.
series_a = pd.Series(data=array_a)
series_a

0    10
1    20
2    30
3    40
4    50
dtype: int32

In [16]:
print(series_a)

0    10
1    20
2    30
3    40
4    50
dtype: int32


In [17]:
type(series_a)

pandas.core.series.Series

## Working with Attributes in Python

In [18]:
import pandas as pd

In [11]:
# Five integers (10..50 in steps of 10) with the default integer index.
series_a = pd.Series(data=list(range(10, 60, 10)))
series_a

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [12]:
series_a.dtype

dtype('int64')

In [14]:
series_a.size

5

In [28]:
# Named Series with an explicit 1-based index.
# The range object is passed directly as the index instead of being copied
# element by element through a list comprehension; the labels are still 1, 2, 3, 4.
product_categories = pd.Series(
    data=['A', 'B', 'C', 'D'],
    index=range(1, 5),
    name='category',
)
product_categories

1    A
2    B
3    C
4    D
Name: category, dtype: object

In [16]:
product_categories.dtype

dtype('O')

In [17]:
product_categories.size

4

In [18]:
type(product_categories.size)

int

In [21]:
product_categories.name

'category'

In [27]:
print(product_categories.name)

category


In [24]:
# Rename the Series through its `name` attribute (shown in the repr as "Name: ...").
product_categories.name = "Product Categories"
# NOTE(review): `description` is not a documented pandas Series attribute; this appears to
# attach an ad-hoc Python attribute to this one object. Presumably it is not carried over
# to copies or derived Series (pandas offers `Series.attrs` for metadata) — TODO confirm.
product_categories.description = "Some products"
product_categories

1    A
2    B
3    C
4    D
Name: Product Categories, dtype: object

In [25]:
print(product_categories.description)
product_categories.name

Some products


'Product Categories'

In [30]:
print(product_categories.name)

Product Categories


## Using an Index in pandas

In [31]:
import pandas as pd

In [26]:
# Plain dict mapping each product label to its price.
prices_per_category = {
    'Product A': 22250,
    'Product B': 16600,
    'Product C': 15600,
}
prices_per_category

{'Product A': 22250, 'Product B': 16600, 'Product C': 15600}

In [33]:
type(prices_per_category)

dict

In [30]:
# Convert the dict into a Series: the keys become the index labels, the values the data.
# Note: this rebinds `prices_per_category` from the dict to the Series, so the original
# dict is no longer reachable under this name afterwards.
prices_per_category = pd.Series(prices_per_category)
prices_per_category

Product A    22250
Product B    16600
Product C    15600
dtype: int64

In [35]:
type(prices_per_category)

pandas.core.series.Series

In [31]:
prices_per_category.index

Index(['Product A', 'Product B', 'Product C'], dtype='object')

In [37]:
type(prices_per_category.index)

pandas.core.indexes.base.Index

## Label-based vs Position-based Indexing

In [38]:
import pandas as pd

In [32]:
# No index is supplied, so this Series gets the default RangeIndex (0..4).
series_a = pd.Series(data=[10, 20, 30, 40, 50])
series_a

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [33]:
series_a.index

RangeIndex(start=0, stop=5, step=1)

In [41]:
type(series_a.index)

pandas.core.indexes.range.RangeIndex

In [42]:
list(series_a.index)

[0, 1, 2, 3, 4]

In [43]:
# Built from a dict, so the index holds the string keys rather than integers.
prices_per_category = pd.Series(
    data={
        'Product A': 22250,
        'Product B': 16600,
        'Product C': 12500,
    }
)
prices_per_category

Product A    22250
Product B    16600
Product C    12500
dtype: int64

In [44]:
prices_per_category.index

Index(['Product A', 'Product B', 'Product C'], dtype='object')

In [45]:
type(prices_per_category.index)

pandas.core.indexes.base.Index

## More on Working with Indices in Python

In [46]:
import pandas as pd

In [36]:
# Two Series used below: one with the default integer index ...
series_a = pd.Series(data=[10, 20, 30, 40, 50])
# ... and one whose index is made of string labels.
prices_per_category = pd.Series(
    data={
        'Product A': 22250,
        'Product B': 16600,
        'Product C': 12500,
    }
)

In [37]:
series_a

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [38]:
series_a[0]

10

In [39]:
prices_per_category

Product A    22250
Product B    16600
Product C    12500
dtype: int64

In [40]:
prices_per_category['Product A']

22250

In [44]:
# Walk the Series by integer position.
# The index here holds string labels, so an integer inside plain `[]` only works through
# pandas' deprecated positional fallback; `.iloc` states the positional intent explicitly.
for position in range(prices_per_category.size):
    print(prices_per_category.iloc[position])

22250
16600
12500


In [42]:
# Position-based access on a label-indexed Series goes through `.iloc`
# (integer keys inside plain `[]` are a deprecated fallback when the labels are strings).
# prices_per_category.iloc[0]
# prices_per_category.iloc[1]
prices_per_category.iloc[2]

12500

In [55]:
# NOTE(review): `series_b` is not assigned in any cell visible above this one, so this
# lookup fails with NameError after Restart & Run All. The recorded output presumably
# comes from a cell that was later deleted — TODO restore its definition or drop this cell.
series_b[1]

10

In [48]:
# The index labels are strings ("4", "5", ...), not integers, and are not in sorted order.
series_c = pd.Series(
    data=[10, 20, 30, 40, 50],
    index=["4", "5", "6", "7", "1"],
)
series_c

4    10
5    20
6    30
7    40
1    50
dtype: int64

In [52]:
# Integer labels that do not start at 0: label 2 sits at position 0.
series = pd.Series(data=[2, 3, 4], index=[2, 3, 4])
series

2    2
3    3
4    4
dtype: int64

In [55]:
series[2]

2

In [14]:
# `series_c` is indexed by the strings "4", "5", "6", "7", "1", so there is no label 0.
# Fetch the first element by position with `.iloc` rather than relying on the deprecated
# integer fallback of plain `[]`.
series_c.iloc[0]

10

In [60]:
prices_per_category

Product A    22250
Product B    16600
Product C    12500
dtype: int64

## Using Methods in Python - Part I

In [61]:
import pandas as pd

In [56]:
# Deposit amounts keyed by date strings written as month/day/year.
# The (label, value) pairs keep their listed order in the resulting Series.
start_date_deposits = pd.Series(dict([
    ('7/4/2014', 2000),
    ('1/2/2015', 2000),
    ('12/8/2012', 1000),
    ('2/20/2015', 2000),
    ('10/28/2013', 2000),
    ('4/19/2015', 2000),
    ('7/4/2016', 2000),
    ('4/24/2014', 2000),
    ('9/3/2015', 4000),
    ('7/25/2016', 2000),
    ('5/1/2014', 2000),
    ('3/29/2013', 2000),
    ('10/3/2014', 2000),
    ('9/18/2015', 2500),
]))

In [57]:

start_date_deposits

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
4/19/2015     2000
7/4/2016      2000
4/24/2014     2000
9/3/2015      4000
7/25/2016     2000
5/1/2014      2000
3/29/2013     2000
10/3/2014     2000
9/18/2015     2500
dtype: int64

In [58]:
start_date_deposits.sum()

29500

In [66]:
start_date_deposits.min()

1000

In [67]:
start_date_deposits.max()

4000

In [68]:
start_date_deposits.mean()

2107.1428571428573

In [17]:
start_date_deposits.idxmax()

'9/3/2015'

In [18]:
start_date_deposits.idxmin()

'12/8/2012'

## Using Methods in Python - Part II

In [70]:
import pandas as pd

In [60]:
# Same deposits-by-date Series as in the previous section, rebuilt here from
# (date string, amount) pairs; the listed order is preserved.
start_date_deposits = pd.Series(dict([
    ('7/4/2014', 2000),
    ('1/2/2015', 2000),
    ('12/8/2012', 1000),
    ('2/20/2015', 2000),
    ('10/28/2013', 2000),
    ('4/19/2015', 2000),
    ('7/4/2016', 2000),
    ('4/24/2014', 2000),
    ('9/3/2015', 4000),
    ('7/25/2016', 2000),
    ('5/1/2014', 2000),
    ('3/29/2013', 2000),
    ('10/3/2014', 2000),
    ('9/18/2015', 2500),
]))

In [11]:
start_date_deposits

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
4/19/2015     2000
7/4/2016      2000
4/24/2014     2000
9/3/2015      4000
7/25/2016     2000
5/1/2014      2000
3/29/2013     2000
10/3/2014     2000
9/18/2015     2500
dtype: int64

In [62]:
start_date_deposits.head()

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
dtype: int64

In [13]:
start_date_deposits.tail()

7/25/2016    2000
5/1/2014     2000
3/29/2013    2000
10/3/2014    2000
9/18/2015    2500
dtype: int64

## Parameters vs Arguments

In [75]:
import pandas as pd

In [63]:
# Deposits-by-date Series used by the `head()` examples in this section,
# built from (date string, amount) pairs in their listed order.
start_date_deposits = pd.Series(dict([
    ('7/4/2014', 2000),
    ('1/2/2015', 2000),
    ('12/8/2012', 1000),
    ('2/20/2015', 2000),
    ('10/28/2013', 2000),
    ('4/19/2015', 2000),
    ('7/4/2016', 2000),
    ('4/24/2014', 2000),
    ('9/3/2015', 4000),
    ('7/25/2016', 2000),
    ('5/1/2014', 2000),
    ('3/29/2013', 2000),
    ('10/3/2014', 2000),
    ('9/18/2015', 2500),
]))

In [77]:
start_date_deposits.head()

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
dtype: int64

In [78]:
start_date_deposits.head(3)

7/4/2014     2000
1/2/2015     2000
12/8/2012    1000
dtype: int64

In [79]:
start_date_deposits.head(10)

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
4/19/2015     2000
7/4/2016      2000
4/24/2014     2000
9/3/2015      4000
7/25/2016     2000
dtype: int64

In [80]:
start_date_deposits.head()

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
dtype: int64

In [64]:
def f(n):
    """Identity helper: hand the argument ``n`` straight back to the caller."""
    result = n
    return result

In [81]:
start_date_deposits.head(n=10)

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
4/19/2015     2000
7/4/2016      2000
4/24/2014     2000
9/3/2015      4000
7/25/2016     2000
dtype: int64

In [83]:
# Identical to the Series created at the top of this section: the same
# (date string, amount) pairs in the same order.
start_date_deposits = pd.Series(dict([
    ('7/4/2014', 2000),
    ('1/2/2015', 2000),
    ('12/8/2012', 1000),
    ('2/20/2015', 2000),
    ('10/28/2013', 2000),
    ('4/19/2015', 2000),
    ('7/4/2016', 2000),
    ('4/24/2014', 2000),
    ('9/3/2015', 4000),
    ('7/25/2016', 2000),
    ('5/1/2014', 2000),
    ('3/29/2013', 2000),
    ('10/3/2014', 2000),
    ('9/18/2015', 2500),
]))

In [84]:
start_date_deposits.sum()

29500

In [85]:
start_date_deposits.head()

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
dtype: int64

In [86]:
start_date_deposits.head(10)

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
4/19/2015     2000
7/4/2016      2000
4/24/2014     2000
9/3/2015      4000
7/25/2016     2000
dtype: int64

In [87]:
start_date_deposits.sum()

29500

## Introduction to pandas DataFrames

In [15]:
import pandas as pd

## Creating DataFrames from Scratch

<u>**#1: Construct a DataFrame from a dictionary of lists**</u>

In [68]:
# Dictionary of lists: each key is a column name, each list holds that column's values.
data = {
    'ProductName': ['Product A', 'Product B', 'Product C'],
    'ProductPrice': [22250, 16600, 3333],
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,3333


In [69]:
# Same construction with a fourth product; both lists must have the same length.
data = {
    'ProductName': ['Product A', 'Product B', 'Product C', 'Product D'],
    'ProductPrice': [22250, 16600, 12500, 234324],
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500
3,Product D,234324


<u>**#2: Construct a DataFrame from a dictionary of lists + specify an index**</u>

In [72]:
# Row labels are supplied explicitly; they can be arbitrary strings.
data = {
    'ProductName': ['Product A', 'Product B', 'Product C'],
    'ProductPrice': [22250, 16600, 12500],
}
df = pd.DataFrame(data=data, index=['B', 'DD', 'G'])
df

Unnamed: 0,ProductName,ProductPrice
B,Product A,22250
DD,Product B,16600
G,Product C,12500


In [6]:
# Integer row labels starting at 1 instead of the default 0-based index.
data = {
    'ProductName': ['Product A', 'Product B', 'Product C'],
    'ProductPrice': [22250, 16600, 12500],
}
df = pd.DataFrame(data=data, index=[1, 2, 3])
df

Unnamed: 0,ProductName,ProductPrice
1,Product A,22250
2,Product B,16600
3,Product C,12500


In [5]:
# The row labels come from a separately defined list of product IDs.
product_IDs = ['A', 'B', 'C']
data = {
    'ProductName': ['Product A', 'Product B', 'Product C'],
    'ProductPrice': [22250, 16600, 12500],
}
df = pd.DataFrame(data=data, index=product_IDs)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,12500


<u>**#3: Construct a DataFrame from a list of dictionaries**</u>

In [73]:
# List of dictionaries: each dictionary is one row, its keys are the column names.
data = [
    {'ProductName': 'Product A', 'ProductPrice': 22250},
    {'ProductName': 'Product B', 'ProductPrice': 16600},
    {'ProductName': 'Product C', 'ProductPrice': 12500},
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [74]:
# A value may itself be a list: it is stored as a single cell holding the whole list.
data = [
    {'ProductName': 'Product A', 'ProductPrice': 22250},
    {'ProductName': 'Product B', 'ProductPrice': 16600},
    {'ProductName': 'Product C', 'ProductPrice': [12500, 100000]},
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,"[12500, 100000]"


In [75]:
# The last record has no 'ProductPrice' key: its cell comes out as a missing value
# and the price column is displayed as floating point.
data = [
    {'ProductName': 'Product A', 'ProductPrice': 22250},
    {'ProductName': 'Product B', 'ProductPrice': 16600},
    {'ProductName': 'Product C', 'ProductPrice': 12500},
    {'ProductName': 'Product D'},
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250.0
1,Product B,16600.0
2,Product C,12500.0
3,Product D,


<u>**#4: Construct a DataFrame from a dictionary of pandas Series**</u>

In [77]:
# Two Series with the default integer index: one for names, one for prices.
ser_products = pd.Series(data=['Product A', 'Product B', 'Product C'])
ser_prices = pd.Series(data=[22250, 16600, 12500])

In [78]:
# Dictionary of Series: each key names a column, each Series supplies its values.
data = {
    'ProductName': ser_products,
    'ProductPrice': ser_prices,
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [80]:
# The two Series carry the same labels in different orders.
# The DataFrame lines rows up by label, not by position in the lists.
ser_products = pd.Series(
    data=['Product A', 'Product B', 'Product C'],
    index=['B', 'A', 'C'],
)
ser_prices = pd.Series(
    data=[22250, 16600, 12500],
    index=['A', 'B', 'C'],
)

data = {
    'ProductName': ser_products,
    'ProductPrice': ser_prices,
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
A,Product B,22250
B,Product A,16600
C,Product C,12500


In [81]:
# Here the price Series lists its labels in reverse order.
# After alignment by label, 'A' is paired with 12500 and 'C' with 22250.
ser_products = pd.Series(
    data=['Product A', 'Product B', 'Product C'],
    index=['A', 'B', 'C'],
)
ser_prices = pd.Series(
    data=[22250, 16600, 12500],
    index=['C', 'B', 'A'],
)

data = {
    'ProductName': ser_products,
    'ProductPrice': ser_prices,
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,12500
B,Product B,16600
C,Product C,22250


<u>**#5: Construct a DataFrame from a list of lists**</u>

In [83]:
# List of lists: each inner list is one row.
# Row labels and column labels are both given as integers here.
data = [
    ['Product A', 22250],
    ['Product B', 16600],
    ['Product C', 12500],
]
df = pd.DataFrame(data=data, index=[1, 2, 3], columns=[1, 2])
df

Unnamed: 0,1,2
1,Product A,22250
2,Product B,16600
3,Product C,12500


In [84]:
# The third row has one extra element, so a third column appears;
# the shorter rows get a missing value in that column.
data = [
    ['Product A', 22250],
    ['Product B', 16600],
    ['Product C', 12500, 5000],
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,0,1,2
0,Product A,22250,
1,Product B,16600,
2,Product C,12500,5000.0


In [85]:
# Without `columns` or `index`, both axes get default integer labels (0, 1, ...).
data = [
    ['Product A', 22250],
    ['Product B', 16600],
    ['Product C', 12500],
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,0,1
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [101]:
# Replace the default integer column labels with descriptive names.
# This assigns to the existing DataFrame in place, so `df` must already exist
# and have exactly two columns for the assignment to succeed.
df.columns = ['ProductName', 'ProductPrice']
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [102]:
# Replace the row labels of the existing DataFrame in place.
# The list needs one label per row (three here).
df.index = ['A', 'B', 'C']
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,12500


<u>**#6: Construct a DataFrame in the BEST way**</u>

In [20]:
# Rows and column names are passed in a single call.
# An explicit index could be supplied the same way, e.g. index=['A', 'B', 'C'].
df = pd.DataFrame(
    data=[
        ['Product A', 22250],
        ['Product B', 16600],
        ['Product C', 12500],
    ],
    columns=['ProductName', 'ProductPrice'],
)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [87]:
df.shape

(3, 2)

In [86]:
# Same construction, this time with explicit 1-based row labels as well.
df = pd.DataFrame(
    data=[
        ['Product A', 22250],
        ['Product B', 16600],
        ['Product C', 12500],
    ],
    index=[1, 2, 3],
    columns=['ProductName', 'ProductPrice'],
)
df

Unnamed: 0,ProductName,ProductPrice
1,Product A,22250
2,Product B,16600
3,Product C,12500
