# Day 3: Pandas

In [None]:
# Use the %pip magic (not !pip) so the package is installed into the
# environment of the kernel that is running this notebook.
%pip install pandas

In [9]:
# import pandas for data manipulation
import pandas as pd

# import numpy for mathematical functions
import numpy as np

# 1. Series

A Series is a one-dimensional labeled array that can hold data of any type.

#### Create an empty series

In [11]:
# State the dtype explicitly: the default dtype of an empty Series differs
# between pandas versions (and older versions warn about it), so float64 is
# pinned to match the output shown for this cell.
empty_series = pd.Series(dtype="float64")
empty_series

  """Entry point for launching an IPython kernel.


Series([], dtype: float64)

In [10]:
# A Series may mix value types (int, str, bool, NaN); the result has dtype object.
mixed_values = [1, 'Spongebob', True, np.nan, 9, 'Captain']
misc = pd.Series(data=mixed_values)
misc

0            1
1    Spongebob
2         True
3          NaN
4            9
5      Captain
dtype: object

#### Create a series with data

#### Access the value of a series

In [13]:
# With the default integer index, label 1 refers to the second element.
misc[1]

'Spongebob'

In [14]:
# An integer slice returns positions 2, 3 and 4; the stop value 5 is excluded.
misc[2:5]

2    True
3     NaN
4       9
dtype: object

#### Reverse a Series

In [19]:
# A step of -1 with no start/stop walks the whole Series backwards, whatever
# its length, so no hard-coded start position is needed.
# reset_index(drop=True) then renumbers the reversed values from 0 and
# discards the old index.
misc[::-1].reset_index(drop=True)

0      Captain
1            9
2          NaN
3         True
4    Spongebob
5            1
dtype: object

#### Reset the index of the series

In [3]:
# reset_index() replaces the current index with a fresh 0..n-1 index.
# drop=True discards the previous index instead of keeping it as a column.


#### Create a series with index values

In [21]:
# Give every value an explicit label instead of the default 0..n-1 index.
data = [1, 'Spongebob', True, np.nan, 9, 'Captain']
ind = list('ABCDEF')

misc = pd.Series(data=data, index=ind)
misc

A            1
B    Spongebob
C         True
D          NaN
E            9
F      Captain
dtype: object

In [22]:
# The index now holds string labels, so select by position explicitly with
# .iloc; a plain integer key in [] relies on a deprecated positional fallback.
misc.iloc[1]

'Spongebob'

In [23]:
# Look up a value by its index label.
misc['B']

'Spongebob'

#### Access multiple values

In [28]:
# Select several values by position; .iloc makes the positional intent
# explicit on a Series whose index holds string labels.
misc.iloc[[0, 3]]

A      1
D    NaN
dtype: object

In [27]:
# Every other element, chosen by position with .iloc (the index labels are
# strings, so integer keys in plain [] would rely on a deprecated fallback).
misc.iloc[[0, 2, 4]]

A       1
C    True
E       9
dtype: object

In [29]:
# A list of labels selects several values at once.
misc[['A', 'D']]

A      1
D    NaN
dtype: object

In [30]:
# Positions 1, 3 and 5, selected explicitly by position with .iloc because
# the index labels are strings rather than integers.
misc.iloc[[1, 3, 5]]

B    Spongebob
D          NaN
F      Captain
dtype: object

# 2. DataFrame

A DataFrame is a two-dimensional data structure with a tabular layout of rows and columns.

Dataframes are mutable.

Arithmetic operations can be applied along rows as well as columns.

Generally each column is of a specific data type.

Structure of a dataframe:                                

                                    pd.DataFrame(data, index, columns)
                                    
                                    
                                    1. data: the values to store (e.g. a list of rows or a dictionary of columns)
                                    2. index: the row labels
                                    3. columns: the column names
                             

#### Create a dataframe from a list

In [34]:
# Calling the constructor with no arguments creates an empty DataFrame.
pd.DataFrame()

In [33]:
# Each inner list is one row; `columns` and `index` supply the labels.
data = [['Chocolate', 23], ['Vanilla', 44], ['Butterscotch', 12]]

pd.DataFrame(
    data=data,
    index=list('CVB'),
    columns=['Flavour', 'Price'],
)

Unnamed: 0,Flavour,Price
C,Chocolate,23
V,Vanilla,44
B,Butterscotch,12


In [37]:
# One [flavour, items sold] pair per row, labelled with a one-letter index.
data = [
    ['Chocolate', 23],
    ['Vanilla', 44],
    ['Butterscotch', 12],
    ['Strawb', 22],
    ['ADF', 14],
]
index = list('CVBSA')
column = ['Icecream', 'Items Sold']

df_icecream = pd.DataFrame(data=data, columns=column, index=index)
df_icecream

Unnamed: 0,Icecream,Items Sold
C,Chocolate,23
V,Vanilla,44
B,Butterscotch,12
S,Strawb,22
A,ADF,14


#### Create a dataframe from a Dictionary

In [38]:
# Dictionary keys become the column names; each list holds that column's values.
flavours = ['Chocolate', 'Vanilla', 'Butterscotch']
items_sold = [23, 44, 12]
data = {'IceCream': flavours, 'Items Sold': items_sold}

pd.DataFrame(data=data)

Unnamed: 0,IceCream,Items Sold
0,Chocolate,23
1,Vanilla,44
2,Butterscotch,12


## loc (Selection by label) vs iloc (Selection by position)

#### loc (Selection by label)

In [39]:
# .loc with a single row label returns that row as a Series.
df_icecream.loc['B']

Icecream      Butterscotch
Items Sold              12
Name: B, dtype: object

In [40]:
# Display the full frame for reference before slicing it by label.
df_icecream

Unnamed: 0,Icecream,Items Sold
C,Chocolate,23
V,Vanilla,44
B,Butterscotch,12
S,Strawb,22
A,ADF,14


In [44]:
# Label slices with .loc include both endpoints: rows 'V' through 'S'.
df_icecream.loc['V': 'S']

Unnamed: 0,Icecream,Items Sold
V,Vanilla,44
B,Butterscotch,12
S,Strawb,22


In [45]:
# A list of labels selects exactly those rows, in the order given.
df_icecream.loc[['V', 'S']]

Unnamed: 0,Icecream,Items Sold
V,Vanilla,44
S,Strawb,22


In [46]:
# Reverse the dataframe.
# A bare step of -1 reverses every row without naming the last label, so the
# cell keeps working if rows are added or re-labelled.
df_icecream.loc[::-1]

Unnamed: 0,Icecream,Items Sold
A,ADF,14
S,Strawb,22
B,Butterscotch,12
V,Vanilla,44
C,Chocolate,23


In [47]:
# A row slice ('C' through 'S') combined with one column label returns a Series.
df_icecream.loc['C':'S', 'Icecream']

C       Chocolate
V         Vanilla
B    Butterscotch
S          Strawb
Name: Icecream, dtype: object

#### iloc (Selection by position)

In [48]:
# Display the full frame for reference before selecting by position.
df_icecream

Unnamed: 0,Icecream,Items Sold
C,Chocolate,23
V,Vanilla,44
B,Butterscotch,12
S,Strawb,22
A,ADF,14


In [49]:
# Position 2 is the third row (counting from 0), i.e. the row labelled 'B'.
df_icecream.iloc[2]

Icecream      Butterscotch
Items Sold              12
Name: B, dtype: object

In [51]:
# Positional slices exclude the stop value: positions 1 through 4 are returned.
df_icecream.iloc[1:5]

Unnamed: 0,Icecream,Items Sold
V,Vanilla,44
B,Butterscotch,12
S,Strawb,22
A,ADF,14


In [52]:
# Reverse the dataframe.
# A bare step of -1 reverses all rows regardless of how many there are, so no
# hard-coded start position is needed.
df_icecream.iloc[::-1]

Unnamed: 0,Icecream,Items Sold
A,ADF,14
S,Strawb,22
B,Butterscotch,12
V,Vanilla,44
C,Chocolate,23


## Accessing the columns and rows (Slicing)

#### Get the column from a dataframe

In [53]:
# Display the frame whose columns are accessed in the following cells.
df_icecream

Unnamed: 0,Icecream,Items Sold
C,Chocolate,23
V,Vanilla,44
B,Butterscotch,12
S,Strawb,22
A,ADF,14


In [55]:
# Method 1: bracket notation. It works for any column name, including names
# that contain spaces, and returns the column as a Series.
df_icecream['Items Sold']

C    23
V    44
B    12
S    22
A    14
Name: Items Sold, dtype: int64

In [56]:
# Method 2: attribute (dot) notation. The column name is written as a Python
# attribute, so it must not contain whitespace.
df_icecream.Icecream

C       Chocolate
V         Vanilla
B    Butterscotch
S          Strawb
A             ADF
Name: Icecream, dtype: object

**NOTE:** Column names that have whitespace cannot be accessed using '.'

In [57]:
# `df_icecream.Items Sold` is not valid Python, because an attribute name
# cannot contain a space. Compiling the expression from a string shows the
# SyntaxError without stopping "Run All" at this cell.
try:
    compile("df_icecream.Items Sold", "<dot-access-demo>", "eval")
except SyntaxError as err:
    dot_access_error = f"{type(err).__name__}: {err.msg}"
    print(dot_access_error)

SyntaxError: invalid syntax (<ipython-input-57-5e9ec722b8c2>, line 1)

### Create a New Column

Method 1: Bracket assignment

In [58]:
# Assigning a list to a new column label appends that column to the frame;
# one value is supplied per row.
dec_2021_items = [1, 3, 2, 4, 5]
df_icecream['Items Sold (Dec 2021)'] = dec_2021_items

In [59]:
# Show the frame with the newly added 'Items Sold (Dec 2021)' column.
df_icecream

Unnamed: 0,Icecream,Items Sold,Items Sold (Dec 2021)
C,Chocolate,23,1
V,Vanilla,44,3
B,Butterscotch,12,2
S,Strawb,22,4
A,ADF,14,5


Method 2: Insert

In [60]:
# insert() adds the column in place at position 2 (the third column).
# It raises ValueError if the column already exists, so the guard keeps this
# cell safe to run more than once.
if 'Items Sold (Nov 2021)' not in df_icecream.columns:
    df_icecream.insert(2, 'Items Sold (Nov 2021)', [3, 4, 1, 5, 6])

In [61]:
# Show the frame with 'Items Sold (Nov 2021)' inserted as the third column.
df_icecream

Unnamed: 0,Icecream,Items Sold,Items Sold (Nov 2021),Items Sold (Dec 2021)
C,Chocolate,23,3,1
V,Vanilla,44,4,3
B,Butterscotch,12,1,2
S,Strawb,22,5,4
A,ADF,14,6,5


Method 3: Assign

In [62]:
# assign() returns a new DataFrame with the extra 'Location' column. The
# result is only displayed here, not stored, so df_icecream is unchanged.
df_icecream.assign(Location=['Ag', 'Amd', 'Kol', 'Ptn', 'Mum'])

Unnamed: 0,Icecream,Items Sold,Items Sold (Nov 2021),Items Sold (Dec 2021),Location
C,Chocolate,23,3,1,Ag
V,Vanilla,44,4,3,Amd
B,Butterscotch,12,1,2,Kol
S,Strawb,22,5,4,Ptn
A,ADF,14,6,5,Mum


In [63]:
# The original frame has no 'Location' column: assign() did not modify it.
df_icecream

Unnamed: 0,Icecream,Items Sold,Items Sold (Nov 2021),Items Sold (Dec 2021)
C,Chocolate,23,3,1
V,Vanilla,44,4,3
B,Butterscotch,12,1,2
S,Strawb,22,5,4
A,ADF,14,6,5


In [70]:
# Rename one column. Reassigning the returned frame (instead of inplace=True)
# keeps the step explicit, and `columns=` states which axis is renamed.
df_icecream = df_icecream.rename(columns={'Items Sold': 'IC_Sold'})

In [69]:
# Rename one row label. Reassigning the returned frame (instead of
# inplace=True) keeps the step explicit, and `index=` states which axis is renamed.
df_icecream = df_icecream.rename(index={'C': 'Choc'})

In [71]:
# Show the frame after the column and row-label renames.
df_icecream

Unnamed: 0,Icecream,IC_Sold,Items Sold (Nov 2021),Items Sold (Dec 2021)
Choc,Chocolate,23,3,1
V,Vanilla,44,4,3
B,Butterscotch,12,1,2
S,Strawb,22,5,4
A,ADF,14,6,5


In [73]:
# Replace all column names at once; the new names are matched to the
# existing columns by position.
new_column_names = ['ICE', 'SALES', 'NOV_2021', 'DEC_2021']
df_icecream.columns = new_column_names

In [74]:
# Show the frame with the new column names.
df_icecream

Unnamed: 0,ICE,SALES,NOV_2021,DEC_2021
Choc,Chocolate,23,3,1
V,Vanilla,44,4,3
B,Butterscotch,12,1,2
S,Strawb,22,5,4
A,ADF,14,6,5


### Adding columns

In [75]:
# Column arithmetic is element-wise: each row's total is the sum of its
# three sales columns.
base_sales = df_icecream['SALES']
nov_sales = df_icecream['NOV_2021']
dec_sales = df_icecream['DEC_2021']
df_icecream['Total_Sales'] = base_sales + nov_sales + dec_sales

In [76]:
# Show the frame including the computed 'Total_Sales' column.
df_icecream

Unnamed: 0,ICE,SALES,NOV_2021,DEC_2021,Total_Sales
Choc,Chocolate,23,3,1,27
V,Vanilla,44,4,3,51
B,Butterscotch,12,1,2,15
S,Strawb,22,5,4,31
A,ADF,14,6,5,25
