# Comparing Pandas to a Python Dictionary

In [1]:
# Sample records held in a plain Python dict (one key per field, one list of
# values per key); used below to show why pandas is more convenient than a
# dict for data manipulation.
people = dict(
    first_name=['Yaqub', 'Muhsina', 'Abubakar', 'Taiwo'],
    last_name=['Gaji', 'Oyinda', 'Abdulkareem', 'Olatunji'],
    email=['gaji@123.com', 'oyin@6.com', 'abdul@gmail.com', 'ola@email.ng'],
)

In [2]:
import pandas as pd

In [3]:
# Plain-dict access: looking up the 'first_name' key returns its list of values
people['first_name']

['Yaqub', 'Muhsina', 'Abubakar', 'Taiwo']

Converting the dict to a DataFrame using pandas

In [4]:
# pd.DataFrame accepts a dict of equal-length lists: each key becomes a column.
# Note the exact spelling/capitalisation of `DataFrame`.
df = pd.DataFrame(people)  # tabular (rows x columns) version of the `people` dict

In [5]:
# Display the DataFrame: one row per person, labelled by a default integer index
df

Unnamed: 0,first_name,last_name,email
0,Yaqub,Gaji,gaji@123.com
1,Muhsina,Oyinda,oyin@6.com
2,Abubakar,Abdulkareem,abdul@gmail.com
3,Taiwo,Olatunji,ola@email.ng


In [6]:
# Selecting a single column with [] returns a Series:
# that column's values together with the row index
df['first_name']

0       Yaqub
1     Muhsina
2    Abubakar
3       Taiwo
Name: first_name, dtype: object

In [7]:
# Confirm that a single selected column is a pandas Series
type(df['first_name'])

pandas.core.series.Series

In [8]:
# Dot (attribute) notation also returns the column as a Series.
# NOTE: this only works when the column name is a valid Python identifier and
# does not clash with an existing DataFrame attribute or method; df['email']
# is the form that always works.
df.email 

0       gaji@123.com
1         oyin@6.com
2    abdul@gmail.com
3       ola@email.ng
Name: email, dtype: object

In [9]:
# To select several columns, pass a list of column names inside the indexing
# brackets (hence the double brackets '[[ ]]'); the result is a DataFrame
df[['last_name', 'email']]

Unnamed: 0,last_name,email
0,Gaji,gaji@123.com
1,Oyinda,oyin@6.com
2,Abdulkareem,abdul@gmail.com
3,Olatunji,ola@email.ng


In [10]:
# The `columns` attribute holds all column labels as an Index object
df.columns

Index(['first_name', 'last_name', 'email'], dtype='object')

In [11]:
# Rows are selected with `loc` and `iloc`.
# `iloc` means "integer location": it selects by integer position.
# Passing a list as the first argument selects multiple rows (here rows 0 and 1).
# The second argument selects the column by position (2 is the 'email' column).
df.iloc[[0, 1], 2]

0    gaji@123.com
1      oyin@6.com
Name: email, dtype: object

In [12]:
# Re-display the full frame for reference before moving on to `loc`
df

Unnamed: 0,first_name,last_name,email
0,Yaqub,Gaji,gaji@123.com
1,Muhsina,Oyinda,oyin@6.com
2,Abubakar,Abdulkareem,abdul@gmail.com
3,Taiwo,Olatunji,ola@email.ng


In [13]:
# `loc` selects by index *labels* rather than integer positions.
# It looks similar to `iloc` here only because the default row labels are the
# integers 0, 1, 2, ...; columns are selected by name, in the order requested.
df.loc[[0, 1], ['email', 'last_name']]

Unnamed: 0,email,last_name
0,gaji@123.com,Gaji
1,oyin@6.com,Oyinda


# Indexes

In [14]:
# Second sample dataset (one list of values per field), used for the index examples
users = dict(
    first=['Yaqub', 'Tobi', 'Anika'],
    last=['Gaji', 'Bakre', 'Tiwalola'],
    email=['gaji@gmail.com', 'dev@unity.com', 'anika22@udacity.ng'],
)

In [25]:
# Turn the `users` dict into a DataFrame (each key becomes a column)
df1 = pd.DataFrame(users)

In [16]:
# Display the new frame; rows are labelled by the default integer index
df1

Unnamed: 0,first,last,email
0,Yaqub,Gaji,gaji@gmail.com
1,Tobi,Bakre,dev@unity.com
2,Anika,Tiwalola,anika22@udacity.ng


In [17]:
# The 'email' column as a Series; these values are used as row labels below
df1['email']

0        gaji@gmail.com
1         dev@unity.com
2    anika22@udacity.ng
Name: email, dtype: object

In [18]:
# Use the email addresses as the row index.
# set_index returns a new DataFrame; the result is only displayed here,
# not assigned, so df1 itself is left unchanged.
df1.set_index('email')

Unnamed: 0_level_0,first,last
email,Unnamed: 1_level_1,Unnamed: 2_level_1
gaji@gmail.com,Yaqub,Gaji
dev@unity.com,Tobi,Bakre
anika22@udacity.ng,Anika,Tiwalola


In [19]:
# df1 still has its default integer index: the previous call did not modify it
df1

Unnamed: 0,first,last,email
0,Yaqub,Gaji,gaji@gmail.com
1,Tobi,Bakre,dev@unity.com
2,Anika,Tiwalola,anika22@udacity.ng


In [20]:
# Make 'email' the row index for the cells that follow.
# set_index returns a new DataFrame, so the result is bound back to df1.
# Explicit rebinding is preferred over inplace=True: the in-place form gives no
# performance benefit, cannot be chained, and silently mutates a frame that
# earlier cells have already displayed.
df1 = df1.set_index('email')

In [21]:
# Inspect the row index, which now holds the email addresses.
# Only the last expression of a cell is displayed, so this cell contains
# just the index lookup.
df1.index

Index(['gaji@gmail.com', 'dev@unity.com', 'anika22@udacity.ng'], dtype='object', name='email')

In [22]:
# Now each row has a unique identifier (its email address), so `loc` can look
# a value up by label: .loc[row_label, column_label]
df1.loc['gaji@gmail.com', 'last']

'Gaji'

In [23]:
# `iloc` still works after changing the index because it selects by integer
# position, not by label: position 0 is the first row
df1.iloc[0]

first    Yaqub
last      Gaji
Name: gaji@gmail.com, dtype: object

In [24]:
# Undo the custom index: reset_index moves the current index ('email') back
# into an ordinary column and restores the default integer index.
# It returns a new DataFrame, so the result is bound back to df1 rather than
# using inplace=True (which hides the mutation and cannot be chained).
df1 = df1.reset_index()