# Introduction to Pandas


In [1]:
# Import pandas
import pandas as pd

In [2]:
# A Series can be built directly from a plain Python list. Pandas then supplies
# a default integer index that starts at zero, and the name of the Series is
# left as None. Here is a small example.

students = ['akshat', 'madhav', 'aadish']

# Hand the list to the pandas Series constructor to get a Series back.
pd.Series(data=students)

0    akshat
1    madhav
2    aadish
dtype: object

In [5]:
# The same constructor accepts a list of integers.
numbers = [1, 2, 3]
pd.Series(data=numbers)

0    1
1    2
2    3
dtype: int64

In [6]:
# Pandas performs type conversion behind the scenes. When a list of strings
# also contains a None element, pandas keeps that entry as None and uses the
# object type for the underlying array.

students = ['akshat', 'madhav', None]
pd.Series(data=students)

0    akshat
1    madhav
2      None
dtype: object

In [7]:
# A list of numbers (integers or floats) behaves differently: a None entry is
# automatically converted to a special floating point value called NaN, which
# is short for "Not a Number".

numbers = [1, 2, None]
pd.Series(data=numbers)

# NaN is itself represented as a floating point number. Since integers can be
# typecast to floats, pandas converted the integers in the list to floats.

0    1.0
1    2.0
2    NaN
dtype: float64

In [9]:
# NaN is *NOT* equivalent to None, and when we try the equality test, the result is False.

# Let's bring in numpy, which allows us to generate a NaN value
import numpy as np
# And let's compare it to None
np.nan == None

False

In [10]:
# It turns out that an equality test of NaN against itself does not work either. When you
# try it, the answer is always False.

np.nan == np.nan

False

In [11]:
# Instead, you need to use special functions to test for the presence of "not a number",
# such as NumPy's isnan().

np.isnan(np.nan)

True

In [8]:
# So keep in mind that when you see NaN, its meaning is similar to None, but it is a
# numeric value and is treated differently for efficiency reasons.

In [14]:
# A Series can also be created from a dictionary: the keys become the index
# labels and the values become the data.
students_scores = {
    'Alice': 'Physics',
    'Molly': 'English',
    'Jack': 'Chemistry',
}
s = pd.Series(data=students_scores)
s

Alice      Physics
Molly      English
Jack     Chemistry
dtype: object

In [15]:
# The index object of a Series is available through its index attribute.
s.index

Index(['Alice', 'Molly', 'Jack'], dtype='object')

In [17]:
# The values of a Series can be more complex objects, for example tuples.
students = [
    ('Alice', 'White'),
    ('Molly', 'Brown'),
    ('Jack', 'Green'),
]
pd.Series(data=students)

0    (Alice, White)
1    (Molly, Brown)
2     (Jack, Green)
dtype: object

In [18]:
# The index does not have to come from the data itself: it can be supplied
# separately, as an explicit list passed to the Series constructor.

s = pd.Series(
    data=['Physics', 'Chemistry', 'English'],
    index=['Alice', 'Molly', 'Sam'],
)
s

Alice      Physics
Molly    Chemistry
Sam        English
dtype: object

In [20]:
# What if the labels passed as the index do not line up with the keys of the
# dictionary used to build the Series? The explicit index wins: the result
# contains exactly the labels you listed. Dictionary keys that are absent from
# the index are ignored, and any index label that has no matching key gets a
# missing value (None or NaN).

students_scored = {
    'Alice': 'Physics',
    'Jack': 'Chemistry',
    'Wanda': 'English',
}
s = pd.Series(data=students_scored, index=['Alice', 'Wanda', 'Sam'])
s

Alice    Physics
Wanda    English
Sam          NaN
dtype: object