In [1]:
# importing pandas to get started
import pandas as pd

In [2]:
# We can create a Series by passing in a list of values. When we do this, pandas automatically assigns an index starting
# at zero and sets the name of the Series to None.

# The easiest way to create a Series is to use an array-like object, like a list
students = ['Alice', 'Jack', 'Molly']

# Now just call the Series function in pandas and pass in the students
pd.Series(students)

0    Alice
1     Jack
2    Molly
dtype: object

In [3]:
# The result is a Series object which is nicely rendered to the screen.
# We see here that pandas has automatically identified the type of data in the Series as object and set the dtype
# parameter as appropriate. We see that the values are indexed with integers starting with zero. Now, we don't have to use
# strings. If we pass in a list of whole numbers, for instance, we can see that pandas sets the type to int64. Underneath,
# pandas stores Series values in a typed array using the NumPy library. This offers a significant speed-up when processing
# data versus traditional Python lists.

In [4]:
# Build a plain Python list of whole numbers
numbers = [1, 2, 3]
# and hand it to pandas to get a Series back
pd.Series(numbers)

0    1
1    2
2    3
dtype: int64

In [5]:
# There are some other typing details that exist for performance and that are important to know. The most important is how
# NumPy, and thus pandas, handles missing data. In Python, we have the None type to indicate a lack of data. But what do we
# do if we want to have a typed list like this in the Series object? Underneath, pandas does some type conversion for us.
# If we create a list of strings and one element is None, pandas inserts it as a None and uses the type object
# for the underlying array.

# Let's recreate our list of students, but leave the last one as None

students = ['Alice', 'Jack', None]
# and let's convert this to a Series
pd.Series(students)

0    Alice
1     Jack
2     None
dtype: object

In [6]:
# If we create a list of numbers (integers or floats) and put in a None, pandas automatically converts it into
# a special floating-point value designated as NaN, which stands for "Not a Number".
# Let's see an example of this.
numbers = [1, 2, None]
pd.Series(numbers)

# NaN is not equivalent to None: when we try the equality test, the result is False

0    1.0
1    2.0
2    NaN
dtype: float64

In [7]:
# Let's bring in NumPy, which allows us to generate a NaN value
import numpy as np
# and compare it to None (the == is deliberate here: it shows that the equality test evaluates to False)
np.nan == None

False

In [8]:
# An equality test of NaN against itself does not work either: the comparison evaluates to False.
# NaN stands for "Not a Number"
np.nan == np.nan

False

In [9]:
# We need to use a special function to test for the presence of a not-a-number value,
# such as the NumPy library's isnan().

np.isnan(np.nan)

True

In [10]:
# NaN is similar to None, but it's a numeric value and is treated differently
# for efficiency reasons.


In [11]:
# While a list might be a common way to create some play data, often we have labeled
# data that we want to manipulate.
# A Series can be created directly from dictionary data, and if this is done the index
# is automatically assigned to the keys of the dictionary provided.

In [13]:
# Example: build a Series straight from a dictionary (the keys become the index labels)

students_scores = {
    'Alice': 'Physics',
    'Jack': 'Chemistry',
    'Molly': 'English',
}
s = pd.Series(students_scores)
s

Alice      Physics
Jack     Chemistry
Molly      English
dtype: object

In [14]:
# Since the data was strings, pandas set the data type of the Series to 'object'.
# We also see that the index (the first column) is made up of strings as well.

In [15]:
# Once the Series has been created, we can get the index object using the index attribute

s.index

Index(['Alice', 'Jack', 'Molly'], dtype='object')

In [17]:
# As we work with pandas, we notice that a lot of things are implemented as NumPy
# arrays and have the dtype value set. This is true of indices, and here pandas inferred
# that we were using objects for the index

# The dtype of object is not just for strings, but for arbitrary objects

# Let's create a more complex type of data, a list of tuples

students = [('Alice', 'Brown'), ('Jack', 'White'), ('Molly', 'Green')]
pd.Series(students)

0    (Alice, Brown)
1     (Jack, White)
2    (Molly, Green)
dtype: object

In [18]:
# Each of the tuples is stored in the Series object, and the dtype is object

In [19]:
# We can also separate our index creation from the data by passing in the index
# as a list explicitly to the Series.

s = pd.Series(
    ['Physics', 'Chemistry', 'English'],
    index=['Alice', 'Jack', 'Molly'],
)
s

Alice      Physics
Jack     Chemistry
Molly      English
dtype: object

In [22]:
# So what happens if the list of values in your index is not aligned
# with the keys in your dictionary when creating the Series? Well, pandas
# overrides the automatic creation to favor only and all of the index values
# that you provide. It will ignore all keys from your dictionary which
# are not in your index, and it will add None or NaN values for any
# index value you provide which is not in your dictionary's key list.

# Here's an example: we pass in a dictionary of three items, in this case the
# students and their courses.
students_scores = {
    'Alice': 'Physics',
    'Jack': 'Chemistry',
    'Molly': 'English',
}
# When creating the Series object we only ask for an index with three students,
# and we exclude Jack

s = pd.Series(students_scores, index=['Alice', 'Molly', 'Sam'])
s

Alice    Physics
Molly    English
Sam          NaN
dtype: object

In [None]:
# The result is that the Series object doesn't have Jack in it, even though he was
# in our original dataset, but it explicitly does have Sam in it as a missing value.