In [4]:
import pandas as pd 

# Build a Series from a plain list. No index is given, so pandas supplies a
# default RangeIndex: the labels here run from 0 to 3.
firstarray = pd.Series([3, 4, 5, 6])

# Show, one per line: the Series itself, its index, and the array that wraps
# its values (an extension array object, not the original Python list).
print(firstarray, firstarray.index, firstarray.array, sep="\n")

0    3
1    4
2    5
3    6
dtype: int64
RangeIndex(start=0, stop=4, step=1)
<NumpyExtensionArray>
[3, 4, 5, 6]
Length: 4, dtype: int64


In [5]:
# When a Series is built from a dictionary, the keys become the index labels
# and the values become the data.
normaldict = dict(first=10, second=7, third=6)
dictseries = pd.Series(normaldict)

# Print the Series, then its index, then its underlying array.
for view in (dictseries, dictseries.index, dictseries.array):
    print(view)


first     10
second     7
third      6
dtype: int64


In [9]:
# Labels are case-sensitive: of the labels requested below, only "a" and "B"
# match keys of dct1 ("c", "d" and "e" do not match "C", "D" and "E").
dct1 = {
    "a": 10,
    "B": 20,
    "C": 32,
    "D": 50,
    "E": 60,
}
idx = list("aBcde")

# The explicit index decides which labels appear in the result.
newseries = pd.Series(data=dct1, index=idx)
print(newseries)


a    10.0
B    20.0
c     NaN
d     NaN
e     NaN
dtype: float64


### Remarks 
Above, when you pass an explicit index list together with a dictionary (whose keys already act as an index):
- the labels in the provided list override the keys of the dictionary, and the match is case-sensitive (e.g. "c" does not match the key "C")
- labels in the list that have no matching key in the dictionary get the value NaN (Not a Number)
- and since NaN is a float, the whole Series, including the values that did match, is stored as floats

#### The following code modifies the index of an already-defined Series

In [14]:
# Start from a list-backed Series, which gets the default integer labels.
lstseries = pd.Series(data=[2, 3, 4, 5, 6])
print(lstseries)

print(" chaning the index that comes with the default series")

# Assigning a list of labels to .index relabels the existing Series.
ordinal_labels = ["first", "second", "third", "fourth", "fifth"]
lstseries.index = ordinal_labels
print(lstseries)



0    2
1    3
2    4
3    5
4    6
dtype: int64
 chaning the index that comes with the default series
first     2
second    3
third     4
fourth    5
fifth     6
dtype: int64


## Accessing elements in Pandas Series 

In [15]:
import pandas as pd

# Each entry pairs a movie title with its rating, so a title and its score
# cannot drift out of step with each other.
movie_ratings = [
    ("The Shawshank Redemption", 9.3),
    ("The Godfather", 9.2),
    ("The Godfather: Part II", 9.1),
    ("The Dark Knight", 9),
    ("Pulp Fiction", 8.9),
    ("12 Angry Men", 8.9),
    ("The Good, the Bad and the Ugly", 8.9),
    ("The Lord of the Rings: The Return of the King", 8.9),
    ("Schindler's List", 8.9),
    ("Fight Club", 8.9),
    ("The Lord of the Rings: The Fellowship of the Ring", 8.8),
    ("Inception", 8.8),
    ("Star Wars: Episode V - The Empire Strikes Back", 8.8),
    ("Forrest Gump", 8.8),
]

# Split the pairs into the two parallel lists: titles (index) and scores (data).
idx, lst = (list(column) for column in zip(*movie_ratings))

# Titles are the index labels, scores are the values.
ratings = pd.Series(data=lst, index=idx)
print(ratings)

The Shawshank Redemption                             9.3
The Godfather                                        9.2
The Godfather: Part II                               9.1
The Dark Knight                                      9.0
Pulp Fiction                                         8.9
12 Angry Men                                         8.9
The Good, the Bad and the Ugly                       8.9
The Lord of the Rings: The Return of the King        8.9
Schindler's List                                     8.9
Fight Club                                           8.9
The Lord of the Rings: The Fellowship of the Ring    8.8
Inception                                            8.8
Star Wars: Episode V - The Empire Strikes Back       8.8
Forrest Gump                                         8.8
dtype: float64


In [16]:
# A mask for the data is simply a boolean Series: comparing the Series with a
# scalar gives one True/False value per label.
ratings.gt(9)

The Shawshank Redemption                              True
The Godfather                                         True
The Godfather: Part II                                True
The Dark Knight                                      False
Pulp Fiction                                         False
12 Angry Men                                         False
The Good, the Bad and the Ugly                       False
The Lord of the Rings: The Return of the King        False
Schindler's List                                     False
Fight Club                                           False
The Lord of the Rings: The Fellowship of the Ring    False
Inception                                            False
Star Wars: Episode V - The Empire Strikes Back       False
Forrest Gump                                         False
dtype: bool

In [18]:
# Step 1: build the boolean mask (True where the rating is above 9).
ratings_greater_than_nine_mask = ratings.gt(9)

# Step 2: index with the mask to keep only the entries flagged True.
actual_ratings_greater_than_nine = ratings.loc[ratings_greater_than_nine_mask]
print(actual_ratings_greater_than_nine)


The Shawshank Redemption    9.3
The Godfather               9.2
The Godfather: Part II      9.1
dtype: float64


### Another way
- to collect elements from a pandas Series is to pass a list of index labels inside the square brackets


In [32]:
# Select several entries at once by passing a list of index labels.
lst2 = ["The Shawshank Redemption", "The Godfather"]
print(ratings.loc[lst2])  # .loc makes the label-based lookup explicit

# Also show the labels that were requested; the recorded output of this cell
# includes this list, so the cell must print it to be reproducible.
print(lst2)


The Shawshank Redemption    9.3
The Godfather               9.2
dtype: float64
['The Shawshank Redemption', 'The Godfather']


### Using NumPy functions with Series
- NumPy functions operate on the whole Series at once, which is faster than looping over a Python list


In [33]:
import numpy as np
# Count how many ratings are equal to 8.9: the comparison gives a boolean
# Series and count_nonzero tallies its True entries.
np.count_nonzero(ratings.eq(8.9))

6

In [34]:
# Another example is the log function, which acts on every value of the
# Series at once and returns a Series with the same labels.
log_ratings = np.log(ratings)
log_ratings

The Shawshank Redemption                             2.230014
The Godfather                                        2.219203
The Godfather: Part II                               2.208274
The Dark Knight                                      2.197225
Pulp Fiction                                         2.186051
12 Angry Men                                         2.186051
The Good, the Bad and the Ugly                       2.186051
The Lord of the Rings: The Return of the King        2.186051
Schindler's List                                     2.186051
Fight Club                                           2.186051
The Lord of the Rings: The Fellowship of the Ring    2.174752
Inception                                            2.174752
Star Wars: Episode V - The Empire Strikes Back       2.174752
Forrest Gump                                         2.174752
dtype: float64

- Two pandas Series can be added (+), subtracted (-) and multiplied (*) together
- values are matched by index label, so both Series need the same labels; any label missing from one of them produces NaN
- the index of the result is sorted (see the output below), which is how pandas handles indexes that differ

In [36]:
# For example: add the full Series to the subset rated above 9. The operation
# is performed where the index labels match; where they do not, the result
# is NaN.
ratings.add(ratings.loc[ratings.gt(9)])

12 Angry Men                                          NaN
Fight Club                                            NaN
Forrest Gump                                          NaN
Inception                                             NaN
Pulp Fiction                                          NaN
Schindler's List                                      NaN
Star Wars: Episode V - The Empire Strikes Back        NaN
The Dark Knight                                       NaN
The Godfather                                        18.4
The Godfather: Part II                               18.2
The Good, the Bad and the Ugly                        NaN
The Lord of the Rings: The Fellowship of the Ring     NaN
The Lord of the Rings: The Return of the King         NaN
The Shawshank Redemption                             18.6
dtype: float64

## Determining whether a value is NaN or not
- the function `pd.isna` is used

In [37]:
# Add the full Series to the subset rated above 9; labels that are missing
# from the subset end up as NaN in the sum.
above_nine = ratings.loc[ratings.gt(9)]
ratings_added_maybe = ratings.add(above_nine)

# True marks the entries that are NaN.
ratings_added_maybe.isna()

12 Angry Men                                          True
Fight Club                                            True
Forrest Gump                                          True
Inception                                             True
Pulp Fiction                                          True
Schindler's List                                      True
Star Wars: Episode V - The Empire Strikes Back        True
The Dark Knight                                       True
The Godfather                                        False
The Godfather: Part II                               False
The Good, the Bad and the Ugly                        True
The Lord of the Rings: The Fellowship of the Ring     True
The Lord of the Rings: The Return of the King         True
The Shawshank Redemption                             False
dtype: bool

## Metadata for series 
This is used when more information needs to be attached to a Series (or DataFrame), such as names that distinguish
the rows and columns and describe their use.
- Its real-world application is data that has several fields, such as ratings, title, duration, release date, ...

In [38]:
# Attach descriptive names to the Series and to its index, then display it.
ratings.name ="ratings/10"
ratings.index.name = "Movie title"
ratings

# The code above sets the name of the Series to "ratings/10", meaning that the
# ratings are out of 10, and sets the name of its index to "Movie title",
# meaning that each index label is a movie title.

Movie title
The Shawshank Redemption                             9.3
The Godfather                                        9.2
The Godfather: Part II                               9.1
The Dark Knight                                      9.0
Pulp Fiction                                         8.9
12 Angry Men                                         8.9
The Good, the Bad and the Ugly                       8.9
The Lord of the Rings: The Return of the King        8.9
Schindler's List                                     8.9
Fight Club                                           8.9
The Lord of the Rings: The Fellowship of the Ring    8.8
Inception                                            8.8
Star Wars: Episode V - The Empire Strikes Back       8.8
Forrest Gump                                         8.8
Name: ratings/10, dtype: float64

## Series can be converted back to dictionaries
- using the method `.to_dict()`

In [39]:
# Export the Series as a plain dictionary mapping each index label to its value.
ratings_as_dict = ratings.to_dict()
ratings_as_dict


{'The Shawshank Redemption': 9.3,
 'The Godfather': 9.2,
 'The Godfather: Part II': 9.1,
 'The Dark Knight': 9.0,
 'Pulp Fiction': 8.9,
 '12 Angry Men': 8.9,
 'The Good, the Bad and the Ugly': 8.9,
 'The Lord of the Rings: The Return of the King': 8.9,
 "Schindler's List": 8.9,
 'Fight Club': 8.9,
 'The Lord of the Rings: The Fellowship of the Ring': 8.8,
 'Inception': 8.8,
 'Star Wars: Episode V - The Empire Strikes Back': 8.8,
 'Forrest Gump': 8.8}