In [5]:
import pandas as pd
import openpyxl

# Create a Series Object from a Python List

In [6]:
# the technical definition of a Series is:
# a one-dimensional labeled array
# a Series combines the best of both a list and a dictionary: it is ordered, and it can also associate keys (labels) with values
# a Series can store data much like a column in Excel, but it also gives each value a position in line,
# i.e. the row label for the cell
# a Series also allows the use of methods

In [10]:
# start from an ordinary Python list of strings
ice_cream = [
    "Chocolate",
    "Vanilla",
    "Strawberry",
    "Rum Raisin",
]

# Series is a class (a blueprint for creating objects); calling it with the
# list builds a new Series object, which the notebook shows as the cell output
pd.Series(ice_cream)

0     Chocolate
1       Vanilla
2    Strawberry
3    Rum Raisin
dtype: object

In [13]:
# dtype is short for data type; "object" is the dtype pandas reports here for a Series of strings
# the numbers on the left-hand side are the index, which acts like the key in a key-value pair
# you can assign your own identifiers to the items, but if you don't, pandas uses each item's position as its identifier

In [14]:
lottery = [4, 8, 15, 16, 23, 42]

# supply custom labels 1..6 in place of the default 0-based positions
one_based_labels = list(range(1, len(lottery) + 1))
pd.Series(data=lottery, index=one_based_labels)

1     4
2     8
3    15
4    16
5    23
6    42
dtype: int64

In [None]:
# a SERIES takes more parameters;
# class pandas.Series(data=None, index=None, dtype=None, name=None, copy=False, fastpath=False)

In [15]:
# a list made only of booleans gives a Series whose dtype is bool
registrations = [
    True,
    False,
    False,
    False,
    True,
]

pd.Series(data=registrations)

0     True
1    False
2    False
3    False
4     True
dtype: bool

In [16]:
# a Series represents a single column of data in a spreadsheet
# there are more complex objects such as tables of data, which are multiple Series "glued together"

In [32]:
# a more concise way to write a series

In [33]:
# build the Series straight from a list literal and keep a reference to it
countries = pd.Series(
    data=["United States", "France", "Germany", "Italy"],
)

# Create a Series Object from a Dictionary

In [17]:
# this will allow us to associate string values as indices

In [19]:
# the dictionary keys become the index labels and the values become the data
sushi = dict(Salmon="Orange", Tuna="Red", Eel="Brown")

pd.Series(data=sushi)

Salmon    Orange
Tuna         Red
Eel        Brown
dtype: object

In [25]:
sushi = {"Salmon": "Orange", "Tuna": "Red", "Eel": "Brown"}

# the second argument is `index`; the labels 1, 2 and 3 are not keys of the
# dictionary, so none of them finds a value and every row comes back as NaN
pd.Series(sushi, index=[1, 2, 3])

1    NaN
2    NaN
3    NaN
dtype: object

In [24]:
# Note that the index is first built from the keys of the dictionary.
# After this, the Series is reindexed with the given index values; since 1, 2 and 3 are not keys of the dictionary, we get all NaN as a result.

In [26]:
# https://pandas.pydata.org/docs/reference/api/pandas.Series.html

In [27]:
# without an explicit index the dictionary keys are used as the labels again
sushi = {
    "Salmon": "Orange",
    "Tuna": "Red",
    "Eel": "Brown",
}

pd.Series(data=sushi)

Salmon    Orange
Tuna         Red
Eel        Brown
dtype: object

In [28]:
# the labels are strings, so fetch the third value by position explicitly with
# .iloc; a bare integer inside [] on a non-integer index relies on a positional
# fallback that newer pandas versions deprecate
pd.Series(sushi).iloc[2]

'Brown'

In [29]:
# start and stop are both 2, so this positional slice selects nothing
pd.Series(sushi).iloc[2:2]

Series([], dtype: object)

In [30]:
# here the brackets sit on the dictionary, not on the Series: sushi[2] is a
# plain dict lookup, the dict has no key 2, and the KeyError is raised before
# pd.Series is ever called; catch it so the notebook still runs top to bottom
try:
    pd.Series(sushi[2])
except KeyError as missing_key:
    print(f"KeyError: {missing_key}")

KeyError: 2

In [31]:
# a Pandas Series is able to sort multiple labels

In [34]:
# assigns a series to a new variable

In [35]:
# one boolean flag per ingredient name
recipe = dict(Flour=True, Sugar=True, Salt=False)

# keep the resulting Series in a variable instead of only displaying it
series_dict = pd.Series(data=recipe)

# Intro to Methods on Series

In [36]:
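# Most Series methods do not modify the Series they are called on; calling a method returns a new object instead
# (a Series itself is value-mutable, e.g. individual values can be reassigned, so it is not strictly immutable)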

In [38]:
# three float prices; ending the cell with the variable name displays the Series
prices = pd.Series(data=[2.99, 4.45, 1.36])
prices

0    2.99
1    4.45
2    1.36
dtype: float64

In [39]:
prices.sum()

8.8

In [40]:
prices.min()

1.36

In [44]:
# build a small 2x2 DataFrame with explicit row and column labels, then write
# it to an Excel workbook in the current working directory
df1 = pd.DataFrame(
    data=[["a", "b"], ["c", "d"]],
    index=["row 1", "row 2"],
    columns=["col 1", "col 2"],
)
df1.to_excel("output.xlsx")

In [45]:
# self-guided installation of openpyxl into the pandas_playground environment using conda:
# conda install openpyxl
# then import openpyxl in the Jupyter notebook

In [47]:
prices_product = prices.product()

In [49]:
type(prices_product)

numpy.float64

In [51]:
prices.mean()

2.9333333333333336

In [54]:
# all of the methods are found at https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.html
# a method can sometimes take parameters (but not always)
# a method can only be called on an object

# Intro to Attributes

In [56]:
# an attribute is a detail or characteristic that an object can tell us about itself
# a method is more of a behavior or a command

In [57]:
# a Series of five distinct strings used to explore attributes below
adjectives = pd.Series(
    data=["Smart", "Handsome", "Charming", "Brilliant", "Humble"],
)
adjectives

0        Smart
1     Handsome
2     Charming
3    Brilliant
4       Humble
dtype: object

In [58]:
adjectives.size

5

In [59]:
adjectives.is_unique

True

In [61]:
adjectives.values

array(['Smart', 'Handsome', 'Charming', 'Brilliant', 'Humble'],
      dtype=object)

In [64]:
# ndarray comes from the NumPy library, which is a dependency of pandas (meaning pandas needs it in order to run)

In [63]:
# ndarray stands for N-dimensional array, meaning that it could be 3-D or similar to a matrix

In [65]:
adjectives.index

RangeIndex(start=0, stop=5, step=1)

In [66]:
type(adjectives.index)

pandas.core.indexes.range.RangeIndex

In [67]:
# as shown above, there are many underlying objects built on top of each other

# Parameter and Arguments

In [68]:
# Parameter - is the name that we give to an expected input
# Argument - The concrete value that we provide to a parameter

In [70]:
fruits = [
    "Apple",
    "Orange",
    "Plum",
    "Grape",
    "Blueberry",
]
weekdays = [
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
]

# arguments passed by position: the first fills `data`, the second fills `index`
pd.Series(fruits, weekdays)

Monday           Apple
Tuesday         Orange
Wednesday         Plum
Thursday         Grape
Friday       Blueberry
dtype: object

In [72]:
# arguments passed by keyword: each value is matched to its parameter by name,
# so the order in which they are written no longer matters
pd.Series(index=weekdays, data=fruits)

Monday           Apple
Tuesday         Orange
Wednesday         Plum
Thursday         Grape
Friday       Blueberry
dtype: object

In [73]:
# if you press shift + tab inside the parentheses for the series, you can see all of the expected parameters and arguments

In [74]:
fruits = [
    "Apple",
    "Orange",
    "Plum",
    "Grape",
    "Blueberry",
    "Watermelon",
]
# "Monday" is listed twice on purpose: index labels do not have to be unique
weekdays = [
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Monday",
]

pd.Series(fruits, weekdays)

Monday            Apple
Tuesday          Orange
Wednesday          Plum
Thursday          Grape
Friday        Blueberry
Monday       Watermelon
dtype: object

## Import Series with the pd.read_csv Function

In [8]:
# read only the needed column from each CSV; that yields a one-column DataFrame,
# and squeeze("columns") collapses it into a Series
pokemon = (
    pd.read_csv("pokemon.csv", usecols=["Pokemon"])
    .squeeze("columns")
)
google = (
    pd.read_csv("google_stock_price.csv", usecols=["Stock Price"])
    .squeeze("columns")
)

## The head and tail Methods on a Series

In [4]:
pokemon.head()

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

In [5]:
google.tail()

3007    772.88
3008    771.07
3009    773.18
3010    771.61
3011    782.22
Name: Stock Price, dtype: float64

In [6]:
google

0        50.12
1        54.10
2        54.65
3        52.38
4        52.95
         ...  
3007    772.88
3008    771.07
3009    773.18
3010    771.61
3011    782.22
Name: Stock Price, Length: 3012, dtype: float64

## Passing Series to Python's Built-In Functions

In [3]:
# the same two single-column loads as in the read_csv section, repeated at the
# top of this section
pokemon = pd.read_csv("pokemon.csv", usecols=["Pokemon"]).squeeze("columns")
google = pd.read_csv(
    "google_stock_price.csv", usecols=["Stock Price"]
).squeeze("columns")

In [10]:
pokemon.head()

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

In [11]:
len(pokemon)

721

In [4]:
type(pokemon)

pandas.core.series.Series

In [13]:
dir(pokemon)

['T',
 '_AXIS_LEN',
 '_AXIS_ORDERS',
 '_AXIS_TO_AXIS_NUMBER',
 '_HANDLED_TYPES',
 '__abs__',
 '__add__',
 '__and__',
 '__annotations__',
 '__array__',
 '__array_priority__',
 '__array_ufunc__',
 '__array_wrap__',
 '__bool__',
 '__class__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__divmod__',
 '__doc__',
 '__eq__',
 '__finalize__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__imod__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__long__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__',
 '__rdivmod__',
 '__redu

In [None]:
## dir returns a list of the available attributes and methods for the object

## Check for Inclusion with Python's in Keyword

In [8]:
# load the Pokemon column as a Series and preview its first five rows
pokemon = (
    pd.read_csv("pokemon.csv", usecols=["Pokemon"])
    .squeeze("columns")
)
pokemon.head()

0     Bulbasaur
1       Ivysaur
2      Venusaur
3    Charmander
4    Charmeleon
Name: Pokemon, dtype: object

In [9]:
"car" in "racecar"

True

In [10]:
# we can also check whether or not a value is found within a list

In [11]:
2 in [1, 2, 3]

True

In [12]:
# this evaluates to True because 2 is within the list
# the same check can also be applied to a pandas Series

**the "in" keyword by default is going to check within the index NOT the values**

In [13]:
"Bulbasaur" in pokemon

False

In [14]:
100 in pokemon

True

In [16]:
"Bulbasaur" in pokemon.index

False

In [17]:
"Bulbasaur" in pokemon.values

True

In [18]:
pokemon.values

array(['Bulbasaur', 'Ivysaur', 'Venusaur', 'Charmander', 'Charmeleon',
       'Charizard', 'Squirtle', 'Wartortle', 'Blastoise', 'Caterpie',
       'Metapod', 'Butterfree', 'Weedle', 'Kakuna', 'Beedrill', 'Pidgey',
       'Pidgeotto', 'Pidgeot', 'Rattata', 'Raticate', 'Spearow', 'Fearow',
       'Ekans', 'Arbok', 'Pikachu', 'Raichu', 'Sandshrew', 'Sandslash',
       'Nidoran', 'Nidorina', 'Nidoqueen', 'Nidoran♂', 'Nidorino',
       'Nidoking', 'Clefairy', 'Clefable', 'Vulpix', 'Ninetales',
       'Jigglypuff', 'Wigglytuff', 'Zubat', 'Golbat', 'Oddish', 'Gloom',
       'Vileplume', 'Paras', 'Parasect', 'Venonat', 'Venomoth', 'Diglett',
       'Dugtrio', 'Meowth', 'Persian', 'Psyduck', 'Golduck', 'Mankey',
       'Primeape', 'Growlithe', 'Arcanine', 'Poliwag', 'Poliwhirl',
       'Poliwrath', 'Abra', 'Kadabra', 'Alakazam', 'Machop', 'Machoke',
       'Machamp', 'Bellsprout', 'Weepinbell', 'Victreebel', 'Tentacool',
       'Tentacruel', 'Geodude', 'Graveler', 'Golem', 'Ponyta', 'Rapidash'

## Extract Series Values by Index Position

In [20]:
# a plain Python list supports extracting a value by its index position;
# the same bracket syntax can also be used on a pandas Series
numbers = [1, 2, 3]
numbers[1]

2

In [27]:
# load the Pokemon column as a Series and preview only its first three rows
pokemon = (
    pd.read_csv("pokemon.csv", usecols=["Pokemon"])
    .squeeze("columns")
)
pokemon.head(3)

0    Bulbasaur
1      Ivysaur
2     Venusaur
Name: Pokemon, dtype: object

In [28]:
pokemon[0]

'Bulbasaur'

In [29]:
# providing a list of index values requires a second pair of square brackets;
# the result is a new Series holding just those rows
pokemon[[100, 200, 300]]

100    Electrode
200        Unown
300     Delcatty
Name: Pokemon, dtype: object

In [30]:
# remember: in a slice the first index is inclusive and the last is exclusive,
# so this returns rows 50 through 100
pokemon[50:101]

50        Dugtrio
51         Meowth
52        Persian
53        Psyduck
54        Golduck
55         Mankey
56       Primeape
57      Growlithe
58       Arcanine
59        Poliwag
60      Poliwhirl
61      Poliwrath
62           Abra
63        Kadabra
64       Alakazam
65         Machop
66        Machoke
67        Machamp
68     Bellsprout
69     Weepinbell
70     Victreebel
71      Tentacool
72     Tentacruel
73        Geodude
74       Graveler
75          Golem
76         Ponyta
77       Rapidash
78       Slowpoke
79        Slowbro
80      Magnemite
81       Magneton
82     Farfetch'd
83          Doduo
84         Dodrio
85           Seel
86        Dewgong
87         Grimer
88            Muk
89       Shellder
90       Cloyster
91         Gastly
92        Haunter
93         Gengar
94           Onix
95        Drowzee
96          Hypno
97         Krabby
98        Kingler
99        Voltorb
100     Electrode
Name: Pokemon, dtype: object

In [31]:
# leaving out the start takes everything from the beginning up to, but not
# including, position 50 (rows 0 through 49)
pokemon[:50]

0      Bulbasaur
1        Ivysaur
2       Venusaur
3     Charmander
4     Charmeleon
5      Charizard
6       Squirtle
7      Wartortle
8      Blastoise
9       Caterpie
10       Metapod
11    Butterfree
12        Weedle
13        Kakuna
14      Beedrill
15        Pidgey
16     Pidgeotto
17       Pidgeot
18       Rattata
19      Raticate
20       Spearow
21        Fearow
22         Ekans
23         Arbok
24       Pikachu
25        Raichu
26     Sandshrew
27     Sandslash
28       Nidoran
29      Nidorina
30     Nidoqueen
31      Nidoran♂
32      Nidorino
33      Nidoking
34      Clefairy
35      Clefable
36        Vulpix
37     Ninetales
38    Jigglypuff
39    Wigglytuff
40         Zubat
41        Golbat
42        Oddish
43         Gloom
44     Vileplume
45         Paras
46      Parasect
47       Venonat
48      Venomoth
49       Diglett
Name: Pokemon, dtype: object

In [33]:
# pokemon[-1] will not work: on this integer index a single integer is looked up
# as a label, and there is no label -1 (pokemon.iloc[-1] returns the last row)
pokemon[-20:-10]  # a negative slice will work: it counts positions from the end

701      Dedenne
702      Carbink
703        Goomy
704      Sliggoo
705       Goodra
706       Klefki
707     Phantump
708    Trevenant
709    Pumpkaboo
710    Gourgeist
Name: Pokemon, dtype: object

## Extract Values by Index Label