In [43]:
import pandas as pd
import openpyxl

# Create a Series Object from a Python List

In [9]:
# the technical definition of a Series is:
# a one-dimensional labeled array
# a Series combines the best of both a list and a dictionary: it is ordered, and it can also associate keys (labels) with values
# a Series can store data such as a column in Excel, and it also gives each value a position in line
# i.e. the row label for the cell
# a Series also allows the use of methods

In [10]:
# an ordinary Python list holding the flavour names
ice_cream = [
    "Chocolate",
    "Vanilla",
    "Strawberry",
    "Rum Raisin",
]

# pd.Series is a class (a blueprint for objects); calling it builds a Series from the list
pd.Series(data=ice_cream)

0     Chocolate
1       Vanilla
2    Strawberry
3    Rum Raisin
dtype: object

In [13]:
# dtype is short for data type; in the output above, `object` is the dtype shown for a Series of strings
# the numbers on the left-hand side are the index, which acts like the key in a key-value pair
# you can assign your own identifiers to the values, but if you don't, pandas will use each item's position as its identifier

In [14]:
# six drawn numbers, labelled 1-6 instead of the default 0-5
lottery = [4, 8, 15, 16, 23, 42]

pd.Series(data=lottery, index=[1, 2, 3, 4, 5, 6])

1     4
2     8
3    15
4    16
5    23
6    42
dtype: int64

In [None]:
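# the Series constructor accepts more parameters than just the data
# (signature as documented when this notebook was written; check the docs for your pandas version):
# class pandas.Series(data=None, index=None, dtype=None, name=None, copy=False, fastpath=False)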

In [15]:
# a list of booleans produces a Series whose dtype is bool
registrations = [
    True,
    False,
    False,
    False,
    True,
]

pd.Series(data=registrations)

0     True
1    False
2    False
3    False
4     True
dtype: bool

In [16]:
# a Series represents a single column of data in a spreadsheet
# there are more complex objects, such as tables of data, which are multiple Series "glued together"

In [32]:
# a more concise way to write a series

In [33]:
# build the Series straight from a list literal (no intermediate list variable)
# and keep it in `countries` for later use
countries = pd.Series(["United States", "France", "Germany", "Italy"])

# Create a Series Object from a Dictionary

In [17]:
# this will allow us to associate string values as indices

In [19]:
# each dictionary key becomes an index label and each dictionary value becomes the Series value
sushi = {"Salmon": "Orange", "Tuna": "Red", "Eel": "Brown"}

pd.Series(data=sushi)

Salmon    Orange
Tuna         Red
Eel        Brown
dtype: object

In [25]:
sushi = dict(Salmon="Orange", Tuna="Red", Eel="Brown")

# ask for labels 1, 2 and 3 explicitly; none of them is a key of the dictionary
pd.Series(data=sushi, index=[1, 2, 3])

1    NaN
2    NaN
3    NaN
dtype: object

In [24]:
# Note that the index is first built with the keys from the dictionary.
# After this, the Series is reindexed with the given index values; none of them (1, 2, 3) match the dictionary keys, hence we get all NaN as a result.

In [26]:
# https://pandas.pydata.org/docs/reference/api/pandas.Series.html

In [27]:
# rebuild the dictionary so this cell can be run on its own
sushi = {
    "Salmon": "Orange",
    "Tuna": "Red",
    "Eel": "Brown",
}

# without an explicit index the dictionary keys are used as the labels
pd.Series(data=sushi)

Salmon    Orange
Tuna         Red
Eel        Brown
dtype: object

In [28]:
# .iloc selects by position; a plain [2] on a string-labelled Series relies on the
# deprecated fallback that treats integer keys as positions
pd.Series(sushi).iloc[2]

'Brown'

In [29]:
# positional slice whose start equals its stop, so no rows are selected
pd.Series(sushi).iloc[2:2]

Series([], dtype: object)

In [30]:
# Here the [2] is applied to the plain dict, not to the Series. The dict only has
# string keys, so the lookup raises KeyError before pd.Series is ever called.
# The error is caught and printed so the notebook still runs from top to bottom.
try:
    pd.Series(sushi[2])
except KeyError as err:
    print(f"KeyError: {err}")

KeyError: 2

In [31]:
# a Pandas Series is able to sort multiple labels

In [34]:
# assigns a series to a new variable

In [35]:
recipe = {"Flour": True, "Sugar": True, "Salt": False}

# keep the resulting Series in a variable instead of only displaying it
series_dict = pd.Series(data=recipe)

# Intro to Methods on Series

In [36]:
# Most Series methods do not change the Series they are called on; instead they return a new object (a new Series or a single value), so the original Series stays as it was

In [38]:
# three prices stored as floating-point numbers
prices = pd.Series(data=[2.99, 4.45, 1.36])
prices

0    2.99
1    4.45
2    1.36
dtype: float64

In [39]:
# add up all the values in the Series
prices.sum()

8.8

In [40]:
# smallest value in the Series
prices.min()

1.36

In [44]:
# a 2x2 table with labelled rows and columns, saved as an Excel workbook
# (pandas needs the openpyxl package installed to write .xlsx files)
df1 = pd.DataFrame(
    data={"col 1": ["a", "c"], "col 2": ["b", "d"]},
    index=["row 1", "row 2"],
)
df1.to_excel("output.xlsx")

In [45]:
# self-guided installation of openpyxl into the pandas_playground environment using conda:
# conda install openpyxl
# then import openpyxl in the Jupyter notebook

In [47]:
# multiply all the values together and keep the result in a variable
prices_product = prices.product()

In [49]:
# inspect what kind of object the product is
type(prices_product)

numpy.float64

In [51]:
# arithmetic mean (average) of the values
prices.mean()

2.9333333333333336

In [54]:
# all of the methods are listed at https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.html
# a method can sometimes take parameters (but not always)
# a method can only be called on an object

# Intro to Attributes

In [56]:
# an attribute is a detail or characteristic that an object can tell us about itself
# a method is more of a behavior or a command

In [57]:
# a Series of five distinct strings used to explore attributes below
adjectives = pd.Series(
    data=["Smart", "Handsome", "Charming", "Brilliant", "Humble"]
)
adjectives

0        Smart
1     Handsome
2     Charming
3    Brilliant
4       Humble
dtype: object

In [58]:
# number of values stored in the Series (an attribute, so no parentheses)
adjectives.size

5

In [59]:
# True when no value appears more than once
adjectives.is_unique

True

In [61]:
# the stored values on their own, without the index (shown below as an array)
adjectives.values

array(['Smart', 'Handsome', 'Charming', 'Brilliant', 'Humble'],
      dtype=object)

In [64]:
# ndarray comes from the NumPy library, which is a dependency of pandas (meaning pandas needs it in order to run)

In [63]:
# ndarray stands for N-dimensional array, meaning that it can have any number of dimensions, e.g. a 2-D matrix or a 3-D array

In [65]:
# the index object that holds the labels of the Series
adjectives.index

RangeIndex(start=0, stop=5, step=1)

In [66]:
# the index is itself an object with its own class
type(adjectives.index)

pandas.core.indexes.range.RangeIndex

In [67]:
# as shown above, there are many underlying objects built on top of each other

# Parameters and Arguments

In [68]:
# Parameter - is the name that we give to an expected input
# Argument - The concrete value that we provide to a parameter

In [70]:
fruits = [
    "Apple",
    "Orange",
    "Plum",
    "Grape",
    "Blueberry",
]
weekdays = [
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
]

# arguments passed by position: the first one fills `data`, the second one fills `index`
pd.Series(fruits, weekdays)

Monday           Apple
Tuesday         Orange
Wednesday         Plum
Thursday         Grape
Friday       Blueberry
dtype: object

In [72]:
# the same call written with keyword arguments, naming each parameter explicitly
pd.Series(data=fruits, index=weekdays)

Monday           Apple
Tuesday         Orange
Wednesday         Plum
Thursday         Grape
Friday       Blueberry
dtype: object

In [73]:
# if you press shift + tab inside the parentheses for the series, you can see all of the expected parameters and arguments

In [74]:
# index labels do not have to be unique: "Monday" is used twice here
fruits = [
    "Apple",
    "Orange",
    "Plum",
    "Grape",
    "Blueberry",
    "Watermelon",
]
weekdays = [
    "Monday",
    "Tuesday",
    "Wednesday",
    "Thursday",
    "Friday",
    "Monday",
]

pd.Series(fruits, weekdays)

Monday            Apple
Tuesday          Orange
Wednesday          Plum
Thursday          Grape
Friday        Blueberry
Monday       Watermelon
dtype: object