In [None]:
import pandas as pd
pd.__version__

# Pandas Series

A one-dimensional labelled array. Think of it as a more powerful Python list.

## Dtypes and Indexes

The dtype of a Series is determined by the entries inside it. pandas also assigns each row an index. Execute the following cells and pay attention to the row labels and the dtype.

In [None]:
# Plain Python list of strings that the next cell turns into a Series.
ice_cream = [
    "chocolate",
    "vanilla",
    "strawberry",
    "peach",
]

In [None]:
# Wrap the list in a Series, then show its contents, a separator line,
# and the Python type of the resulting object.
ice_cream_series = pd.Series(data=ice_cream)
print(ice_cream_series, "-" * 40, type(ice_cream_series), sep="\n")

In [None]:
# A list of integers produces a Series with an integer dtype.
lottery = [4, 8, 15, 16, 23, 42]
lottery_series = pd.Series(data=lottery)
print(lottery_series)

In [None]:
# A list of booleans produces a Series with a bool dtype.
registrations = [True, False, False, False, True]
reg_series = pd.Series(data=registrations)
print(reg_series)

## Series from a Dictionary

Notice that the index labels of the items are the keys from the dictionary. This shows how versatile the index of a pandas Series can be.

In [None]:
# Dictionary keys become the index labels; dictionary values become the data.
webster = {
    "Aardvark": "An animal",
    "Banana": "A delicious fruit",
    "Cyan": "A color",
}
webster_series = pd.Series(data=webster)
print(webster_series)

## Series Attributes

Let's take a look at the attributes of a pandas Series.

In [None]:
# Sample Series of strings used to inspect `.values` and `.index` below.
about_me = [
    "Smart",
    "Handsome",
    "Charming",
    "Brilliant",
    "Humble",
]
about_me_series = pd.Series(data=about_me)
print(about_me_series)

In [None]:
about_me_series.values

In [None]:
about_me_series.index

## Series Methods

In [None]:
# Mixing ints and floats in the list yields a float Series.
prices = [
    2.99, 445, 200.1, 300, 125.0,
    229, 102, 30, 67, 99.4,
]
prices_series = pd.Series(data=prices)
print(prices_series)

In [None]:
prices_series.sum()

In [None]:
prices_series.product()


In [None]:
prices_series.min()

In [None]:
prices_series.mean()

In [None]:
prices_series.std()

## Data from CSV

This uses the `read_csv` function to create a Series.

In [None]:
# `read_csv` returns a DataFrame. Because `usecols` keeps only the "Pokemon"
# column, `.squeeze("columns")` collapses that one-column frame into a Series.
# (The old `squeeze=True` keyword was deprecated in pandas 1.4 and removed in 2.0.)
csv_data = pd.read_csv("data_files/pokemon.csv", sep=",", usecols=["Pokemon"]).squeeze("columns")

In [None]:
csv_data

In [None]:
type(csv_data)

## Series Head and Tail

In [None]:
# `.squeeze("columns")` turns a one-column DataFrame into a Series; it replaces
# the `squeeze=True` keyword, which was deprecated in pandas 1.4 and removed in 2.0.
# NOTE(review): presumably this CSV holds a single column of prices; with more
# than one column the result stays a DataFrame (same as the old keyword).
google = pd.read_csv("data_files/google_stock_price.csv").squeeze("columns")
google

In [None]:
google.head()

In [None]:
google.tail(3)

## Pandas Series with Python Built Ins

Pandas plays along really well with Python. It's very Pythonic!

In [None]:
len(google)

In [None]:
for item in google.tail(10):
    print(item)

In [None]:
sorted(google.tail(10))

In [None]:
google_dict = dict(google.tail(10))
google_dict

In [None]:
max(google)

In [None]:
min(google)

## More Series Attributes

In [None]:
# Load only the "Pokemon" column and collapse the one-column DataFrame into a
# Series. `.squeeze("columns")` replaces the `squeeze=True` keyword, which was
# deprecated in pandas 1.4 and removed in 2.0.
pokemon = pd.read_csv("data_files/pokemon.csv", sep=",", usecols=["Pokemon"]).squeeze("columns")

In [None]:
pokemon.tail(10).values

In [None]:
pokemon.index

In [None]:
google.index

In [None]:
pokemon.dtype

In [None]:
google.dtype

In [None]:
pokemon.is_unique

In [None]:
google.is_unique

In [None]:
pokemon.ndim

In [None]:
google.ndim

In [None]:
pokemon.shape

In [None]:
type(pokemon.shape)

In [None]:
pokemon.size

In [None]:
google.size

In [None]:
len(google)

In [None]:
pokemon.name

In [None]:
pokemon.head()

In [None]:
pokemon.name = "Pocket Monsters"

In [None]:
pokemon.head()

## Sorting Values in a Series

While you can use Python's built-in `sorted` function, pandas has its own sorting methods too.

In [None]:
# Returns a new series
pokemon.sort_values()

In [None]:
pokemon.sort_values(ascending = False)

In [None]:
# `inplace=True` sorts the Series object itself instead of returning a new one.
# Work on a copy so that `pokemon` is left untouched, and print the first
# row before and after sorting to show the change.
copy_of_pokemon = pokemon.copy()
print(copy_of_pokemon.head(1))
copy_of_pokemon.sort_values(inplace=True)
print(copy_of_pokemon.head(1))

## The `in` Operator from Python

In [None]:
"Bulbasaur" in pokemon

In [None]:
# The above is the same as:
"Bulbasaur" in pokemon.index

In [None]:
"Bulbasaur" in pokemon.values

## Extracting Values From Series by Index Position

In [None]:
pokemon[0]

In [None]:
pokemon.head(1)

In [None]:
# Create a new series based on a list of indexes
pokemon[[0,1,2]]

In [None]:
# Create a series using the slicing notation
pokemon[0:5]

In [None]:
pokemon[0:-(len(pokemon) -5)]

In [None]:
pokemon[-5:]

In [None]:
pokemon.tail(5)

## Extract Values Based on Index Labels

In [None]:
pd.read_csv("data_files/pokemon.csv")

In [None]:
# The file has only two columns. `index_col="Pokemon"` moves one of them into
# the index, leaving a single data column, so `.squeeze("columns")` collapses
# the DataFrame into a Series labelled by Pokemon name.
# (The old `squeeze=True` keyword was deprecated in pandas 1.4 and removed in 2.0.)
pokemon_index_by_name = pd.read_csv("data_files/pokemon.csv", index_col="Pokemon").squeeze("columns")

In [None]:
pokemon_index_by_name.head(5)

In [None]:
# Positional access on a label-indexed Series: use `.iloc` explicitly.
# Plain `series[0]` with a non-integer index relies on a positional fallback
# that pandas 2.1 deprecated.
pokemon_index_by_name.iloc[0]

In [None]:
pokemon_index_by_name["Bulbasaur"]

In [None]:
pokemon_index_by_name[["Bulbasaur", "Charizard"]]

In [None]:
# Normally slicing would be exclusive on the upper bound, but here it is inclusive
pokemon_index_by_name["Charmander":"Weedle"]

In [None]:
pokemon_index_by_name.head(5)

In [None]:
pokemon_index_by_name[:"Venusaur"]

## Note on Indexing with Labels

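Indexing with a label that doesn't exist raises an error (a `KeyError`). The `reindex` method lets you select by labels without failing: labels that are not found get `NaN` by default, or the `fill_value` you supply, as in the example below.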

In [None]:
pokemon_index_by_name.reindex(index=["Pikachu", "NotAPokemon"], fill_value="404 NOT_FOUND")

## Another Way to Retrieve Data from a Series

If you need to get items by index, it is generally good to sort your Series first, especially if you have to query the Series multiple times.

In [None]:
pokemon.head(5)

In [None]:
sorted_pokemon = pokemon.sort_index()

In [None]:
sorted_pokemon.head(5)

In [None]:
sorted_pokemon.get(10000)

In [None]:
sorted_pokemon.get(10000, "Some Default")

In [None]:
sorted_pokemon.get([0,1,2], "this is not a pokemon")

In [None]:
# NOTE: if a single index or key is not found, then it quits and sends back the default
sorted_pokemon.get([0,1,2, 100000], "this is not a pokemon")

## Math with Series

In [None]:
len(google)

In [None]:
google.count()

In [None]:
google.sum()

In [None]:
google.std()

In [None]:
google.mean()

In [None]:
google.max()

In [None]:
google.min()

In [None]:
google.median()

In [None]:
google.mode()

In [None]:
google.is_unique

In [None]:
google.describe()

## Index Max and Index Min

`idxmax()` and `idxmin()` return the index labels of the maximum and minimum values in the Series.

In [None]:
google.max()

In [None]:
google.idxmax()

In [None]:
google[google.idxmax()]

In [None]:
google.min()

In [None]:
google.idxmin()

In [None]:
google[google.idxmin()]

## The `.value_counts()` Method

In [None]:
pokemon_index_by_name.head(3)

In [None]:
pokemon_index_by_name.value_counts()

In [None]:
pokemon_index_by_name.value_counts().max()

## The `.apply()` Method

In [None]:
google.head(6)

In [None]:
def classify_performance(number):
    """Bucket a numeric value into a performance label.

    Returns 'LOW' for values below 100, 'OKAY' for values from 100 up to
    (but not including) 200, and 'HIGH' for everything else.
    """
    # Upper bounds are checked in ascending order; the first one the value
    # falls under decides the label.
    for upper_bound, label in ((100, 'LOW'), (200, 'OKAY')):
        if number < upper_bound:
            return label
    return 'HIGH'

In [None]:
google.head().apply(classify_performance)

In [None]:
google.tail().apply(classify_performance)

In [None]:
google.head().apply(lambda x : "AWESOME" if x > 100 else "GARBAGE")

In [None]:
google.tail().apply(lambda x : "AWESOME" if x > 100 else "GARBAGE")