In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Start from a plain Python list of colour names (note that 'red' appears twice).
colors = [
    'red', 'yellow', 'green', 'blue',
    'orange', 'red', 'violet', 'indigo',
]
# The pd.Series constructor wraps the list; since no index is given,
# pandas attaches a default integer index.
colors_series = pd.Series(data=colors)
colors_series

0       red
1    yellow
2     green
3      blue
4    orange
5       red
6    violet
7    indigo
dtype: object

In [8]:
# type() confirms that colors_series is now a pandas Series object.
print(f' Here is the color series type {type(colors_series)}')

 Here is the color series type <class 'pandas.core.series.Series'>


In [9]:
# No index was supplied when colors_series was built, so pandas generated one;
# the .index attribute exposes it.
colors_series.index

RangeIndex(start=0, stop=8, step=1)

In [10]:
# The autogenerated index is a RangeIndex, the pandas Index subclass used for default integer labels.
type(colors_series.index)

pandas.core.indexes.range.RangeIndex

In [11]:
# The .values attribute exposes the data held by the Series.
# (The pandas documentation recommends Series.to_numpy() or Series.array for new code.)
colors_series.values

array(['red', 'yellow', 'green', 'blue', 'orange', 'red', 'violet',
       'indigo'], dtype=object)

In [14]:
# type() shows that the data returned by the .values attribute of colors_series is a numpy ndarray.
print(f' The color_series data type is a {type(colors_series.values)}')

 The color_series data type is a <class 'numpy.ndarray'>


In [15]:
# Build a numpy array holding the multiples of 5 from 5 to 40, with 40 repeated at the end.
arr = np.array([*range(5, 41, 5), 40])


In [16]:
# Wrap the numpy array in a pandas Series; the Series keeps the array's integer dtype
# and gets a default integer index.
num_series = pd.Series(arr)
num_series

0     5
1    10
2    15
3    20
4    25
5    30
6    35
7    40
8    40
dtype: int64

In [17]:
# Map the letters 'a' through 'f' to the floats 0.0 through 5.0 in a plain Python dict.
data = {letter: float(position) for position, letter in enumerate('abcdef')}

# Passing a dict to pd.Series uses the keys as index labels and the values as the data.
diction_series = pd.Series(data)
diction_series

a    0.0
b    1.0
c    2.0
d    3.0
e    4.0
f    5.0
dtype: float64

In [18]:
# Confirm the conversion: type() should report that diction_series is a pandas Series.

print(f'My diction_series is a {type(diction_series)}.')

My diction_series is a <class 'pandas.core.series.Series'>.


In [19]:
# .head() returns the first n rows; with no argument, n defaults to 5.
colors_series.head()

0       red
1    yellow
2     green
3      blue
4    orange
dtype: object

In [21]:
# .tail(n) returns the last n rows; with n=2 the result is a Series holding the final two rows.
colors_series.tail(2)

6    violet
7    indigo
dtype: object

In [22]:
# .sample() returns randomly chosen rows; with no argument it returns a single row.
# No random_state is passed here, so the row shown can differ each time the cell runs;
# pass random_state=<int> when a reproducible sample is needed.
colors_series.sample()

2    green
dtype: object

In [23]:
# head(), tail() and sample() each hand back a new Series, so all three
# types printed below (one per line) are pandas Series.
print(
    *(
        type(preview)
        for preview in (
            colors_series.head(),
            colors_series.tail(),
            colors_series.sample(),
        )
    ),
    sep='\n',
)

<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>
<class 'pandas.core.series.Series'>


In [27]:
# .astype(str) returns a new Series with the values of `num_series` converted to strings;
# its dtype is shown as object in the output below. The result is only printed, not assigned to a name.
print(num_series.astype(str))

0     5
1    10
2    15
3    20
4    25
5    30
6    35
7    40
8    40
dtype: object


In [28]:
# The result of .astype(str) was never assigned back to num_series,
# so the original Series still has dtype int64.
print(num_series)

0     5
1    10
2    15
3    20
4    25
5    30
6    35
7    40
8    40
dtype: int64


In [29]:
# On a Series of strings, .describe() reports the total count, the number of unique values,
# the most frequent value (top) and how often that value occurs (freq).
colors_series.describe()

count       8
unique      7
top       red
freq        2
dtype: object

In [30]:
# On a numeric Series, .describe() returns a Series of summary statistics:
# count, mean, std, min, the 25%/50%/75% percentiles and max.
num_series.describe()

count     9.000000
mean     24.444444
std      12.856041
min       5.000000
25%      15.000000
50%      25.000000
75%      35.000000
max      40.000000
dtype: float64

In [31]:
# With default arguments, .value_counts() returns how many times each unique value occurs,
# with the most frequent value first.

colors_series.value_counts()

red       2
orange    1
indigo    1
violet    1
yellow    1
green     1
blue      1
dtype: int64

In [32]:
# normalize=True returns each unique value's share of all rows (relative frequency) instead of a raw count.
colors_series.value_counts(normalize=True)

red       0.250
orange    0.125
indigo    0.125
violet    0.125
yellow    0.125
green     0.125
blue      0.125
dtype: float64

In [33]:
# Adding ascending=True sorts the relative frequencies from smallest to largest,
# so the largest relative frequency is displayed last.
colors_series.value_counts(normalize=True, ascending=True)

blue      0.125
green     0.125
yellow    0.125
violet    0.125
indigo    0.125
orange    0.125
red       0.250
dtype: float64

In [44]:
# nlargest(1) / nsmallest(1) ask for the single largest / smallest value.
# keep='all' returns every row tied for that value, so both rows holding the maximum (40)
# are returned even though n is 1; the minimum (5) occurs only once.
print(num_series.nlargest(1, keep = 'all'))
print(num_series.nsmallest(1, keep = 'all'))

7    40
8    40
dtype: int64
0    5
dtype: int64


In [45]:
# Using `.sort_values()` on a Series of strings returns a new Series in ascending alphabetical order;
# each value keeps its original index label.
colors_series.sort_values()

3      blue
2     green
7    indigo
4    orange
0       red
5       red
6    violet
1    yellow
dtype: object

In [47]:
# Setting ascending=False returns the values in descending order; for this Series of strings
# that means reverse-alphabetical. Numeric values sort the same way, largest first.
colors_series.sort_values(ascending = False)

1    yellow
6    violet
5       red
0       red
4    orange
7    indigo
2     green
3      blue
dtype: object

In [49]:
# The comparison produces a boolean Series; .all() is True only if every value equals 'red'.

(colors_series == 'red').all()

False

In [51]:
# The comparison produces a boolean Series; .any() is True if at least one value equals 'red'.

(colors_series == 'red').any()

True

In [52]:
# Check whether num_series contains at least one negative number.
(num_series < 0).any()

False