<a href="https://colab.research.google.com/github/brownsloth/3by3/blob/main/chapter_02_the_series_object/chapter_02_the_series_object.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 2.1 Overview of a Series

In [1]:
import pandas as pd
import numpy as np

### 2.1.1 Classes, and Instances

In [4]:
s = pd.Series()

In [5]:
s.head()

Unnamed: 0,0


In [8]:
print(s.shape)

(0,)


### 2.1.2 Populating the Series with Values

In [9]:
ice_cream_flavors = [
    "Chocolate",
    "Vanilla",
    "Strawberry",
    "Rum Raisin",
]

pd.Series(ice_cream_flavors)

Unnamed: 0,0
0,Chocolate
1,Vanilla
2,Strawberry
3,Rum Raisin


In [10]:
# The two lines below are equivalent
pd.Series(ice_cream_flavors)
pd.Series(data = ice_cream_flavors)

Unnamed: 0,0
0,Chocolate
1,Vanilla
2,Strawberry
3,Rum Raisin


In [11]:
my_friends = [
    {"name": "abhinav", "age": 30},
    20
]

pd.Series(my_friends)

Unnamed: 0,0
0,"{'name': 'abhinav', 'age': 30}"
1,20


### 2.1.3 Customizing the Series Index

In [13]:
ice_cream_flavors = [
    "Chocolate",
    "Vanilla",
    "Strawberry",
    "Rum Raisin",
]

days_of_week = ("Monday", "Wednesday", "Friday", "Saturday")

# The two lines below are equivalent
pd.Series(ice_cream_flavors, days_of_week)
pd.Series(data = ice_cream_flavors, index = days_of_week)

Unnamed: 0,0
Monday,Chocolate
Wednesday,Vanilla
Friday,Strawberry
Saturday,Rum Raisin


In [18]:
my_friends = [
    "abhinav",
    "ayush",
    "venkat"
]
my_age = [
    29,
    29,
    30
]
friend_ages = pd.Series(my_age, my_friends)
friend_ages.head()

Unnamed: 0,0
abhinav,29
ayush,29
venkat,30


In [19]:
friend_ages.index

Index(['abhinav', 'ayush', 'venkat'], dtype='object')

In [None]:
ice_cream_flavors = [
    "Chocolate",
    "Vanilla",
    "Strawberry",
    "Rum Raisin",
]

days_of_week = ("Monday", "Wednesday", "Friday", "Wednesday")

# The two lines below are equivalent
pd.Series(ice_cream_flavors, days_of_week)
pd.Series(data = ice_cream_flavors, index = days_of_week)

Monday        Chocolate
Wednesday       Vanilla
Friday       Strawberry
Wednesday    Rum Raisin
dtype: object

In [None]:
pd.Series(index = days_of_week, data = ice_cream_flavors)

Monday        Chocolate
Wednesday       Vanilla
Friday       Strawberry
Wednesday    Rum Raisin
dtype: object

In [None]:
bunch_of_bools = [True, False, False]
pd.Series(bunch_of_bools)

0     True
1    False
2    False
dtype: bool

In [None]:
stock_prices = [985.32, 950.44]
time_of_day = ["Open", "Close"]
pd.Series(data = stock_prices, index = time_of_day)

Open     985.32
Close    950.44
dtype: float64

In [None]:
lucky_numbers = [4, 8, 15, 16, 23, 42]
pd.Series(lucky_numbers)

0     4
1     8
2    15
3    16
4    23
5    42
dtype: int64

In [21]:
lucky_numbers = [4, 8, 15, 16, 23, 42]
lc = pd.Series(lucky_numbers, dtype = "float")

In [24]:
lc[0] = 111
print(lc.head())
print(lucky_numbers)

0    111.0
1      8.0
2     15.0
3     16.0
4     23.0
dtype: float64
[4, 8, 15, 16, 23, 42]


In [26]:
lucky_numbers_1 = pd.Series(lucky_numbers, dtype = "float")
print(lucky_numbers_1)
lucky_numbers_1[0] = 112
print(lucky_numbers_1)
print(lc)

0     4.0
1     8.0
2    15.0
3    16.0
4    23.0
5    42.0
dtype: float64
0    112.0
1      8.0
2     15.0
3     16.0
4     23.0
5     42.0
dtype: float64
0    111.0
1      8.0
2     15.0
3     16.0
4     23.0
5     42.0
dtype: float64


In [20]:
lucky_numbers = [4, "eight", 15, 16, "chwenchy chee", 42]
pd.Series(lucky_numbers, dtype = "float", )

ValueError: could not convert string to float: 'eight'

### 2.1.4 Creating a Series with Missing Values

In [27]:
temperatures = [94, 88, np.nan, 91]
pd.Series(data = temperatures)

Unnamed: 0,0
0,94.0
1,88.0
2,
3,91.0


## 2.2 Create a Series from Python Objects

In [28]:
calorie_info = {
    "Cereal": 125,
    "Chocolate Bar": 406,
    "Ice Cream Sundae": 342,
}

diet = pd.Series(calorie_info)
diet

Unnamed: 0,0
Cereal,125
Chocolate Bar,406
Ice Cream Sundae,342


In [29]:
my_friends = {
    "abhinav": 29,
    "ayush": 30,
    "kshitiz": 40
}

pd.Series(my_friends)

Unnamed: 0,0
abhinav,29
ayush,30
kshitiz,40


In [30]:
pd.Series(data = ("Red", "Green", "Blue"))

Unnamed: 0,0
0,Red
1,Green
2,Blue


In [31]:
rgb_colors = [(120, 41, 26), (196, 165, 45)]
pd.Series(data = rgb_colors)

Unnamed: 0,0
0,"(120, 41, 26)"
1,"(196, 165, 45)"


**NOTE**: I've commented out the code below so that the Notebook can run without raising an error.

In [32]:
my_set = {"Ricky", "Bobby"}
pd.Series(my_set)

TypeError: 'set' type is unordered

In [33]:
pd.Series(list(my_set))

Unnamed: 0,0
0,Ricky
1,Bobby


In [35]:
random_data = np.random.randint(1, 101, 10)
random_data

array([ 4, 47, 45, 26, 94, 15, 77,  1, 80, 18])

In [43]:
pd.Series(random_data).index.values

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

## 2.3 Series Attributes

In [44]:
diet.values

array([125, 406, 342])

In [45]:
type(diet.values)

numpy.ndarray

In [46]:
diet.index

Index(['Cereal', 'Chocolate Bar', 'Ice Cream Sundae'], dtype='object')

In [48]:
diet.index.values

array(['Cereal', 'Chocolate Bar', 'Ice Cream Sundae'], dtype=object)

In [51]:
pd.DataFrame({'a':np.arange(0,10), 'b':np.arange(10,20)}).values

array([[ 0, 10],
       [ 1, 11],
       [ 2, 12],
       [ 3, 13],
       [ 4, 14],
       [ 5, 15],
       [ 6, 16],
       [ 7, 17],
       [ 8, 18],
       [ 9, 19]])

In [47]:
type(diet.index)

pandas.core.indexes.base.Index

In [None]:
diet.dtype

dtype('int64')

In [None]:
diet.size

3

In [None]:
diet.shape

(3,)

In [56]:
diet['oats'] = 125
print(diet)

Cereal              125
Chocolate Bar       406
Ice Cream Sundae    342
oats                125
dtype: int64


In [57]:
diet.is_unique

False

In [None]:
pd.Series(data = [3, 3]).is_unique

False

In [59]:
pd.Series(data = [1, 3, 6]).is_monotonic_increasing

True

In [60]:
pd.Series(data = [1, 3, 6]).is_monotonic_decreasing
# The `len(x) - 1` in the custom_agg_std function is used for Bessel's correction
# when calculating the sample standard deviation. It helps provide an unbiased
# estimate of the population standard deviation from the sample data.

False

In [63]:
pd.Series(data = [0,0,0]).is_monotonic_decreasing

True

In [64]:
pd.Series(data = [0,0,-1]).is_monotonic_decreasing

True

In [65]:
pd.Series(data = [0,0,0]).is_monotonic_increasing

True

In [66]:
pd.Series(data = [0,0,1]).is_monotonic_increasing

True

In [71]:
x = pd.Series(data = [1, 6, 3])
x.is_monotonic_decreasing or x.is_monotonic_decreasing # this is python's or since is_monotonic_increasing gives a unitary

False

## 2.4 Retrieving the First and Last Rows

In [73]:
values = range(0, 501, 5)
nums = pd.Series(data = values)
nums

Unnamed: 0,0
0,0
1,5
2,10
3,15
4,20
...,...
96,480
97,485
98,490
99,495


In [None]:
nums.head(3)

0     0
1     5
2    10
dtype: int64

In [None]:
nums.head(n = 3)

0     0
1     5
2    10
dtype: int64

In [None]:
nums.head()

0     0
1     5
2    10
3    15
4    20
dtype: int64

In [74]:
nums.tail(6)

Unnamed: 0,0
95,475
96,480
97,485
98,490
99,495
100,500


In [75]:
nums.tail()

Unnamed: 0,0
96,480
97,485
98,490
99,495
100,500


## 2.5 Mathematical Operations

### 2.5.1 Statistical Operations

In [76]:
numbers = pd.Series([1, 2, 3, np.nan, 4, 5])
numbers

Unnamed: 0,0
0,1.0
1,2.0
2,3.0
3,
4,4.0
5,5.0


In [77]:
numbers.count()

np.int64(5)

In [86]:
numbers.notna().sum()

np.int64(5)

In [93]:
numbers.sum()

np.float64(15.0)

In [None]:
numbers.sum(skipna = False)

nan

In [96]:
## If at least 3 non na values then gimme sum else gimme nan
numbers.sum(min_count = 3)

np.float64(15.0)

In [97]:
numbers.sum(min_count = 6)

np.float64(nan)

In [98]:
numbers.product()

np.float64(120.0)

In [99]:
numbers.product(min_count=1)

np.float64(120.0)

In [100]:
numbers.product(min_count=6)

np.float64(nan)

In [101]:
numbers.product(skipna = False)

np.float64(nan)

In [None]:
numbers.product(min_count = 3)

120.0

In [None]:
numbers

0    1.0
1    2.0
2    3.0
3    NaN
4    4.0
5    5.0
dtype: float64

In [102]:
numbers.cumsum()

Unnamed: 0,0
0,1.0
1,3.0
2,6.0
3,
4,10.0
5,15.0


In [None]:
numbers.cumsum(skipna = False)

0    1.0
1    3.0
2    6.0
3    NaN
4    NaN
5    NaN
dtype: float64

In [None]:
numbers

0    1.0
1    2.0
2    3.0
3    NaN
4    4.0
5    5.0
dtype: float64

In [None]:
numbers.pct_change()

0         NaN
1    1.000000
2    0.500000
3    0.000000
4    0.333333
5    0.250000
dtype: float64

In [None]:
# The three lines below are equivalent
numbers.pct_change()
numbers.pct_change(fill_method = "pad")
numbers.pct_change(fill_method = "ffill")

0         NaN
1    1.000000
2    0.500000
3    0.000000
4    0.333333
5    0.250000
dtype: float64

In [None]:
# The two lines below are equivalent
numbers.pct_change(fill_method = "bfill")
numbers.pct_change(fill_method = "backfill")

0         NaN
1    1.000000
2    0.500000
3    0.333333
4    0.000000
5    0.250000
dtype: float64

In [104]:
numbers.mean() == numbers.sum() / numbers.count()

np.True_

In [119]:
numbers.shape[0]

6

In [123]:
def custom_agg_median(x):
    x.dropna(inplace = True)
    x.sort_values(inplace = True)
    if x.shape[0] % 2 == 0:
      return np.average(x[len(x) // 2 - 1:len(x) // 2 +1])
    return x[len(x)//2]

In [124]:
numbers.agg(custom_agg_median)

np.float64(3.0)

In [125]:
numbers.median() == numbers.agg(custom_agg_median)

np.True_

In [137]:
def custom_agg_std(x):
    x = x.dropna()
    x_bar = x.mean()
    return np.sqrt(((x-x_bar)**2).sum()/ (len(x)-1) )

In [138]:
print(numbers.agg(custom_agg_std))

1.5811388300841898


In [139]:
numbers.std() == numbers.agg(custom_agg_std)

np.True_

In [140]:
numbers.max()

5.0

In [141]:
numbers.min()

1.0

In [142]:
animals = pd.Series(["koala", "aardvark", "zebra"])
animals

Unnamed: 0,0
0,koala
1,aardvark
2,zebra


In [157]:
def custom_sort_alphabetic(x):
  y = x.sort_values(key = lambda x:x, ascending=True, inplace=False)
  return y.iloc[-1]

In [159]:
animals.agg(custom_sort_alphabetic)

'zebra'

In [160]:
animals.max() == animals.agg(custom_sort_alphabetic)

True

In [161]:
animals.min()

'aardvark'

In [168]:
numbers *= 1.25

In [170]:
numbers

Unnamed: 0,0
0,3.75
1,7.5
2,11.25
4,15.0
5,18.75


In [169]:
numbers.describe()

Unnamed: 0,0
count,5.0
mean,11.25
std,5.929271
min,3.75
25%,7.5
50%,11.25
75%,15.0
max,18.75


In [188]:
# np.random.seed(313)
np.random.seed(None)
numbers.sample(3)

Unnamed: 0,0
5,18.75
2,11.25
0,3.75


In [192]:
authors = pd.Series(
    ["Hemingway", "Orwell", "Dostoevsky", "Fitzgerald", "Orwell", "Fitzgerald"]
)
## Keeps the first occurences
authors.unique()

array(['Hemingway', 'Orwell', 'Dostoevsky', 'Fitzgerald'], dtype=object)

In [193]:
authors.nunique()

4

### 2.5.2 Arithmetic Operations

In [195]:
s1 = pd.Series(data = [5, np.nan, 15], index = ["A", "B", "C"])
s1

Unnamed: 0,0
A,5.0
B,
C,15.0


In [196]:
s1 + 3

Unnamed: 0,0
A,8.0
B,
C,18.0


In [197]:
s1.add(3)

Unnamed: 0,0
A,8.0
B,
C,18.0


In [199]:
s1 ## new copy created

Unnamed: 0,0
A,5.0
B,
C,15.0


In [200]:
# The three lines below are equivalent
s1 - 5
s1.sub(5)
s1.subtract(5)

Unnamed: 0,0
A,0.0
B,
C,10.0


In [None]:
# The three lines below are equivalent
s1 * 2
s1.mul(2)
s1.multiply(2)

A    10.0
B     NaN
C    30.0
dtype: float64

In [None]:
# The three lines below are equivalent
s1 / 2
s1.div(2)
s1.divide(2)

A    2.5
B    NaN
C    7.5
dtype: float64

In [None]:
# The two lines below are equivalent
s1 // 4
s1.floordiv(4)

A    1.0
B    NaN
C    3.0
dtype: float64

In [None]:
# The two lines below are equivalent
s1 % 3
s1.mod(3)

A    2.0
B    NaN
C    0.0
dtype: float64

### 2.5.3 Broadcasting

In [None]:
s1 = pd.Series([1, 2, 3], index = ["A", "B", "C"])
s2 = pd.Series([4, 5, 6], index = ["A", "B", "C"])

In [None]:
s1 + s2

A    5
B    7
C    9
dtype: int64

In [None]:
s1 = pd.Series(data = [3, 6, np.nan, 12])
s2 = pd.Series(data = [2, 6, np.nan, 12])

In [None]:
# The two lines below are equivalent
s1 == s2
s1.eq(s2)

0    False
1     True
2    False
3     True
dtype: bool

In [None]:
# The two lines below are equivalent
s1 != s2
s1.ne(s2)

0     True
1    False
2     True
3    False
dtype: bool

In [206]:
s1 = pd.Series(
    data = [5, None, 15], index = ["A", "B", "C"]
)

s2 = pd.Series(
    data = [4, 8, 12, 14], index = ["B", "C", "D", "E"]
)

In [207]:
s1.add(s2, fill_value=-1)

Unnamed: 0,0
A,4.0
B,3.0
C,23.0
D,11.0
E,13.0


## 2.6 Passing the Series to Python's Built-In Functions

In [208]:
cities = pd.Series(
    data = ["San Francisco", "Los Angeles", "Las  Vegas", np.nan]
)

In [209]:
len(cities)

4

In [210]:
type(cities)

In [211]:
dir(cities)

['T',
 '_AXIS_LEN',
 '_AXIS_ORDERS',
 '_AXIS_TO_AXIS_NUMBER',
 '_HANDLED_TYPES',
 '__abs__',
 '__add__',
 '__and__',
 '__annotations__',
 '__array__',
 '__array_priority__',
 '__array_ufunc__',
 '__bool__',
 '__class__',
 '__column_consortium_standard__',
 '__contains__',
 '__copy__',
 '__deepcopy__',
 '__delattr__',
 '__delitem__',
 '__dict__',
 '__dir__',
 '__divmod__',
 '__doc__',
 '__eq__',
 '__finalize__',
 '__float__',
 '__floordiv__',
 '__format__',
 '__ge__',
 '__getattr__',
 '__getattribute__',
 '__getitem__',
 '__getstate__',
 '__gt__',
 '__hash__',
 '__iadd__',
 '__iand__',
 '__ifloordiv__',
 '__imod__',
 '__imul__',
 '__init__',
 '__init_subclass__',
 '__int__',
 '__invert__',
 '__ior__',
 '__ipow__',
 '__isub__',
 '__iter__',
 '__itruediv__',
 '__ixor__',
 '__le__',
 '__len__',
 '__lt__',
 '__matmul__',
 '__mod__',
 '__module__',
 '__mul__',
 '__ne__',
 '__neg__',
 '__new__',
 '__nonzero__',
 '__or__',
 '__pandas_priority__',
 '__pos__',
 '__pow__',
 '__radd__',
 '__rand__

In [212]:
list(cities)

['San Francisco', 'Los Angeles', 'Las  Vegas', nan]

In [213]:
dict(cities)

{0: 'San Francisco', 1: 'Los Angeles', 2: 'Las  Vegas', 3: nan}

In [216]:
cities_areas = pd.Series([2100, 2200, 2300], index=cities.dropna().values)
print(dict(cities_areas))

{'San Francisco': np.int64(2100), 'Los Angeles': np.int64(2200), 'Las  Vegas': np.int64(2300)}


In [None]:
cities

0    San Francisco
1      Los Angeles
2       Las  Vegas
3              NaN
dtype: object

In [231]:
## "IN" not supported with value
"Los Angeles" in cities

False

In [229]:
## "IN" supported with index
2 in cities

True

In [230]:
"Los Angeles" in cities_areas

True

In [232]:
100 not in cities

True

In [240]:
## Need to use .values for membership check to work with "IN"
"Las  Vegas" not in cities.values

False

## 2.7 Coding Challenge

### 2.7.1 Problems

### 2.7.2 Solutions

In [None]:
superheroes = [
    "Batman",
    "Superman",
    "Spider-Man",
    "Iron Man",
    "Captain America",
    "Wonder Woman"
]

In [None]:
strength_levels = (100, 120, 90, 95, 110, 120)

In [None]:
pd.Series(superheroes)

0             Batman
1           Superman
2         Spider-Man
3           Iron Man
4    Captain America
5       Wonder Woman
dtype: object

In [None]:
pd.Series(data = strength_levels)

0    100
1    120
2     90
3     95
4    110
5    120
dtype: int64

In [None]:
heroes = pd.Series(
    data = strength_levels, index = superheroes
)

heroes

Batman             100
Superman           120
Spider-Man          90
Iron Man            95
Captain America    110
Wonder Woman       120
dtype: int64

In [None]:
heroes.head(2)

Batman      100
Superman    120
dtype: int64

In [None]:
heroes.tail(4)

Spider-Man          90
Iron Man            95
Captain America    110
Wonder Woman       120
dtype: int64

In [None]:
heroes.nunique()

5

In [None]:
heroes.mean()

105.83333333333333

In [None]:
heroes.max()

120

In [None]:
heroes.min()

90

In [None]:
heroes * 2

Batman             200
Superman           240
Spider-Man         180
Iron Man           190
Captain America    220
Wonder Woman       240
dtype: int64

In [None]:
dict(heroes)

{'Batman': 100,
 'Superman': 120,
 'Spider-Man': 90,
 'Iron Man': 95,
 'Captain America': 110,
 'Wonder Woman': 120}

## 2.7 Summary