In [1]:
import pandas as pd
import numpy as np

In [6]:
pd.Series

pandas.core.series.Series

In [7]:
## Classes and instances

In [9]:
pd.Series()

  pd.Series()


Series([], dtype: float64)

### Populating the Series with values

In [11]:
ice_cream_flavors = [
    "Chocolate",
    "Vanilla",
    "Strawberry",
    "Rum Raisin"
]

# Se crea un objeto `Series`
pd.Series(ice_cream_flavors)

0     Chocolate
1       Vanilla
2    Strawberry
3    Rum Raisin
dtype: object

In [12]:
pd.Series(ice_cream_flavors)
pd.Series(data=ice_cream_flavors)

0     Chocolate
1       Vanilla
2    Strawberry
3    Rum Raisin
dtype: object

### Customizing the Series index

Pandas assigns a position in line to each `Series` value. Each number signifies a value's order within the `Series`. The index starts counting from 0. The vlaue `'Strawberry'` is found at index `2` in the `Series`. Index labels can be any immutable data type: strings, datetimes, and more.

In [17]:
day_of_week = ('Monday', 'Wednesday', 'Friday', 'Saturday')

result = pd.Series(data=ice_cream_flavors, index=day_of_week)
(result[2], result['Friday'])

('Strawberry', 'Strawberry')

In [19]:
bunch_of_tools = [True, False, False]

pd.Series(bunch_of_tools)

0     True
1    False
2    False
dtype: bool

In [21]:
stock_prices = [985.32, 950.44]

time_of_day = ["Open", "Close"]

pd.Series(data=stock_prices, index=time_of_day)

Open     985.32
Close    950.44
dtype: float64

Pandas does its best to infer an appropriate data type for the `Series` from the `data` parameter's values. We can force coercion to a different type via the constructor's `dtype` parameter

In [25]:
numbers = [1,2,4,6,5]
pd.Series(numbers, dtype="float32")

0    1.0
1    2.0
2    4.0
3    6.0
4    5.0
dtype: float32

### Creating a Series with missing values

When pandas sees a missing value during a file import, the library substitutes NumPy’s `nan` object. The acronym nan

In [26]:
temperatures = [1,2,4,5, np.nan, 10]
pd.Series(temperatures)

0     1.0
1     2.0
2     4.0
3     5.0
4     NaN
5    10.0
dtype: float64

Notice that the Series dtype is float64. Pandas automatically converts numeric values from integers to floating-points when it spots a nan value; this internal technical requirement allows the library to store numeric values and missing values in the same homogeneous Series.

### Creating a Series from Python objects

In [27]:
calorie_info = {
    "Cereal": 125,
    "Chocolate Bar": 406,
    "Ice Cream Sundae": 342
}

diet = pd.Series(calorie_info)
diet

Cereal              125
Chocolate Bar       406
Ice Cream Sundae    342
dtype: int64

In [29]:
rgb_colors = [(120, 41, 26), (196, 165, 45)]
pd.Series(rgb_colors)

0     (120, 41, 26)
1    (196, 165, 45)
dtype: object

In [30]:
random_data = np.random.randint(1, 101, 10)
pd.Series(random_data)

0    79
1    34
2    57
3    62
4    54
5    75
6    83
7    46
8    89
9    28
dtype: int32

## Series attributes

In [40]:
result = pd.Series(calorie_info)
print(result.index, result.values, type(result.values), result.dtype, result.size, result.shape)

Index(['Cereal', 'Chocolate Bar', 'Ice Cream Sundae'], dtype='object') [125 406 342] <class 'numpy.ndarray'> int64 3 (3,)


In [42]:
# return True if all Series value are unique
result.is_unique

True

In [44]:
# returns True if each Series value is greater than the previous one
result.is_monotonic

False

### Retrieving the first and last rows

In [47]:
values  =range(0, 500, 5)
nums = pd.Series(values)
nums

0       0
1       5
2      10
3      15
4      20
     ... 
95    475
96    480
97    485
98    490
99    495
Length: 100, dtype: int64

In [49]:
nums.head(3)
nums.head(n=3)

0     0
1     5
2    10
dtype: int64

In [51]:
nums.tail(5)

95    475
96    480
97    485
98    490
99    495
dtype: int64

## Mathematical operations

In [53]:
numbers = pd.Series([1,2,3,np.nan, 4,5])
numbers

0    1.0
1    2.0
2    3.0
3    NaN
4    4.0
5    5.0
dtype: float64

In [57]:
# The count method counts the number of non-null values
numbers.count()

5

In [59]:
numbers.sum()

15.0

In [61]:
numbers.sum(skipna=False)

nan

In [63]:
numbers.product()

120.0

In [65]:
numbers.cumsum()

0     1.0
1     3.0
2     6.0
3     NaN
4    10.0
5    15.0
dtype: float64

In [66]:
numbers.describe()

count    5.000000
mean     3.000000
std      1.581139
min      1.000000
25%      2.000000
50%      3.000000
75%      4.000000
max      5.000000
dtype: float64

In [67]:
numbers.sample(2)

4    4.0
1    2.0
dtype: float64

In [69]:
names = pd.Series(['Galo', "César", "Cesar", 'Galo', 'Lorena'])
names.unique()

array(['Galo', 'César', 'Cesar', 'Lorena'], dtype=object)

In [70]:
names.nunique()

4

### Arithmetic operations

In [72]:
s1 = pd.Series(data=[5, np.nan, 15], index=["A", "B", "C"])
s1

A     5.0
B     NaN
C    15.0
dtype: float64

In [73]:
s1 + 3

A     8.0
B     NaN
C    18.0
dtype: float64

In [74]:
s1.add(3)

A     8.0
B     NaN
C    18.0
dtype: float64

In [78]:
s1 - 5
s1.sub(5), s1.mul(2), s1.div(2)

(A     0.0
 B     NaN
 C    10.0
 dtype: float64,
 A    10.0
 B     NaN
 C    30.0
 dtype: float64,
 A    2.5
 B    NaN
 C    7.5
 dtype: float64)

In [83]:
s1 = pd.Series([1,2,3], index = ["A", 'B', 'C'])
s2 = pd.Series([4,5,6], index = ["A", 'B', 'C'])

In [85]:
# Pandas aligns Series by shared index labels when performing a mathematical operation
s1 + s2

A    5
B    7
C    9
dtype: int64

In [86]:
s1 = pd.Series([3,6,np.nan,12])
s2 = pd.Series([3,6,np.nan,12])

In [88]:
s1 == s2

0     True
1     True
2    False
3     True
dtype: bool

In [89]:
s1 = pd.Series(data = [5,10,15], index=["A", "B", "C"])
s2 = pd.Series(data = [4,8,12,14], index=["B", "C", "D", "E"])

In [90]:
s1 + s2

A     NaN
B    14.0
C    23.0
D     NaN
E     NaN
dtype: float64

In [97]:
s1.dtype, type(s1)

(dtype('int64'), pandas.core.series.Series)

In [None]:
dir(s1)

In [100]:
list(s1)

[5, 10, 15]

In [102]:
dict(s1)

{'A': 5, 'B': 10, 'C': 15}

In [108]:
'A' in s1

True

In [109]:
5 in s1.values

True