## Pandas Series Basics

In [2]:
import numpy as np
import pandas as pd # Pandas library as alias 'pd'

In [6]:
# Raw sales figures as a plain Python list.
sales = [0, 5, 155, 0, 518, 0, 1827, 616, 317, 325]

# pd.Series converts Python lists and NumPy arrays into a pandas Series.
# The `name` argument attaches a name, shown in the footer of the output.
sales_series = pd.Series(sales, name="Sales")

print('The index(left) is an array of ints starting at 0 by default, but it can be modified.')
sales_series

The index(left) is an array of ints starting at 0 by default, but it can be modified.


0       0
1       5
2     155
3       0
4     518
5       0
6    1827
7     616
8     317
9     325
Name: Sales, dtype: int64

In [15]:
# Ten consecutive integers, 0 through 9, as a NumPy array.
array = np.arange(0, 10)

# Wrap the array in an unnamed Series with the default integer index.
series = pd.Series(data=array)

In [13]:
# A Series must be built from one-dimensional data, so a (3, 2) array is
# rejected with a ValueError. The error is the point of this cell: it is
# caught and printed so the notebook still runs top to bottom.
matrix_2d = np.arange(6).reshape(3, 2)
try:
    pd.Series(matrix_2d, name='Test Array')
except ValueError as exc:
    print(f"{type(exc).__name__}: {exc}")

ValueError: Data must be 1-dimensional, got ndarray of shape (3, 2) instead

In [16]:
# The underlying data as a NumPy array, without the index labels.
series.values

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [17]:
# Mean computed on the raw array of values.
series.values.mean()

4.5

In [18]:
# Mean computed with the Series' own method.
series.mean()

4.5

In [22]:
# Swap the default 0-9 labels for 10, 20, ..., 100; the values stay the same.
series.index = list(range(10, 101, 10))

series

10     0
20     1
30     2
40     3
50     4
60     5
70     6
80     7
90     8
100    9
dtype: int32

In [24]:
# Give the so-far unnamed Series a name; it shows up in the footer of the output.
series.name = 'special name'

series

10     0
20     1
30     2
40     3
50     4
60     5
70     6
80     7
90     8
100    9
Name: special name, dtype: int32

In [25]:
# Data type of the values stored in the Series.
series.dtype

dtype('int32')

## Pandas Data Types

In [None]:
# bool - True/False values; treated as numeric and stored in the background as one or zero

# int and float - whole numbers and decimal numbers

# object - any Python object

# string - text data

# category - maps categorical data to a numeric array for efficiency

''' Time Series '''
# datetime64 - a single moment in time (January 4, 2015, 2:00 PM)

# timedelta - the duration between two dates or times

# period - a span of time (a day, a week, a month, a year)

In [26]:
# Show the integer sales data again before converting it to other dtypes.
sales_series

0       0
1       5
2     155
3       0
4     518
5       0
6    1827
7     616
8     317
9     325
Name: Sales, dtype: int64

In [27]:
# Convert the integer sales values to floats.
sales_series.astype("float")

0       0.0
1       5.0
2     155.0
3       0.0
4     518.0
5       0.0
6    1827.0
7     616.0
8     317.0
9     325.0
Name: Sales, dtype: float64

In [31]:
# Cast to booleans: zeros become False, every non-zero value becomes True.
sales_series.astype("bool")

0    False
1     True
2     True
3    False
4     True
5    False
6     True
7     True
8     True
9     True
Name: Sales, dtype: bool

In [30]:
# A "datetime64" dtype without a unit is rejected with a ValueError whose
# message asks for 'datetime64[ns]' instead. The error is the demonstration:
# it is caught and printed so the notebook still runs top to bottom.
try:
    sales_series.astype("datetime64")
except ValueError as exc:
    print(f"{type(exc).__name__}: {exc}")

ValueError: The 'datetime64' dtype has no unit. Please pass in 'datetime64[ns]' instead.

## Data Types with Methods

In [32]:
# A Series built from a range; the values are stored as integers.
pd.Series(range(5))

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [39]:
# Cast the integers to floats.
pd.Series(range(5)).astype("float")

0    0.0
1    1.0
2    2.0
3    3.0
4    4.0
dtype: float64

In [40]:
# Summing the float Series gives a float total.
pd.Series(range(5)).astype("float").sum()

10.0

In [41]:
# Cast to booleans: only the 0 becomes False.
pd.Series(range(5)).astype("bool")

0    False
1     True
2     True
3     True
4     True
dtype: bool

In [42]:
# The mean of a boolean Series is the fraction of True values (4 of 5 here).
pd.Series(range(5)).astype("bool").mean()

0.8

In [35]:
# Cast to the generic object dtype; the values display unchanged.
pd.Series(range(5)).astype("object")

0    0
1    1
2    2
3    3
4    4
dtype: object

In [36]:
# Cast to the pandas string dtype; the values display the same but the dtype is now string.
pd.Series(range(5)).astype("string")

0    0
1    1
2    2
3    3
4    4
dtype: string