## Series
### Introduction
- A Series is a one-dimensional labeled array capable of holding any data type: integers, strings, floating point
  numbers, Python objects, etc.
- The axis labels are collectively referred to as the index.

In [1]:
import pandas as pd

In [2]:
# A plain Python list becomes a Series; with no index given, the rows are labelled 0..4.
numbers = list(range(1, 6))
print(pd.Series(data=numbers))

0    1
1    2
2    3
3    4
4    5
dtype: int64


In [6]:
# Use the integers in `numbers` as the row labels instead of the default 0..n-1 index.
letters = list('abcde')
pd.Series(letters, index=numbers)

1    a
2    b
3    c
4    d
5    e
dtype: object

In [5]:
# Index labels can be strings as well as numbers: 'letter 1' .. 'letter 5'.
letter_labels = [f'letter {position}' for position in range(1, 6)]
pd.Series(data=letters, index=letter_labels)

letter 1    a
letter 2    b
letter 3    c
letter 4    d
letter 5    e
dtype: object

In [6]:
# Building a Series from a dict: the keys become the index, the values become the data.
info = dict(Name='Abhishek', age=26, work='Developer')
pd.Series(info)

Name     Abhishek
age            26
work    Developer
dtype: object

In [6]:
# When a dict and an explicit index are both given, each index label is looked up
# among the dict keys; a label with no matching key (here 'job') gets NaN.
wanted_labels = ['Name', 'age', 'job']
pd.Series(data=info, index=wanted_labels)

Name    Abhishek
age           26
job          NaN
dtype: object

## Attributes
- Attributes return information about the object.

In [8]:
# Sample data reused by the attribute examples in this section.
numbers = list(range(1, 6))
letters = list('abcde')

In [9]:
# Wrap both lists in Series (default integer index) and show them one after the other.
number_series = pd.Series(data=numbers)
letter_series = pd.Series(data=letters)
print(number_series, letter_series, sep='\n')

0    1
1    2
2    3
3    4
4    5
dtype: int64
0    a
1    b
2    c
3    d
4    e
dtype: object


In [8]:
# .dtype reports the element type: int64 for the integers, object for the Python strings.
print(number_series.dtype, letter_series.dtype, sep='\n')

int64
object


In [13]:
# .values -> the underlying data as an array
# .index  -> the object describing the row labels (a RangeIndex for the default labels)
print(letter_series.values, number_series.index, sep='\n')

['a' 'b' 'c' 'd' 'e']
RangeIndex(start=0, stop=5, step=1)


In [10]:
# None marks a missing entry; the explicit index labels the rows 1..3.
incomplete_series = pd.Series(['a', 'b', None], index=[1, 2, 3])

# .hasnans is True as soon as at least one value in the Series is missing (None/NaN).
print(incomplete_series, incomplete_series.hasnans, sep='\n')

1       a
2       b
3    None
dtype: object
True


In [11]:
# .shape -> the dimensions as a tuple (a single entry for a Series; mostly used with DataFrames)
# .size  -> the total number of elements
print(number_series.shape, number_series.size, sep='\n')

(5,)
5


## Methods
- Methods actually perform actions and can even end up changing the original object.

In [52]:
# The None entry is stored as NaN, so the whole Series ends up as float64.
values = pd.Series([1.5, 2.5, 0, None, 4, -5])
print(values)

0    1.5
1    2.5
2    0.0
3    NaN
4    4.0
5   -5.0
dtype: float64


In [53]:
# Each entry names a zero-argument Series method; they are called and printed in order.
stat_methods = (
    'max',     # largest value
    'min',     # smallest value
    'sum',     # total of the values (the NaN entry is skipped)
    'mean',    # average of the non-missing values
    'idxmax',  # index label that holds the maximum value
    'idxmin',  # index label that holds the minimum value
    'isnull',  # boolean Series: True where the value is missing
    'round',   # every value rounded to 0 decimals (note 2.5 -> 2.0: halves go to the even neighbour)
)
for method_name in stat_methods:
    print(getattr(values, method_name)())

4.0
-5.0
3.0
0.6
4
5
0    False
1    False
2    False
3     True
4    False
5    False
dtype: bool
0    2.0
1    2.0
2    0.0
3    NaN
4    4.0
5   -5.0
dtype: float64


## Handling CSV files

In [None]:
# Read only the 'Name' column and turn the resulting one-column DataFrame into a Series.
# squeeze("columns") collapses just the column axis, so the result stays a Series even
# if the file ever holds a single row (a bare squeeze() would return a scalar then).

richest = pd.read_csv('TopRichestInWorld.csv', usecols=['Name']).squeeze("columns")
print("Richest Person List: \n", richest)
print(type(richest))

Richest Person List: 
 0                     Elon Musk
1                    Jeff Bezos
2      Bernard Arnault & family
3                    Bill Gates
4                Warren Buffett
                 ...           
96             Vladimir Potanin
97         Harold Hamm & family
98                 Sun Piaoyang
99           Luo Liguo & family
100                   Peter Woo
Name: Name, Length: 101, dtype: object
<class 'pandas.core.series.Series'>


### Create and read CSV file

In [21]:
# Write the Series to 'test.csv' in the current working directory.
# index=False leaves the row labels out, so the file holds only the data column.
richest.to_csv(path_or_buf='test.csv', index=False)

In [35]:
# Read the CSV file that we created and convert its single column into a Series.
# squeeze("columns") only collapses the column axis, so a one-row file still gives a
# Series instead of a scalar.
people = pd.read_csv('test.csv').squeeze("columns")
print(people)
print("Type: ", type(people))

0                     Elon Musk
1                    Jeff Bezos
2      Bernard Arnault & family
3                    Bill Gates
4                Warren Buffett
                 ...           
96             Vladimir Potanin
97         Harold Hamm & family
98                 Sun Piaoyang
99           Luo Liguo & family
100                   Peter Woo
Name: Name, Length: 101, dtype: object
Type:  <class 'pandas.core.series.Series'>


### head() and tail()

In [38]:
# head(n): a positive n keeps the first n rows,
# a negative n keeps every row except the last |n| rows.
a, b = richest.head(10), richest.head(-10)
print(a, b, sep='\n')

0                   Elon Musk
1                  Jeff Bezos
2    Bernard Arnault & family
3                  Bill Gates
4              Warren Buffett
5                  Larry Page
6                 Sergey Brin
7               Larry Ellison
8               Steve Ballmer
9               Mukesh Ambani
Name: Name, dtype: object
0                    Elon Musk
1                   Jeff Bezos
2     Bernard Arnault & family
3                   Bill Gates
4               Warren Buffett
                ...           
86              Vladimir Lisin
87        Fan Hongwei & family
88              Lakshmi Mittal
89              Andrew Forrest
90     Jiang Rensheng & family
Name: Name, Length: 91, dtype: object


In [39]:
# tail(n): a positive n keeps the last n rows,
# a negative n keeps every row except the first |n| rows.
c, d = richest.tail(10), richest.tail(-10)
print(c, d, sep='\n')

91     Savitri Jindal & family
92                 Wang Wenyin
93                   Li Xiting
94              Stefan Persson
95                 Steve Cohen
96            Vladimir Potanin
97        Harold Hamm & family
98                Sun Piaoyang
99          Luo Liguo & family
100                  Peter Woo
Name: Name, dtype: object
10                     Gautam Adani & family
11                         Michael Bloomberg
12                 Carlos Slim Helu & family
13     Francoise Bettencourt Meyers & family
14                           Mark Zuckerberg
                       ...                  
96                          Vladimir Potanin
97                      Harold Hamm & family
98                              Sun Piaoyang
99                        Luo Liguo & family
100                                Peter Woo
Name: Name, Length: 91, dtype: object


### Sorting values in a series

In [25]:
# Sort the names alphabetically. Ascending order, quicksort and NaN-last are the
# sort_values defaults, so only ignore_index=True (relabel the result 0..n-1) is passed.
new_richest = richest.sort_values(ignore_index=True)
print("New sorted richest list: \n", new_richest)

New sorted richest list: 
 0           Abigail Johnson
1          Alain Wertheimer
2              Alice Walton
3            Amancio Ortega
4            Andrew Forrest
               ...         
96           Warren Buffett
97         William Lei Ding
98     Yang Huiyan & family
99             Zhang Yiming
100          Zhong Shanshan
Name: Name, Length: 101, dtype: object


In [74]:
# Sort by the index labels rather than by the values.
richest_sort_index = richest.sort_index(
    ascending=False,    # highest label first, which reverses the original order here
    ignore_index=True,  # then relabel the sorted rows 0..n-1
)
print(richest_sort_index)

0                     Peter Woo
1            Luo Liguo & family
2                  Sun Piaoyang
3          Harold Hamm & family
4              Vladimir Potanin
                 ...           
96               Warren Buffett
97                   Bill Gates
98     Bernard Arnault & family
99                   Jeff Bezos
100                   Elon Musk
Name: Name, Length: 101, dtype: object


### Value Counts
- Counts the occurrences of certain values or of all the values.

In [30]:
# Load the 'Industry' column into its own Series instead of overwriting `richest`
# (which holds the names), so cells that use `richest` keep working if re-run afterwards.
# squeeze("columns") guarantees a Series even for a one-row file.
industries = pd.read_csv('TopRichestInWorld.csv', usecols=['Industry']).squeeze("columns")
print(industries)

# normalize=True -> relative frequencies (fractions of the total) instead of raw counts
# ascending=True -> least frequent industries first
# dropna=False   -> missing values get their own row instead of being left out
value_count_FDI = industries.value_counts(normalize=True, ascending=True, dropna=False)
print("\nValue Counts from different industries: \n", value_count_FDI)

0                 Automotive
1                 Technology
2           Fashion & Retail
3                 Technology
4      Finance & Investments
               ...          
96           Metals & Mining
97                    Energy
98                Healthcare
99             Manufacturing
100              Real Estate
Name: Industry, Length: 101, dtype: object

Value Counts from different industries: 
 Industry
Gambling & Casinos       0.009901
Service                  0.009901
Telecom                  0.019802
Logistics                0.019802
Energy                   0.019802
Real Estate              0.029703
Media & Entertainment    0.039604
Manufacturing            0.049505
Healthcare               0.049505
Diversified              0.059406
Automotive               0.069307
Food & Beverage          0.079208
Metals & Mining          0.089109
Finance & Investments    0.128713
Technology               0.148515
Fashion & Retail         0.178218
Name: proportion, dtype: float64


In [None]:
# bins=4 splits the numeric range into 4 equal-width intervals and counts how many
# values fall into each one; binning only works with numerical data.
# The ages get their own variable so `richest` (the names) is not overwritten, and
# squeeze("columns") guarantees a Series even for a one-row file.
ages = pd.read_csv('TopRichestInWorld.csv', usecols=['Age']).squeeze("columns")
ages.value_counts(bins=4)

(62.0, 78.0]      40
(46.0, 62.0]      31
(78.0, 94.0]      24
(29.935, 46.0]     6
Name: count, dtype: int64

### Accessing elements via positions

In [None]:
# Reload the 'Name' column as a Series with the default 0..n-1 index.
# squeeze("columns") only collapses the column axis, so a one-row file still gives a Series.
richest = pd.read_csv('TopRichestInWorld.csv', usecols=['Name']).squeeze("columns")
richest.head(10)   # Get the first 10 elements

0                   Elon Musk
1                  Jeff Bezos
2    Bernard Arnault & family
3                  Bill Gates
4              Warren Buffett
5                  Larry Page
6                 Sergey Brin
7               Larry Ellison
8               Steve Ballmer
9               Mukesh Ambani
Name: Name, dtype: object

In [None]:
# .iloc is purely positional. Plain richest[0] is a *label* lookup that only works here
# because the default index labels happen to equal the positions.
richest.iloc[0]     # accessing an element via its exact position

'Elon Musk'

In [None]:
# .iloc with a list selects by position; richest[[1, 2, 10]] would look the numbers up
# as index labels instead, which only matches positions on the default index.
richest.iloc[[1, 2, 10]]  # Accessing multiple elements with a list of positions

1                   Jeff Bezos
2     Bernard Arnault & family
10       Gautam Adani & family
Name: Name, dtype: object

In [None]:
# Positional slice: start included, stop excluded -> positions 5, 6, 7, 8, 9.
richest.iloc[5:10]

5       Larry Page
6      Sergey Brin
7    Larry Ellison
8    Steve Ballmer
9    Mukesh Ambani
Name: Name, dtype: object

In [None]:
# Positional slice with a step: every 4th element from position 0 up to (not including) 21.
richest.iloc[0:21:4]

0                     Elon Musk
4                Warren Buffett
8                 Steve Ballmer
12    Carlos Slim Helu & family
16               Zhong Shanshan
20                 Charles Koch
Name: Name, dtype: object

In [None]:
# Open-ended positional slice: everything before position 10 (10 itself is excluded).
richest.iloc[:10]

0                   Elon Musk
1                  Jeff Bezos
2    Bernard Arnault & family
3                  Bill Gates
4              Warren Buffett
5                  Larry Page
6                 Sergey Brin
7               Larry Ellison
8               Steve Ballmer
9               Mukesh Ambani
Name: Name, dtype: object

In [106]:
# A slice is positional, so -1 counts from the end and this returns the last element.
# It will not raise an exception, unlike the label lookup richest[-1] on this index.
richest.iloc[-1:]

100    Peter Woo
Name: Name, dtype: object

In [108]:
# .index exposes the row labels; here it is the default integer RangeIndex.
richest.index

RangeIndex(start=0, stop=101, step=1)

### Accessing elements via index

In [33]:
# Load 'Name' and 'NetWorth', use the names as the index, and convert the remaining
# single 'NetWorth' column into a Series. squeeze("columns") only collapses the column
# axis, so a one-row file still gives a Series instead of a scalar.
richest = pd.read_csv(
    'TopRichestInWorld.csv', usecols=['Name', 'NetWorth'], index_col=['Name']
).squeeze("columns")
print(richest)

Name
Elon Musk                   $219,000,000,000
Jeff Bezos                  $171,000,000,000
Bernard Arnault & family    $158,000,000,000
Bill Gates                  $129,000,000,000
Warren Buffett              $118,000,000,000
                                  ...       
Vladimir Potanin             $17,300,000,000
Harold Hamm & family         $17,200,000,000
Sun Piaoyang                 $17,100,000,000
Luo Liguo & family           $17,000,000,000
Peter Woo                    $17,000,000,000
Name: NetWorth, Length: 101, dtype: object


In [115]:
# Preview the first 10 entries; the names are now the index labels.
richest.head(10)

Name
Elon Musk                   $219,000,000,000
Jeff Bezos                  $171,000,000,000
Bernard Arnault & family    $158,000,000,000
Bill Gates                  $129,000,000,000
Warren Buffett              $118,000,000,000
Larry Page                  $111,000,000,000
Sergey Brin                 $107,000,000,000
Larry Ellison               $106,000,000,000
Steve Ballmer                $91,400,000,000
Mukesh Ambani                $90,700,000,000
Name: NetWorth, dtype: object

In [116]:
# Look up a single value by its index label.
richest.loc['Bill Gates']

'$129,000,000,000'

In [118]:
# A list of labels returns a Series with those entries, in the order requested.
richest.loc[['Elon Musk', 'Warren Buffett', 'Mukesh Ambani']]

Name
Elon Musk         $219,000,000,000
Warren Buffett    $118,000,000,000
Mukesh Ambani      $90,700,000,000
Name: NetWorth, dtype: object

In [None]:
# Label slice: unlike positional slices, BOTH endpoints are included in the result.
richest.loc['Bill Gates':'Mukesh Ambani']

Name
Bill Gates        $129,000,000,000
Warren Buffett    $118,000,000,000
Larry Page        $111,000,000,000
Sergey Brin       $107,000,000,000
Larry Ellison     $106,000,000,000
Steve Ballmer      $91,400,000,000
Mukesh Ambani      $90,700,000,000
Name: NetWorth, dtype: object

In [120]:
# Integer slices stay positional even though the index now holds names: first two rows.
richest.iloc[0:2]

Name
Elon Musk     $219,000,000,000
Jeff Bezos    $171,000,000,000
Name: NetWorth, dtype: object

In [4]:
# NOTE(review): the saved output for this cell (a RangeIndex) looks stale. `richest` was
# reloaded with index_col=['Name'] earlier in this section, so a fresh run should list
# the names. Re-run to confirm.
richest.index

RangeIndex(start=0, stop=101, step=1)