In [17]:
import numpy as np
import pandas as pd

#1.Series : 1-D one-dimensional labeled array 
#capable of holding any data type (integers, strings, floating point numbers, Python objects, etc.).
#axis labels are collectively referred to as the index

#s = pd.Series(data, index=index)

#data can be many different things:

#1.a Python dict
#2.an ndarray
#3.a scalar value (like 5)

#index is a list of axis labels

In [21]:
#numpy.random.randn(d0, d1, …, dn) - creates an array, d0 to dn defines the dimensions, values are picked from a univariate
#Standard Gaussian Distribution i.e. mean = 0 , variance = 1.
#if any of the d_i are floats, they are first converted to integers by truncation). 
#A single float randomly sampled from the distribution is returned if no argument is provided.
#Can also be manipulated as np.random.randn(2, 2 ,2) * 3 + 2 

#Different ways for random numbers:

#1. rand(d0, d1, …, dn) - It gives uniformly distributed random numbers whose values lies between 0 and 1. 
#Since it is uniformly distributed, therefore the mean value is 0.5

#2. randn(d0, d1, …, dn) - It gives normally (Gaussian) distributed random numbers whose values 
#theoretically lies between -Infinity to Infinity having 0 mean and 1 variance.

#3. randint(low[, high, size, dtype]) - Return random integers from the “discrete uniform” distribution of the specified dtype in the “half-open” interval [low, high). 
#If high is None (the default), then results are from [0, low).
#size : int or tuple of ints - (d0, d1, …, dn) - d0 to dn defines the dimensions

#From ndarray
s1 = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
#index must be the same length as data by default - [0, ..., len(data) - 1]

#From Dict:
s2 = pd.Series({'b' : 1, 'a' : 0, 'c' : 2})

#From Scalar Value - Index is a must here
s3 = pd.Series(5., index=['a', 'b', 'c', 'd', 'e'])
s3

#A Series is like a fixed-size dict

a    5.0
b    5.0
c    5.0
d    5.0
e    5.0
dtype: float64

In [22]:
#Note pandas supports non-unique index values. If an operation that does not support duplicate index values is attempted, 
#an exception will be raised at that time.

#The reason for being lazy is nearly all performance-based. 
#(there are many instances in computations, like parts of GroupBy, where the index is not used).

In [26]:
#s3['f'] - Missing label raises exception so use get it returns None or specified default.
print(s.get('f'))
print(s.get('f', np.nan))

None
nan


In [35]:
#Vectorized operations and label alignment with Series
s1 += s1
s1

np.exp(s1)
s1

a      -4.586399
b   -1107.037154
c     -91.431978
d    -785.759391
e    -443.215685
dtype: float64

In [39]:
s4 = pd.Series({'a' : 0, 'b' : 1, 'c' : 2})
np.exp(s4) #This alone does not modify the series. An assingment has to be done.

a    1.000000
b    2.718282
c    7.389056
dtype: float64

In [43]:
#A key difference between Series and ndarray is that operations between Series automatically align the data based on label. 
#Thus, you can write computations without giving consideration to whether the Series involved have the same labels.
s4[:1] + s4[:]

#The result of an operation between unaligned Series will have the union of the indexes involved. 
#If a label is not found in one Series or the other, the result will be marked as missing NaN.

a    0.0
b    NaN
c    NaN
dtype: float64

In [None]:
#Data Frame: a 2-dimensional labeled data structure with columns of potentially different types.
#Like a dict of Series objects.

#Along with the data, you can optionally pass index (row labels) and columns (column labels) arguments.
#From dictionary of series:
