In [2]:
import pandas
import numpy

# Display configuration for the whole notebook: turn off the HTML repr so
# frames print as plain text, and cap printed output at 8 rows / 8 columns
# (longer objects are shown truncated with "...").
pandas.options.display.notebook_repr_html = False
pandas.options.display.max_rows = 8
pandas.options.display.max_columns = 8

+ The Series represents a one-dimensional labeled array based on the NumPy ndarray. 
+ The Series extends the functionality of the NumPy ndarray by adding an associated set of labels that are used to index the elements of the array.

+ The DataFrame seamlessly manages multiple Series: each Series represents a column of the DataFrame, and the values in every column are automatically aligned along the index labels of the DataFrame.
+ A DataFrame has two axes: axis 0 is the row index (it runs vertically, down the rows) and axis 1 is the column index (it runs horizontally, across the columns).

In [3]:
# Creating a Series and accessing elements
# Seed NumPy's global generator first so the 100 standard-normal draws (and
# therefore every value shown below) are the same on each run.
numpy.random.seed(1)
s = pandas.Series(data=numpy.random.standard_normal(size=100))
s

0     1.624345
1    -0.611756
2    -0.528172
3    -1.072969
        ...   
96   -0.343854
97    0.043597
98   -0.620001
99    0.698032
dtype: float64

In [4]:
# Look up the element whose index label is 2. Plain s[2] on an integer index
# is also resolved by label; .loc states that intent explicitly.
s.loc[2]

-0.5281717522634557

In [5]:
# Select several elements at once by passing a list of index labels;
# .loc makes it explicit that 2, 5 and 20 are labels, not positions.
s.loc[[2, 5, 20]]

2    -0.528172
5    -2.301539
20   -1.100619
dtype: float64

In [6]:
# Positional slice: positions 3 through 7 (the stop value 8 is excluded).
# An integer slice through plain [] is positional even though a scalar
# integer is a label lookup, so .iloc is used to remove the ambiguity.
s.iloc[3:8]

3   -1.072969
4    0.865408
5   -2.301539
6    1.744812
7   -0.761207
dtype: float64

In [7]:
# First five entries of the Series (5 is head()'s default, spelled out here).
s.head(n=5)

0    1.624345
1   -0.611756
2   -0.528172
3   -1.072969
4    0.865408
dtype: float64

In [8]:
# Last five entries of the Series (5 is tail()'s default, spelled out here).
s.tail(n=5)

95    0.077340
96   -0.343854
97    0.043597
98   -0.620001
99    0.698032
dtype: float64

In [10]:
# The index holds the labels. No labels were supplied when s was built, so
# pandas generated the default integer range index (0 .. len(s) - 1).
s.index

RangeIndex(start=0, stop=100, step=1)

In [11]:
# The data of the Series as a bare NumPy ndarray, without the index labels.
s.values

array([ 1.62434536, -0.61175641, -0.52817175, -1.07296862,  0.86540763,
       -2.3015387 ,  1.74481176, -0.7612069 ,  0.3190391 , -0.24937038,
        1.46210794, -2.06014071, -0.3224172 , -0.38405435,  1.13376944,
       -1.09989127, -0.17242821, -0.87785842,  0.04221375,  0.58281521,
       -1.10061918,  1.14472371,  0.90159072,  0.50249434,  0.90085595,
       -0.68372786, -0.12289023, -0.93576943, -0.26788808,  0.53035547,
       -0.69166075, -0.39675353, -0.6871727 , -0.84520564, -0.67124613,
       -0.0126646 , -1.11731035,  0.2344157 ,  1.65980218,  0.74204416,
       -0.19183555, -0.88762896, -0.74715829,  1.6924546 ,  0.05080775,
       -0.63699565,  0.19091548,  2.10025514,  0.12015895,  0.61720311,
        0.30017032, -0.35224985, -1.1425182 , -0.34934272, -0.20889423,
        0.58662319,  0.83898341,  0.93110208,  0.28558733,  0.88514116,
       -0.75439794,  1.25286816,  0.51292982, -0.29809284,  0.48851815,
       -0.07557171,  1.13162939,  1.51981682,  2.18557541, -1.39

In [12]:
# Supply explicit string labels instead of the default 0..n-1 integer index.
s2 = pandas.Series(data=[1, 2, 3, 4], index=list('abcd'))
s2

a    1
b    2
c    3
d    4
dtype: int64

In [13]:
# Building a Series from a dict: the keys become the index labels and the
# dict values become the data.
s2 = pandas.Series(dict(a=1, b=2, c=3, d=4))
s2

a    1
b    2
c    3
d    4
dtype: int64

In [24]:
# Size, shape, uniqueness, and counts of values
# NOTE: this rebinds `s`, replacing the 100-element random Series above with a
# small one that contains a duplicate (1) and a missing value (NaN).
s = pandas.Series([10, 0, 1, 1, 2, 3, 4, 5, 6, numpy.nan])
# Series.ravel() is deprecated in pandas 2.2+ (a Series is already 1-D), so the
# underlying ndarray is flattened instead; the printed array is the same.
print('s.ravel() - flattens underlying data: {}'.format(s.values.ravel()))
# len() and shape count every element, NaN included ...
print('len: {}'.format(len(s)))
print('shape: {}'.format(s.shape))
# ... whereas count() skips missing values.
print('count() - ignores NaN: {}'.format(s.count()))
# unique() keeps NaN as one of the distinct values; value_counts() drops it.
print('unique(): {}'.format(s.unique()))
print('value_counts(): \n{}'.format(s.value_counts()))

s.ravel() - flattens underlying data: [ 10.   0.   1.   1.   2.   3.   4.   5.   6.  nan]
len: 10
shape: (10,)
count() - ignores NaN: 9
unique(): [ 10.   0.   1.   2.   3.   4.   5.   6.  nan]
value_counts(): 
1.0     2
10.0    1
6.0     1
5.0     1
4.0     1
3.0     1
2.0     1
0.0     1
dtype: int64


In [26]:
# Alignment via index labels
# First operand: values 1..4 labelled 'a'..'d' in ascending order.
s3 = pandas.Series(data=[1, 2, 3, 4], index=list('abcd'))
s3

a    1
b    2
c    3
d    4
dtype: int64

In [27]:
# Second operand: the same label -> value pairs, but stored in reverse order.
s4 = pandas.Series(data=[4, 3, 2, 1], index=list('dcba'))
s4

d    4
c    3
b    2
a    1
dtype: int64

In [28]:
# Element-wise addition pairs values by index label, not by position, so
# 'a' is added to 'a', 'b' to 'b', ... regardless of storage order.
s3.add(s4)

a    2
b    4
c    6
d    8
dtype: int64

In [30]:
# For contrast: plain NumPy arrays carry no labels, so addition pairs the
# elements purely by position.
a1 = numpy.array([1, 2, 3, 4])
a2 = numpy.array([4, 3, 2, 1])
numpy.add(a1, a2)

array([5, 5, 5, 5])

In [34]:
# Creating a DataFrame
# A 2-D array becomes a 2x2 frame; since no labels are given, both the rows
# and the columns receive default integer labels.
pandas.DataFrame(data=numpy.array([[10, 11], [20, 21]]))

    0   1
0  10  11
1  20  21

In [40]:
# Build a frame from a list of Series: each Series becomes one ROW, so two
# 5-element Series (10..14 and 15..19) give a frame of shape (2, 5).
df1 = pandas.DataFrame(
    [pandas.Series(numpy.arange(start, start + 5)) for start in (10, 15)]
)
print('shape: {}'.format(df1.shape))
print('df1: \n{}'.format(df1))

shape: (2, 5)
df1: 
    0   1   2   3   4
0  10  11  12  13  14
1  15  16  17  18  19


In [42]:
# Name the columns at construction time through the `columns` argument;
# the rows keep their default integer labels.
df = pandas.DataFrame(data=numpy.array([[10, 11], [20, 21]]), columns=list('ab'))
print('df columns: {}'.format(df.columns))
df

df columns: Index(['a', 'b'], dtype='object')


    a   b
0  10  11
1  20  21

In [44]:
# Rename the columns of the existing frame in place by assigning a new list
# of labels (one per column) to its `columns` attribute.
df.columns = ['c1', 'c2']
df

   c1  c2
0  10  11
1  20  21

In [46]:
# Label both axes at construction: `index` names the rows and `columns`
# names the columns of the 2x2 block of values 0..3.
df = pandas.DataFrame(
    data=numpy.arange(4).reshape(2, 2),
    index=['r1', 'r2'],
    columns=['c1', 'c2'],
)
df

    c1  c2
r1   0   1
r2   2   3