In [1]:
import numpy as np
import pandas as pd

# Remember: to quickly browse the contents of pandas, type pd.<TAB>

# and to see the pandas documentation, run pd?

In [2]:
pd?

In [3]:
# A Series built from a bare list; no index is given, so pandas supplies
# the default integer index.
data = pd.Series(data=[0.25, 0.5, 0.75, 1.0])
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [4]:
# .values exposes the Series data as a NumPy array
data.values

array([0.25, 0.5 , 0.75, 1.  ])

In [5]:
# .index holds the labels; a Series created without an explicit index gets a RangeIndex
data.index

RangeIndex(start=0, stop=4, step=1)

In [6]:
# Square-bracket access with an integer returns the element whose index label is 1
data[1]

0.5

In [7]:
# Same lookup for the first element, whose index label is 0
data[0]

0.25

In [8]:
# Integer slices follow the usual Python convention: the stop bound (3) is excluded
data[1:3]

1    0.50
2    0.75
dtype: float64

In [9]:
# Slice with a step: every second element, starting from the second one
data[1::2]

1    0.5
3    1.0
dtype: float64

# The main difference between a Pandas Series and a NumPy array: a NumPy array has an implicitly defined integer index, whereas a Pandas Series has an explicitly defined index associated with its values.

# For example, the explicit index does not have to consist of integers; below it is made of strings.

In [10]:
# Same values as before, but labelled with strings instead of the default
# integer index.
data = pd.Series(
    data=[0.25, 0.5, 0.75, 1.0],
    index=list('abcd'),
)
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [11]:
# Access an element through its string label
data['b']

0.5

# We may also use non-sequential or non-contiguous indices.

In [13]:
# Integer labels do not have to be consecutive or sorted.
data = pd.Series(
    data=[0.25, 0.5, 0.75, 1.0],
    index=[2, 5, 4, 12],
)
data

2     0.25
5     0.50
4     0.75
12    1.00
dtype: float64

In [14]:
# 5 is matched against the index labels, not positions (the Series only has four elements)
data[5]

0.5

In [17]:
# data[3] would raise a KeyError, because 3 is not one of the index labels (2, 5, 4, 12)

## In a sense, a Pandas Series can be thought of as a specialization of a Python dictionary: it maps index labels to values.

In [19]:
# State populations keyed by state name; building a Series from the dict
# uses the keys as the index.
population_dict = {
    'California': 38_500_000,
    'Texas': 26_000_000,
    'New York': 20_000_000,
    'Florida': 19_000_000,
    'Illinois': 12_000_000,
}
population = pd.Series(data=population_dict)
population

California    38500000
Texas         26000000
New York      20000000
Florida       19000000
Illinois      12000000
dtype: int64

# By default, a Series built from a dictionary takes its index from the dictionary's keys.

In [20]:
# Dictionary-style lookup by key
population['California']

38500000

## Unlike a dictionary, the Series also supports array-style operations, such as slicing.

In [22]:
# Label-based slice: unlike an integer slice, the end label ('New York') is included
population['California':'New York']

California    38500000
Texas         26000000
New York      20000000
dtype: int64

In [25]:
# No index is given, so the labels default to 0, 1, 2
pd.Series([2, 4, 6])

0    2
1    4
2    6
dtype: int64

#### If the data is a list or NumPy array, the index defaults to an integer sequence.

In [27]:
pd.Series(5, index=[100, 200, 300])  # scalar data is repeated to fill the specified index

100    5
200    5
300    5
dtype: int64

In [29]:
pd.Series({2:'a', 1:'b', 3:'c'})  # data can be a dictionary; its keys become the index

2    a
1    b
3    c
dtype: object

In [30]:
pd.Series({2:'a', 1:'b', 3:'c'}, index=[3, 2])  # an explicit index picks and orders the entries: only keys 3 and 2 are kept

3    c
2    a
dtype: object

# Next, let's investigate the DataFrame

# If a Series is thought of as a one-dimensional array with flexible indices, a DataFrame can be thought of as a two-dimensional array with both flexible row indices and flexible column names.

In [31]:
# State areas keyed by state name, using the same keys as the population data.
area_dict = {
    'California': 424_000,
    'Texas': 700_000,
    'New York': 142_000,
    'Florida': 170_000,
    'Illinois': 150_000,
}
area = pd.Series(data=area_dict)
area

California    424000
Texas         700000
New York      142000
Florida       170000
Illinois      150000
dtype: int64

In [33]:
# Assemble a DataFrame from the two Series: each dict key names a column,
# and the state names from the Series index label the rows.
states = pd.DataFrame(
    data={
        'population': population,
        'area': area,
    }
)
states

Unnamed: 0,population,area
California,38500000,424000
Texas,26000000,700000
New York,20000000,142000
Florida,19000000,170000
Illinois,12000000,150000
