# Introduction to pandas Data Structures

## 1. Series

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Build a Series from a plain list; no index is given, so pandas
# supplies default integer labels.
series = pd.Series(data=[4, 7, -5, 3])
# Bare trailing expression: show the Series (labels on the left, values on the right).
series

0    4
1    7
2   -5
3    3
dtype: int64

In [3]:
# Underlying data as a NumPy array. to_numpy() is the accessor pandas
# recommends over the older .values attribute; the result here is the same.
series.to_numpy()

array([ 4,  7, -5,  3], dtype=int64)

In [4]:
# Index of the Series. No index was passed when `series` was built,
# so pandas generated a default RangeIndex.
series.index

RangeIndex(start=0, stop=4, step=1)

### 1.1 Custom indexing

In [5]:
# Same values as before, but with explicit string labels.
# Labels pair with values by position: 'd' -> 4, 'b' -> 7, 'a' -> -5, 'c' -> 3.
series_2 = pd.Series(
    data=[4, 7, -5, 3],
    index=['d', 'b', 'a', 'c'],
)
series_2

d    4
b    7
a   -5
c    3
dtype: int64

In [6]:
# The labels supplied via index= are stored on the Series as an Index object,
# in the order they were given.
series_2.index

Index(['d', 'b', 'a', 'c'], dtype='object')

### 1.2 Selecting values via index

In [7]:
# Look up a single value by its label (explicit form of series_2['a']).
series_2.loc['a']

-5

In [8]:
# A list of labels returns a sub-Series, in the order the labels are listed.
series_2.loc[['b', 'a', 'c']]

b    7
a   -5
c    3
dtype: int64

In [9]:
# Boolean mask: keep only the entries whose value is positive.
series_2.loc[series_2 > 0]

d    4
b    7
c    3
dtype: int64

In [10]:
# Element-wise arithmetic keeps the labels attached to their values
# (method form of `series_2 * 2`).
series_2.mul(2)

d     8
b    14
a   -10
c     6
dtype: int64

In [11]:
# Membership on a Series tests the index labels, not the values;
# spelled out here against the index itself.
'c' in series_2.index

True

### 1.3 Creating Series from Python dict

In [12]:
# A dict converts directly to a Series: keys become the index labels,
# values become the data.
states = dict(Ohio=35000, Texas=71000, Oregon=16000, Utah=5000)
series_3 = pd.Series(data=states)
series_3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [13]:
# Passing index= together with a dict does not relabel the data: values are
# looked up by label. Labels missing from the dict ('California',
# 'San Francisco', 'Washington') come out as NaN, and dict keys not listed
# ('Ohio', 'Oregon', 'Utah') are dropped.
indexes = [
    'California',
    'San Francisco',
    'Washington',
    'Texas',
]
series_4 = pd.Series(data=states, index=indexes)
series_4

California           NaN
San Francisco        NaN
Washington           NaN
Texas            71000.0
dtype: float64

In [14]:
# Flag missing entries: True where the value is NaN
# (method form of pd.isnull(series_4)).
series_4.isna()

California        True
San Francisco     True
Washington        True
Texas            False
dtype: bool

In [15]:
# The complement: True where a value is present
# (method form of pd.notnull(series_4)).
series_4.notna()

California       False
San Francisco    False
Washington       False
Texas             True
dtype: bool

In [16]:
# Re-display series_3 (built from the full dict) for side-by-side comparison.
series_3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [17]:
# Re-display series_4: only 'Texas' carries a value, the other labels are NaN.
series_4

California           NaN
San Francisco        NaN
Washington           NaN
Texas            71000.0
dtype: float64

In [18]:
# Arithmetic between two Series aligns on index labels first. The result covers
# the union of both indexes; only 'Texas' has a value on both sides, so every
# other label comes out as NaN. (Method form of `series_3 + series_4`.)
series_3.add(series_4)

California            NaN
Ohio                  NaN
Oregon                NaN
San Francisco         NaN
Texas            142000.0
Utah                  NaN
Washington            NaN
dtype: float64

### 1.4 Naming a Series object and its index

In [19]:
# Both the Series and its index have a `name` attribute; set them in place.
# In the output, the index name appears above the labels and the Series name
# appears in the footer line.
series_4.name = 'population'
series_4.index.name = 'states'
series_4

states
California           NaN
San Francisco        NaN
Washington           NaN
Texas            71000.0
Name: population, dtype: float64

In [20]:
# Assigning a new list to .index relabels the existing Series in place:
# the labels are replaced position by position and the values are unchanged.
# NOTE: this mutates `series`, so earlier cells that display it will show
# these labels if they are re-run afterwards.
series.index = ['Alice', 'Bob', 'Caroline', 'David']
series

Alice       4
Bob         7
Caroline   -5
David       3
dtype: int64

In [21]:
# By contrast, asking for a new set of labels does NOT relabel the data: the
# values are looked up by label, and none of these labels exist in `series`,
# so every entry is NaN. `series` itself is left untouched.
# (Explicit form of pd.Series(series, index=[...]).)
series.reindex(['Ali', 'Baxodir', 'Sardor', 'Davron'])

Ali       NaN
Baxodir   NaN
Sardor    NaN
Davron    NaN
dtype: float64