In [5]:
import pandas as pd
from pandas import Series, DataFrame
import numpy as np

# Series

* indeks belirten tek boyutlu array.

* `Series([4,7,-5,3])`

* Out:

    0  4

    1  7

    2  -5

    3  3

In [3]:
# Build a Series whose values are addressed by explicit string labels.
obj2 = Series(
    data=[4, 7, -5, 3],
    index=['d', 'b', 'a', 'c'],
)
obj2

d    4
b    7
a   -5
c    3
dtype: int64

In [4]:
# Overwrite the value stored under label 'd', then pick several labels at once
# (the result follows the order of the requested labels).
obj2.loc['d'] = 6
print(obj2.loc[['c', 'a', 'd']])

c    3
a   -5
d    6
dtype: int64


## Numpy array operations in Series

In [6]:
# A NumPy function can be applied to the Series directly; the result is again a
# Series that keeps the original index labels.
np.exp(obj2)

d     403.428793
b    1096.633158
a       0.006738
c      20.085537
dtype: float64

## Dict --> Series

In [7]:
# A dict maps straight onto a Series: keys become index labels, values the data.
sdata = dict(Ohio=35000, Texas=71000, Oregon=16000, Utah=5000)
obj3 = Series(data=sdata)
obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [19]:
# Label the index axis and the Series itself; both names appear in the repr.
obj3.index.name = 'state'
obj3.name = 'population'
obj3

state
Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
Name: population, dtype: int64

# DataFrame

* a dict of Series

* has both a row and column index

* data is stored as one or more two-dimensional blocks rather than as a list, dict, or some other collection of one-dimensional arrays


## Dict Format

In [23]:
# Column-oriented input: every key is a column name and every list holds the
# values of that column (all lists have the same length).
data = dict(
    state=['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    year=[2000, 2001, 2002, 2001, 2002],
    pop=[1.5, 1.7, 3.6, 2.4, 2.9],
)
frame = DataFrame(data=data)
frame

Unnamed: 0,state,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9


### Genel Özellikler

In [24]:
# Passing `columns` fixes the order in which the columns appear in the frame.
DataFrame(data, columns=['year', 'state', 'pop'])

Unnamed: 0,year,state,pop
0,2000,Ohio,1.5
1,2001,Ohio,1.7
2,2002,Ohio,3.6
3,2001,Nevada,2.4
4,2002,Nevada,2.9


#### `DataFrame(data, columns=, index=)`

* `data` da var olmayan bir column girdiğinde `NaN` dolar.

In [30]:
# 'debt' is not a key of `data`, so that column comes out empty; `index`
# supplies the row labels in place of the default 0..n-1 range.
frame2 = DataFrame(
    data,
    index=['one', 'two', 'three', 'four', 'five'],
    columns=['year', 'state', 'pop', 'debt'],
)
frame2


Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,
five,2002,Nevada,2.9,


#### sütunlara erişme

In [31]:
# A column can be retrieved as a Series with dict-style indexing ...
print(frame['state'])
# ... or by attribute access on the frame.
print(frame.year)

0      Ohio
1      Ohio
2      Ohio
3    Nevada
4    Nevada
Name: state, dtype: object
0    2000
1    2001
2    2002
3    2001
4    2002
Name: year, dtype: int64


In [44]:
# Assign with bracket notation rather than `frame2.debt = ...`: attribute-style
# assignment only updates a column that already exists and otherwise just
# attaches a plain Python attribute to the object without creating a column.
# Bracket notation also matches how 'debt' is assigned elsewhere in this notebook.
frame2['debt'] = np.arange(5)
frame2['debt']

one      0
two      1
three    2
four     3
five     4
Name: debt, dtype: int64

#### loc[] - iloc[] kısa giriş

In [40]:
# Select a single row by its index label ...
print(frame2.loc['three'])
# ... and the same row by its integer position.
print(frame2.iloc[2])

# Positional slices: rows at positions 2-3, then the same rows restricted to
# the columns at positions 1-2.
print(frame2.iloc[2:4])
print(frame2.iloc[2:4,1:3])

year     2002
state    Ohio
pop       3.6
debt      NaN
Name: three, dtype: object
year     2002
state    Ohio
pop       3.6
debt      NaN
Name: three, dtype: object
       year   state  pop debt
three  2002    Ohio  3.6  NaN
four   2001  Nevada  2.4  NaN
        state  pop
three    Ohio  3.6
four   Nevada  2.4


#### Assigning lists or arrays to a column

In [49]:
# Assigning a Series aligns it to frame2's index by label: rows whose labels
# ('one', 'three') are missing from `val` are left without a value.
val = Series([-1.2, -1.5, -1], index=['two', 'four', 'five'])
frame2['debt'] = val
frame2

Unnamed: 0,year,state,pop,debt
one,2000,Ohio,1.5,
two,2001,Ohio,1.7,-1.2
three,2002,Ohio,3.6,
four,2001,Nevada,2.4,-1.5
five,2002,Nevada,2.9,-1.0


#### `del` keyword

In [58]:
# Assigning to a label that does not exist yet creates the column; here it
# holds the element-wise result of comparing 'pop' against 2.5.
frame2['greater_than_2.5'] = frame2['pop'].gt(2.5)
frame2

Unnamed: 0,year,state,pop,debt,greater_than_2.5
one,2000,Ohio,1.5,,False
two,2001,Ohio,1.7,-1.2,False
three,2002,Ohio,3.6,,True
four,2001,Nevada,2.4,-1.5,False
five,2002,Nevada,2.9,-1.0,True


In [59]:
# Drop the helper column again; `del` removes it from frame2 itself.
del frame2['greater_than_2.5']

## Nested dict of dicts format

In [60]:
# Nested mapping: outer keys are states, inner keys are years.
# 'Nevada' has no entry for the year 2000.
pop = dict(
    Nevada={2001: 2.4, 2002: 2.9},
    Ohio={2000: 1.5, 2001: 1.7, 2002: 3.6},
)
pop

{'Nevada': {2001: 2.4, 2002: 2.9}, 'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6}}

* the **outer dict keys** --> **columns** 

* the **inner keys** --> **row indices**

In [62]:
# Outer dict keys become the columns and inner keys the row index; a
# (year, state) pair with no entry, such as 2000 / Nevada, is left empty.
frame3 = DataFrame(pop)
frame3

Unnamed: 0,Nevada,Ohio
2001,2.4,1.7
2002,2.9,3.6
2000,,1.5


## Dicts of Series

In [65]:
# frame3 is indexed by integer year labels, so plain `[...]` slicing of its
# columns is easy to misread as label-based. `.iloc` states the positional
# intent explicitly: every Ohio row except the last, and the first two Nevada rows.
pdata = {
    'Ohio': frame3['Ohio'].iloc[:-1],
    'Nevada': frame3['Nevada'].iloc[:2],
}
pdata

{'Ohio': 2001    1.7
 2002    3.6
 Name: Ohio, dtype: float64,
 'Nevada': 2001    2.4
 2002    2.9
 Name: Nevada, dtype: float64}

In [66]:
# Each Series in the dict becomes one column of the new frame.
DataFrame(pdata)

Unnamed: 0,Ohio,Nevada
2001,1.7,2.4
2002,3.6,2.9


In [67]:
# Name the row axis and the column axis; both names appear in the frame's repr.
frame3.index.name = 'year'
frame3.columns.name = 'state'
# Unlike a Series, a DataFrame has no `name` attribute: `frame3.name = ...`
# merely attaches an ad-hoc Python attribute that pandas does not treat as
# metadata and never shows. Store the label in the `attrs` metadata dict
# instead (pandas documents `attrs` as experimental).
frame3.attrs['name'] = 'population'
frame3

state,Nevada,Ohio
year,Unnamed: 1_level_1,Unnamed: 2_level_1
2001,2.4,1.7
2002,2.9,3.6
2000,,1.5


### Genel Özellikler (cont.)

#### `DataFrame.values`

In [69]:
# The columns hold different kinds of data (ints, strings, floats), so the
# returned 2D array uses dtype=object to accommodate all of them.
frame2.values

array([[2000, 'Ohio', 1.5, nan],
       [2001, 'Ohio', 1.7, -1.2],
       [2002, 'Ohio', 3.6, nan],
       [2001, 'Nevada', 2.4, -1.5],
       [2002, 'Nevada', 2.9, -1.0]], dtype=object)

In [70]:
# Both columns are numeric here, so the result is a plain float array with
# nan marking the missing entry.
frame3.values

array([[2.4, 1.7],
       [2.9, 3.6],
       [nan, 1.5]])

Possible data inputs to the `DataFrame` constructor (**type** — notes):

**2D ndarray** — A matrix of data, passing optional row and column labels.

**dict of arrays, lists, or tuples** — Each sequence becomes a column in the DataFrame. All sequences must be the same length.

**NumPy structured/record array** — Treated as the “dict of arrays” case.

**dict of Series** — Each value becomes a column. Indexes from each Series are unioned together to form the result’s row index if no explicit index is passed.

**dict of dicts** — Each inner dict becomes a column. Keys are unioned to form the row index as in the “dict of Series” case.

**list of dicts or Series** — Each item becomes a row in the DataFrame. The union of dict keys or Series indexes becomes the DataFrame’s column labels.

**List of lists or tuples** — Treated as the “2D ndarray” case.

**Another DataFrame** — The DataFrame’s indexes are used unless different ones are passed.

**NumPy MaskedArray** — Like the “2D ndarray” case except masked values become NA/missing in the DataFrame result.