# [Intro to Data Structures](https://pandas.pydata.org/pandas-docs/version/0.17.0/dsintro.html#series)

# [Series](https://pandas.pydata.org/pandas-docs/version/0.17.0/dsintro.html#series)

In [3]:
import numpy as np
import pandas as pd

In [None]:
s = pd.Series(np.random.randn(5), index=['a', 'b', 'c', 'd', 'e'])
s

a   -0.179276
b    1.004520
c   -1.344587
d   -1.699669
e   -1.074553
dtype: float64

In [None]:
s.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [None]:
pd.Series(np.random.randn(5))

0   -0.787171
1    0.068485
2   -0.210245
3    1.436049
4    0.613123
dtype: float64

In [None]:
d = {'a' : 0., 'b' : 1., 'c' : 2.}
pd.Series(d)

a    0.0
b    1.0
c    2.0
dtype: float64

In [None]:
pd.Series(d, index=['b', 'c', 'd', 'a'])

b    1.0
c    2.0
d    NaN
a    0.0
dtype: float64

From scalar value If data is a scalar value, an index must be provided. The value will be repeated to match the length of index



In [None]:
pd.Series(5, index=['a', 'b', 'c', 'd', 'e'])

a    5
b    5
c    5
d    5
e    5
dtype: int64

In [None]:
s

a   -0.179276
b    1.004520
c   -1.344587
d   -1.699669
e   -1.074553
dtype: float64

In [None]:
s[0]

-0.1792759755846804

In [None]:
s[:3]

a   -0.179276
b    1.004520
c   -1.344587
dtype: float64

In [None]:
s[s > s.median()]

a   -0.179276
b    1.004520
dtype: float64

In [None]:
s[[4,3,1]]

e   -1.074553
d   -1.699669
b    1.004520
dtype: float64

In [None]:
np.exp(s)

a    0.835875
b    2.730596
c    0.260647
d    0.182744
e    0.341450
dtype: float64

In [None]:
s['a'] == s[[0,]]

a    True
dtype: bool

In [None]:
s['a']

-0.1792759755846804

In [None]:
s['e']

-1.0745530446432012

In [None]:
s

a   -0.179276
b    1.004520
c   -1.344587
d   -1.699669
e   -1.074553
dtype: float64

In [None]:
'e' in s

True

In [None]:
'f' in s

False

In [None]:
s.get('f', np.nan)

nan

In [None]:
s+s

a   -0.358552
b    2.009040
c   -2.689173
d   -3.399339
e   -2.149106
dtype: float64

In [None]:
s*2

a   -0.358552
b    2.009040
c   -2.689173
d   -3.399339
e   -2.149106
dtype: float64

In [None]:
np.exp(s)

a    0.835875
b    2.730596
c    0.260647
d    0.182744
e    0.341450
dtype: float64

In [None]:
s[1:]

b    1.004520
c   -1.344587
d   -1.699669
e   -1.074553
dtype: float64

In [None]:
s[:-1]

a   -0.179276
b    1.004520
c   -1.344587
d   -1.699669
dtype: float64

In [None]:
s[1:] + s[:-1]

a         NaN
b    2.009040
c   -2.689173
d   -3.399339
e         NaN
dtype: float64

In [None]:
s = pd.Series(np.random.randn(5), name='something')
s

0   -0.769761
1   -0.779107
2   -0.745004
3   -0.567864
4    0.539211
Name: something, dtype: float64

In [None]:
s.name

'something'

# [DataFrame](https://pandas.pydata.org/pandas-docs/version/0.17.0/dsintro.html#dataframe)

In [34]:
d = {'one' : pd.Series([1., 2., 3.], index=['a', 'b', 'c']), 
     'two' : pd.Series([1., 2., 3., 4.], index=['a', 'b', 'c', 'd'])}

df = pd.DataFrame(d)
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [None]:
pd.DataFrame(d, index=['d', 'b', 'a'])

Unnamed: 0,one,two
d,,4.0
b,2.0,2.0
a,1.0,1.0


In [None]:
pd.DataFrame(d, index=['d', 'b', 'a'], columns=['two', 'three'])

Unnamed: 0,two,three
d,4.0,
b,2.0,
a,1.0,


In [None]:
df.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [None]:
df.columns

Index(['one', 'two'], dtype='object')

In [None]:
d = {'one' : [1., 2., 3., 4.],
     'two' : [4., 3., 2., 1.]}

pd.DataFrame(d)

Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0


In [None]:
pd.DataFrame(d, index=['a', 'b', 'c', 'd'])

Unnamed: 0,one,two
a,1.0,4.0
b,2.0,3.0
c,3.0,2.0
d,4.0,1.0


In [4]:
data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'a10')])
data

array([(0, 0., b''), (0, 0., b'')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [5]:
data[:] = [(1, 2., 'Hello'), (2, 3., "World")]
data

array([(1, 2., b'Hello'), (2, 3., b'World')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [6]:
pd.DataFrame(data)

Unnamed: 0,A,B,C
0,1,2.0,b'Hello'
1,2,3.0,b'World'


In [7]:
pd.DataFrame(data, index=['first', 'second'])

Unnamed: 0,A,B,C
first,1,2.0,b'Hello'
second,2,3.0,b'World'


In [8]:
pd.DataFrame(data, columns=['C', 'A', 'B'])

Unnamed: 0,C,A,B
0,b'Hello',1,2.0
1,b'World',2,3.0


In [13]:
pd.DataFrame(data, index=['first', 'second'], columns=['C', 'A', 'B', 'D'])

Unnamed: 0,C,A,B,D
first,b'Hello',1,2.0,
second,b'World',2,3.0,


In [14]:
data2 = [{'a' : 1, 'b' : 2}, {'a' : 5, 'b' : 10, 'c' : 20}]

pd.DataFrame(data2)

Unnamed: 0,a,b,c
0,1,2,
1,5,10,20.0


In [15]:
pd.DataFrame(data2, index=['first', 'second'])

Unnamed: 0,a,b,c
first,1,2,
second,5,10,20.0


In [19]:
pd.DataFrame(data2, columns=['a', 'b'])

Unnamed: 0,a,b
0,1,2
1,5,10


In [24]:
pd.DataFrame(
  {('a', 'b'): {('A', 'B'): 1, ('A', 'C'): 2},
  ('a', 'a'): {('A', 'C'): 3, ('A', 'B'): 4},
  ('a', 'c'): {('A', 'B'): 5, ('A', 'C'): 6},
  ('b', 'a'): {('A', 'C'): 7, ('A', 'B'): 8},
  ('b', 'b'): {('A', 'D'): 9, ('A', 'B'): 10}}
)

Unnamed: 0_level_0,Unnamed: 1_level_0,a,a,a,b,b
Unnamed: 0_level_1,Unnamed: 1_level_1,b,a,c,a,b
A,B,1.0,4.0,5.0,8.0,10.0
A,C,2.0,3.0,6.0,7.0,
A,D,,,,,9.0


In [25]:
data

array([(1, 2., b'Hello'), (2, 3., b'World')],
      dtype=[('A', '<i4'), ('B', '<f4'), ('C', 'S10')])

In [26]:
pd.DataFrame.from_records(data, index='C')

Unnamed: 0_level_0,A,B
C,Unnamed: 1_level_1,Unnamed: 2_level_1
b'Hello',1,2.0
b'World',2,3.0


In [28]:
# https://pandas.pydata.org/pandas-docs/version/0.23/generated/pandas.DataFrame.from_items.html
pd.DataFrame.from_dict([('A', [1, 2, 3]), ('B', [4, 5, 6])])

Unnamed: 0,0,1
0,A,"[1, 2, 3]"
1,B,"[4, 5, 6]"


In [32]:
# pd.DataFrame.from_items([('A', [1, 2, 3]), ('B', [4, 5, 6])], orient='index', columns=['one', 'two', 'three'])

In [37]:
# https://pandas.pydata.org/pandas-docs/version/0.17.0/dsintro.html#column-selection-addition-deletion
df

Unnamed: 0,one,two
a,1.0,1.0
b,2.0,2.0
c,3.0,3.0
d,,4.0


In [38]:
df['one']

a    1.0
b    2.0
c    3.0
d    NaN
Name: one, dtype: float64

In [40]:
df['three'] = df['one'] * df['two']
df['flag'] = df['one'] > 2
df

Unnamed: 0,one,two,three,flag
a,1.0,1.0,1.0,False
b,2.0,2.0,4.0,False
c,3.0,3.0,9.0,True
d,,4.0,,False


In [43]:
del df['two']
three = df.pop('three')
df

Unnamed: 0,one,flag
a,1.0,False
b,2.0,False
c,3.0,True
d,,False


In [44]:
three

a    1.0
b    4.0
c    9.0
d    NaN
Name: three, dtype: float64

In [45]:
df['foo'] = 'bar'
df

Unnamed: 0,one,flag,foo
a,1.0,False,bar
b,2.0,False,bar
c,3.0,True,bar
d,,False,bar


In [46]:
df['one_trunc'] = df['one'][:2]
df

Unnamed: 0,one,flag,foo,one_trunc
a,1.0,False,bar,1.0
b,2.0,False,bar,2.0
c,3.0,True,bar,
d,,False,bar,


In [47]:
df.insert(1, 'bar', df['one'])
df

Unnamed: 0,one,bar,flag,foo,one_trunc
a,1.0,1.0,False,bar,1.0
b,2.0,2.0,False,bar,2.0
c,3.0,3.0,True,bar,
d,,,False,bar,


In [50]:
iris = pd.read_csv('https://gist.githubusercontent.com/curran/a08a1080b88344b0c8a7/raw/0e7a9b0a5d22642a06d3d5b9bcbad9890c8ee534/iris.csv')
iris.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [51]:
# assign always returns a copy of the data, leaving the original DataFrame untouched.
iris.assign(sepal_ratio = iris['sepal_width'] / iris['sepal_length']).head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,sepal_ratio
0,5.1,3.5,1.4,0.2,setosa,0.686275
1,4.9,3.0,1.4,0.2,setosa,0.612245
2,4.7,3.2,1.3,0.2,setosa,0.680851
3,4.6,3.1,1.5,0.2,setosa,0.673913
4,5.0,3.6,1.4,0.2,setosa,0.72


In [53]:
iris.assign(sepal_ratio = lambda x: (x['sepal_width'] / x['sepal_length'])).tail()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species,sepal_ratio
145,6.7,3.0,5.2,2.3,virginica,0.447761
146,6.3,2.5,5.0,1.9,virginica,0.396825
147,6.5,3.0,5.2,2.0,virginica,0.461538
148,6.2,3.4,5.4,2.3,virginica,0.548387
149,5.9,3.0,5.1,1.8,virginica,0.508475


In [61]:
# iris.query('sepal_length > 5.0').assign(sepal_ratio = lambda x: x.sepal_width / x.sepal_length, sepal_ratio = lambda x: x.petal_width / x.petal_length).plot(kind='scatter', x='sepal_ratio', y='petal_ratio')

![ss](https://pandas.pydata.org/pandas-docs/version/0.17.0/_images/basics_assign.png)

In [67]:
# df.assign(C = lambda x: x['A'] + x['B']).assign(D = lambda x: x['A'] + x['C'])

<table border="1" class="docutils">
<colgroup>
<col width="50%">
<col width="33%">
<col width="17%">
</colgroup>
<thead valign="bottom">
<tr class="row-odd"><th class="head">Operation</th>
<th class="head">Syntax</th>
<th class="head">Result</th>
</tr>
</thead>
<tbody valign="top">
<tr class="row-even"><td>Select column</td>
<td><tt class="docutils literal"><span class="pre">df[col]</span></tt></td>
<td>Series</td>
</tr>
<tr class="row-odd"><td>Select row by label</td>
<td><tt class="docutils literal"><span class="pre">df.loc[label]</span></tt></td>
<td>Series</td>
</tr>
<tr class="row-even"><td>Select row by integer location</td>
<td><tt class="docutils literal"><span class="pre">df.iloc[loc]</span></tt></td>
<td>Series</td>
</tr>
<tr class="row-odd"><td>Slice rows</td>
<td><tt class="docutils literal"><span class="pre">df[5:10]</span></tt></td>
<td>DataFrame</td>
</tr>
<tr class="row-even"><td>Select rows by boolean vector</td>
<td><tt class="docutils literal"><span class="pre">df[bool_vec]</span></tt></td>
<td>DataFrame</td>
</tr>
</tbody>
</table>

In [65]:
df

Unnamed: 0,one,bar,flag,foo,one_trunc
a,1.0,1.0,False,bar,1.0
b,2.0,2.0,False,bar,2.0
c,3.0,3.0,True,bar,
d,,,False,bar,


In [68]:
df.loc['b']

one            2.0
bar            2.0
flag         False
foo            bar
one_trunc      2.0
Name: b, dtype: object

In [69]:
df.iloc[2]

one           3.0
bar           3.0
flag         True
foo           bar
one_trunc     NaN
Name: c, dtype: object

In [None]:
# https://pandas.pydata.org/pandas-docs/version/0.17.0/dsintro.html#data-alignment-and-arithmetic