In [3]:
import numpy as np
import pandas as pd
from numpy.random import randn

### Series

In [6]:
# Index labels reused by the Series examples below.
labels = list("abc")

In [7]:
# Plain Python list of values; passed as Series data in the cells below.
mylist = [10, 20, 30]

In [8]:
# The same values as a NumPy array, used to build a Series from an ndarray.
arr = np.array(mylist)

In [9]:
# The last expression of a cell is displayed: show the array.
arr

array([10, 20, 30])

In [10]:
# Label -> value mapping pairing the same labels and values as a dict.
d = dict(a=10, b=20, c=30)

In [11]:
# No index is given, so the Series gets the default 0, 1, 2 integer index.
pd.Series(data=mylist)

0    10
1    20
2    30
dtype: int64

In [12]:
# A NumPy array works as data exactly like the list did.
pd.Series(data=arr)

0    10
1    20
2    30
dtype: int64

In [13]:
# Strings are valid data too; the resulting Series has dtype object.
pd.Series(data=labels)

0    a
1    b
2    c
dtype: object

In [16]:
# Explicit labels replace the default integer index.
pd.Series(
    data=mylist,
    index=labels,
)

a    10
b    20
c    30
dtype: int64

In [17]:
# String values labelled 1..3. These integers are index labels, not positions.
ser1 = pd.Series(
    data=["Ottoman", "South Africa", "South Korea"],
    index=[1, 2, 3],
)

In [18]:
# Display ser1 to check its integer labels 1-3.
ser1

1         Ottoman
2    South Africa
3     South Korea
dtype: object

In [22]:
# ser1 is indexed by the integer labels 1, 2, 3, so this is a lookup by
# LABEL 1 (-> 'Ottoman'), not by position 1 (which would be 'South Africa').
# .loc makes the label-based intent explicit instead of relying on how plain
# [] resolves integers.
ser1.loc[1]

'Ottoman'

In [23]:
# Integer values labelled by country name.
ser2 = pd.Series(
    data=[5, 6, 7, 8],
    index=["Morocco", "Turkmenistan", "Kazakistan", "Qatar"],
)

In [24]:
# Display ser2: country names are the index, the integers are the values.
ser2

Morocco         5
Turkmenistan    6
Kazakistan      7
Qatar           8
dtype: int64

In [25]:
# Arithmetic between Series aligns on index labels. ser1 (labels 1-3) and
# ser2 (country names) share no label, so every row of the result is NaN.
ser1 + ser2

1               NaN
2               NaN
3               NaN
Kazakistan      NaN
Morocco         NaN
Qatar           NaN
Turkmenistan    NaN
dtype: object

### DataFrames

In [31]:
# Seed NumPy's global RNG so the matrix below is identical on every run.
np.random.seed(101)

# 5 x 4 matrix of standard-normal samples. `randn` is imported together with
# the other imports in the first cell rather than in the middle of the notebook.
rand_mat = randn(5, 4)
rand_mat

array([[ 2.70684984,  0.62813271,  0.90796945,  0.50382575],
       [ 0.65111795, -0.31931804, -0.84807698,  0.60596535],
       [-2.01816824,  0.74012206,  0.52881349, -0.58900053],
       [ 0.18869531, -0.75887206, -0.93323722,  0.95505651],
       [ 0.19079432,  1.97875732,  2.60596728,  0.68350889]])

In [34]:
# Wrap the random matrix in a labelled frame: rows A-E, columns W-Z.
df = pd.DataFrame(
    data=rand_mat,
    index=list("ABCDE"),
    columns=list("WXYZ"),
)

In [35]:
# Display the frame: row labels A-E, columns W-Z.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


#### grab the data from df

In [37]:
# Indexing with a single column label returns that column as a Series.
df["W"]

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [38]:
# Confirm that a single selected column is a pandas Series.
type(df["W"])

pandas.core.series.Series

In [39]:
# Column labels to select together in the next cell.
desired_list = list("XY")

In [40]:
# Indexing with a list of labels returns a DataFrame with just those columns.
df[desired_list]

Unnamed: 0,X,Y
A,0.628133,0.907969
B,-0.319318,-0.848077
C,0.740122,0.528813
D,-0.758872,-0.933237
E,1.978757,2.605967


In [41]:
# Same selection with the list written inline (note the double brackets).
df[["X", "Y"]]

Unnamed: 0,X,Y
A,0.628133,0.907969
B,-0.319318,-0.848077
C,0.740122,0.528813
D,-0.758872,-0.933237
E,1.978757,2.605967


In [43]:
# Attribute-style access is not recommended: it only works when the column
# name is a valid Python identifier that does not clash with an existing
# DataFrame attribute or method. Prefer df["W"].
df.W

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

#### add a new column to an existing DataFrame

In [44]:
# Assigning to a key that does not exist yet adds a new column to df itself:
# Q is the row-wise sum of W and Y.
df["Q"] = df["W"] + df["Y"]

In [45]:
# df now carries the extra column Q.
df

Unnamed: 0,W,X,Y,Z,Q
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


#### remove the column

In [46]:
# drop() returns a NEW frame without column Q; df itself is left untouched
# because the result is not assigned.
df.drop(columns="Q")

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [47]:
# Q is still present: the previous drop() result was displayed, not stored.
df

Unnamed: 0,W,X,Y,Z,Q
A,2.70685,0.628133,0.907969,0.503826,3.614819
B,0.651118,-0.319318,-0.848077,0.605965,-0.196959
C,-2.018168,0.740122,0.528813,-0.589001,-1.489355
D,0.188695,-0.758872,-0.933237,0.955057,-0.744542
E,0.190794,1.978757,2.605967,0.683509,2.796762


In [48]:
# Make the removal permanent by rebinding df to the frame returned by drop().
# Reassignment is preferred over inplace=True: it leaves df without column Q
# either way, but keeps the operation chainable and the data flow explicit.
df = df.drop(columns="Q")

In [49]:
# Column Q has now been removed from df.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


#### rows

In [50]:
# Dropping by row label: returns a new frame without row C (df is unchanged).
df.drop(index="C")

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [51]:
# Row C is still present: the previous drop() result was not assigned back.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [53]:
# .loc selects by label: a single row label returns that row as a Series.
df.loc["A"]

W    2.706850
X    0.628133
Y    0.907969
Z    0.503826
Name: A, dtype: float64

In [54]:
# A list of labels (even a one-element list) returns a DataFrame instead.
df.loc[["A"]]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826


#### integer location

In [55]:
# .iloc selects by integer position: position 0 is the first row (label A).
df.iloc[0]

W    2.706850
X    0.628133
Y    0.907969
Z    0.503826
Name: A, dtype: float64

In [57]:
# Position 3 is the fourth row (label D).
df.iloc[3]

W    0.188695
X   -0.758872
Y   -0.933237
Z    0.955057
Name: D, dtype: float64

In [58]:
# Positional slices exclude the end position: this yields rows 0 and 1 (A, B).
df.iloc[:2]

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965


In [60]:
# Several row labels at once; the labels do not have to be adjacent.
df.loc[["B", "E"]]

Unnamed: 0,W,X,Y,Z
B,0.651118,-0.319318,-0.848077,0.605965
E,0.190794,1.978757,2.605967,0.683509


#### subset of the data

In [63]:
# Two-step selection: pick rows A and B first, then columns Y and Z from that
# intermediate frame. Fine for reading values, but do not assign through a
# chained selection like this.
df.loc[["A", "B"]][["Y", "Z"]]

Unnamed: 0,Y,Z
A,0.907969,0.503826
B,-0.848077,0.605965


In [64]:
# Rows and columns selected in a single .loc[rows, columns] call.
df.loc[["A", "B"], ["Y", "Z"]]

Unnamed: 0,Y,Z
A,0.907969,0.503826
B,-0.848077,0.605965
