# Pandas for data analysis

### Series

In [17]:
import numpy as np
import pandas as pd
from numpy.random import randn

In [2]:
# Sample inputs reused by the Series examples below: index labels, a plain
# list of values, the same values as a NumPy array, and a label -> value dict.
labels = list("abc")
data = [11, 22, 33]
arr = np.asarray(data)
myhash = dict(zip(labels, data))

In [3]:
# Build a Series from a plain list; with no index supplied, pandas assigns
# the default integer index 0..n-1.
pd.Series(data=data)

0    11
1    22
2    33
dtype: int64

In [4]:
# Same values, but indexed by the custom labels instead of 0..n-1.
pd.Series(data=data, index=labels)

a    11
b    22
c    33
dtype: int64

In [5]:
# Build a Series from a NumPy array. The Series keeps the array's dtype
# (int32 in the output shown here, versus int64 for the plain-list example).
pd.Series(data=arr)

0    11
1    22
2    33
dtype: int32

In [6]:
# Build a Series from a dict: the keys become the index, the values the data.
pd.Series(data=myhash)

a    11
b    22
c    33
dtype: int64

In [7]:
# First labelled Series used in the lookup and addition examples.
series1 = pd.Series(
    data=[1, 2, 3, 4],
    index=["japan", "singapore", "south korea", "USA"],
)

In [9]:
# Second labelled Series; it shares "japan", "singapore" and "USA" with
# series1 but has "north korea" where series1 has "south korea".
series2 = pd.Series(
    data=[1, 2, 8, 4],
    index=["japan", "singapore", "north korea", "USA"],
)

In [10]:
# Look up a single value by its index label, like a dict lookup.
series1["japan"]

1

In [16]:
# Arithmetic between Series aligns on index labels, so the sum is already a
# Series and needs no pd.Series(...) wrapper. Labels present in only one
# operand ("north korea", "south korea") come out as NaN, which is also why
# the result is float64 rather than int64.
series1 + series2

USA            8.0
japan          2.0
north korea    NaN
singapore      4.0
south korea    NaN
dtype: float64

## DataFrames

In [18]:
# Fix the seed of NumPy's global random state so the randn() draw used to
# build the DataFrame below is reproducible.
np.random.seed(seed=101)

In [20]:
# 5x4 frame of standard-normal draws with row labels A-E and column labels W-Z.
df = pd.DataFrame(
    data=randn(5, 4),
    index=["A", "B", "C", "D", "E"],
    columns=["W", "X", "Y", "Z"],
)

In [21]:
# Display the whole frame (only 5 rows, so no .head() is needed).
df

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752
E,-0.116773,1.901755,0.238127,1.996652


#### Grabbing columns from a DataFrame

In [24]:
# Method 1: bracket notation with the column label.
# A single label returns that column as a Series named after the column.
df["W"]

A    0.302665
B   -0.134841
C    0.807706
D   -0.497104
E   -0.116773
Name: W, dtype: float64

In [25]:
# Method 2: attribute notation; gives the same Series as df["W"] here.
# It only works when the column name is a valid Python identifier that does
# not clash with an existing DataFrame attribute or method, so bracket
# notation is the safer general choice.
df.W

A    0.302665
B   -0.134841
C    0.807706
D   -0.497104
E   -0.116773
Name: W, dtype: float64

### Examples

In [29]:
# Getting multiple columns: pass a list of column labels (note the double
# brackets). The result is a DataFrame, not a Series.
df[["W", "Z"]]

Unnamed: 0,W,Z
A,0.302665,-1.159119
B,-0.134841,0.184502
C,0.807706,0.329646
D,-0.497104,0.484752
E,-0.116773,1.996652


In [41]:
# Creating a new column in an existing DataFrame: assigning to a label that is
# not yet a column adds it to df in place. Here it holds the element-wise sum
# of columns W and Y.
w_plus_y = df["W"] + df["Y"]
df["new"] = w_plus_y
df

Unnamed: 0,W,X,Y,Z,new
A,0.302665,1.693723,-1.706086,-1.159119,-1.40342
B,-0.134841,0.390528,0.166905,0.184502,0.032064
C,0.807706,0.07296,0.638787,0.329646,1.446493
D,-0.497104,-0.75407,-0.943406,0.484752,-1.44051
E,-0.116773,1.901755,0.238127,1.996652,0.121354


In [42]:
# Deleting an existing column. drop() returns a new DataFrame, so the result
# is assigned back to df rather than mutating the frame with inplace=True.
# columns="new" says explicitly that a column (not a row) label is dropped.
df = df.drop(columns="new")
df

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752
E,-0.116773,1.901755,0.238127,1.996652


In [47]:
# Deleting existing rows: drop() returns a new DataFrame without row "E".
# The result is only displayed here, not assigned back, so df itself still
# contains row "E" afterwards.
df.drop(index="E")

Unnamed: 0,W,X,Y,Z
A,0.302665,1.693723,-1.706086,-1.159119
B,-0.134841,0.390528,0.166905,0.184502
C,0.807706,0.07296,0.638787,0.329646
D,-0.497104,-0.75407,-0.943406,0.484752


### Selecting rows from a DataFrame

In [48]:
# Label-based row selection: the row comes back as a Series whose index is
# the column labels and whose name is the row label.
df.loc["A"]

W    0.302665
X    1.693723
Y   -1.706086
Z   -1.159119
Name: A, dtype: float64