# Pandas

#### Dataframes and Series

- A DataFrame is a two-dimensional, labeled table (rows and columns).
- A Series is a one-dimensional, labeled array.

In [1]:
# importing the libraries
import numpy as np
import pandas as pd

In [2]:
# Build a Series from a plain Python list. The missing value (np.nan)
# makes pandas store the whole Series as float64.
s = pd.Series(data=[1, 3, 5, np.nan, 6, 8])

In [3]:
# Show the Series: default 0..5 integer index, float64 dtype because of the NaN.
s

0    1.0
1    3.0
2    5.0
3    NaN
4    6.0
5    8.0
dtype: float64

In [4]:
# Build the row index for the DataFrame created below: six consecutive
# calendar days starting on 2013-01-01.
dates = pd.date_range(start='2013-01-01', periods=6, freq='D')

In [5]:
# Show the DatetimeIndex: six daily timestamps, 2013-01-01 through 2013-01-06.
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [6]:
# Build a 6x4 frame of standard-normal draws, one row per entry in `dates`.
# The column labels are written as a literal instead of `list('ABCD')`: a later
# cell rebinds the name `list`, after which `list('ABCD')` raises TypeError if
# this cell is re-run.
# NOTE: the draw is unseeded, so the values differ on every run.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=['A', 'B', 'C', 'D'])

In [7]:
# Show the full frame: dates as the row index, A-D as columns.
df

Unnamed: 0,A,B,C,D
2013-01-01,-0.400382,1.403351,0.512667,0.214407
2013-01-02,-0.609616,1.657276,1.925659,-0.583726
2013-01-03,-1.866863,0.296303,0.408153,0.235119
2013-01-04,-0.93687,-0.823123,0.789797,0.641975
2013-01-05,-0.882675,-0.779926,0.681746,0.104061
2013-01-06,0.080795,0.852465,-0.651634,-1.072279


In [8]:
# Get the first row by integer position; the result is a Series whose
# index is the column labels and whose name is the row's date.
df.iloc[0]

A   -0.400382
B    1.403351
C    0.512667
D    0.214407
Name: 2013-01-01 00:00:00, dtype: float64

In [9]:
# head(1) also returns the first row, but as a one-row DataFrame rather than a Series.
df.head(1)

Unnamed: 0,A,B,C,D
2013-01-01,-0.400382,1.403351,0.512667,0.214407


In [10]:
# Select column 'A' as a Series (bracket form of the attribute access `df.A`).
df['A']

2013-01-01   -0.400382
2013-01-02   -0.609616
2013-01-03   -1.866863
2013-01-04   -0.936870
2013-01-05   -0.882675
2013-01-06    0.080795
Freq: D, Name: A, dtype: float64

In [11]:
# Sample inputs reused by the Series examples that follow.
# NOTE(review): `list` and `dict` rebind Python builtins for the rest of the
# session; the names are kept because other cells refer to them.
labels = ['W', 'X', 'Y', 'Z']
list = [10, 20, 30, 40]
dict = {
    'w': 10,
    'x': 30,
    'y': 20,
    'z': 40,
}
array = np.array(list)  # same four values as `list`, as a NumPy array

In [12]:
# Series from a Python list; with no index given the labels default to 0..3.
pd.Series(data=list)

0    10
1    20
2    30
3    40
dtype: int64

In [13]:
# Same data, but labelled with the entries of `labels` instead of 0..3.
pd.Series(data=list, index=labels)

W    10
X    20
Y    30
Z    40
dtype: int64

In [14]:
# Create a Series from a dictionary: the keys become the index labels
# and the values become the data.
pd.Series(dict)

w    10
x    30
y    20
z    40
dtype: int64

In [15]:
# A dict whose values have mixed types: a scalar, a list, a tuple and a nested dict.
a = {
    'w': 10,
    'x': [30, 45, 89],
    'y': ('ab', 'cd', 'ef'),
    'z': {'a': 56, 'b': 89},
}

In [16]:
# Values of different types are stored as-is, so the dtype is `object`.
pd.Series(a)

w                    10
x          [30, 45, 89]
y          (ab, cd, ef)
z    {'a': 56, 'b': 89}
dtype: object

### Using an index

#### indexing a series

In [17]:
# Four sports as index labels, each mapped to an integer value.
sports1 = pd.Series(
    data=[1, 2, 3, 4],
    index=['Cricket', 'Football', 'Basketball', 'Golf'],
)

In [18]:
# A second Series with an overlapping but not identical set of labels:
# 'Hockey' appears here where sports1 has 'Football'.
sports2 = pd.Series(
    data=[1, 2, 5, 4],
    index=['Cricket', 'Hockey', 'Basketball', 'Golf'],
)

In [19]:
# Show sports1: sport names as the index, int64 values.
sports1

Cricket       1
Football      2
Basketball    3
Golf          4
dtype: int64

In [20]:
# Show sports2: sport names as the index, int64 values.
sports2

Cricket       1
Hockey        2
Basketball    5
Golf          4
dtype: int64

In [21]:
# Look up a single value by its index label.
sports1['Cricket']

1

##### Operations are also aligned on the index: values are matched by label, and a label present in only one Series produces NaN

In [22]:
# Addition aligns on index labels: labels found in both Series are summed,
# labels found in only one (Football, Hockey) give NaN, and the result is float64.
sports1 + sports2

Basketball    8.0
Cricket       2.0
Football      NaN
Golf          8.0
Hockey        NaN
dtype: float64