In [1]:
import numpy as np
import pandas as pd

In [2]:
#Create a series; np.nan = no value
new_series = pd.Series([1,2,3,np.nan,4,5])

In [3]:
#display datatype and contents
new_series

0    1.0
1    2.0
2    3.0
3    NaN
4    4.0
5    5.0
dtype: float64

In [4]:
#create datetime indexes of six days starting at jan1,2019
dates = pd.date_range('20190101', periods=6)

In [5]:
#Display datetime index
dates

DatetimeIndex(['2019-01-01', '2019-01-02', '2019-01-03', '2019-01-04',
               '2019-01-05', '2019-01-06'],
              dtype='datetime64[ns]', freq='D')

In [6]:
#Create dataframe object using numpy array, indexed by datetime index, label columns
data_frame = pd.DataFrame(np.random.randn(6,4), index=dates, columns=list('ABCD'))

In [7]:
#display dataframe object
data_frame

Unnamed: 0,A,B,C,D
2019-01-01,0.023244,0.444771,1.630457,-1.878056
2019-01-02,0.550677,0.154209,0.480038,-1.840289
2019-01-03,-0.983206,1.317816,-0.600999,0.351172
2019-01-04,0.194104,-0.116287,-1.755096,0.596257
2019-01-05,0.044678,-2.096094,-0.421191,0.354385
2019-01-06,1.200713,-0.485956,-0.104726,-0.350438


In [8]:
#create dataframe object with dictionary passing column names through dictionary keys
data_frame2 = pd.DataFrame({'A': 1.,
                            'B': pd.Timestamp('20190102'),
                            'C': pd.Series(1, index = list(range(4))),
                            'D': np.array([3]*4, dtype='int32'),
                            'E': 'Yes'
                           })


In [9]:
#display dataframe object
data_frame2

Unnamed: 0,A,B,C,D,E
0,1.0,2019-01-02,1,3,Yes
1,1.0,2019-01-02,1,3,Yes
2,1.0,2019-01-02,1,3,Yes
3,1.0,2019-01-02,1,3,Yes


In [10]:
#display datatypes within each column of the dataframe dictionary
data_frame2.dtypes

A           float64
B    datetime64[ns]
C             int64
D             int32
E            object
dtype: object

In [11]:
#View Data indicating rows from the top
data_frame2.head(1)

Unnamed: 0,A,B,C,D,E
0,1.0,2019-01-02,1,3,Yes


In [12]:
#View Data indicating rows from the bottom
data_frame2.tail(1)

Unnamed: 0,A,B,C,D,E
3,1.0,2019-01-02,1,3,Yes


In [13]:
#Look up the index and datatype
data_frame2.index

Int64Index([0, 1, 2, 3], dtype='int64')

In [14]:
#List column names in dataframe object
data_frame2.columns

Index(['A', 'B', 'C', 'D', 'E'], dtype='object')

In [15]:
#List contents of dataframe using to_numpy() dependent on pandas update
data_frame.to_numpy()

array([[ 0.02324356,  0.44477072,  1.63045662, -1.87805552],
       [ 0.55067703,  0.15420882,  0.48003806, -1.84028918],
       [-0.98320592,  1.31781616, -0.60099917,  0.35117209],
       [ 0.19410396, -0.11628734, -1.75509642,  0.59625741],
       [ 0.0446778 , -2.09609406, -0.42119076,  0.35438516],
       [ 1.20071321, -0.48595568, -0.10472559, -0.35043783]])

In [16]:
#List contents of dataframe using to_numpy() dependent on pandas update
data_frame2.to_numpy()

array([[1.0, Timestamp('2019-01-02 00:00:00'), 1, 3, 'Yes'],
       [1.0, Timestamp('2019-01-02 00:00:00'), 1, 3, 'Yes'],
       [1.0, Timestamp('2019-01-02 00:00:00'), 1, 3, 'Yes'],
       [1.0, Timestamp('2019-01-02 00:00:00'), 1, 3, 'Yes']], dtype=object)

In [17]:
#Tabulate detailed summary statistics of each column in the data frame
data_frame.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.171702,-0.130257,-0.128586,-0.461161
std,0.717632,1.140151,1.133098,1.128271
min,-0.983206,-2.096094,-1.755096,-1.878056
25%,0.028602,-0.393539,-0.556047,-1.467826
50%,0.119391,0.018961,-0.262958,0.000367
75%,0.461534,0.37213,0.333847,0.353582
max,1.200713,1.317816,1.630457,0.596257


In [18]:
#Tabulate detailed summary statistics of each column in the data frame
data_frame2.describe()

Unnamed: 0,A,C,D
count,4.0,4.0,4.0
mean,1.0,1.0,3.0
std,0.0,0.0,0.0
min,1.0,1.0,3.0
25%,1.0,1.0,3.0
50%,1.0,1.0,3.0
75%,1.0,1.0,3.0
max,1.0,1.0,3.0


In [19]:
#Transpose Data
data_frame.T

Unnamed: 0,2019-01-01 00:00:00,2019-01-02 00:00:00,2019-01-03 00:00:00,2019-01-04 00:00:00,2019-01-05 00:00:00,2019-01-06 00:00:00
A,0.023244,0.550677,-0.983206,0.194104,0.044678,1.200713
B,0.444771,0.154209,1.317816,-0.116287,-2.096094,-0.485956
C,1.630457,0.480038,-0.600999,-1.755096,-0.421191,-0.104726
D,-1.878056,-1.840289,0.351172,0.596257,0.354385,-0.350438


In [20]:
#Transpose Data
data_frame2.T

Unnamed: 0,0,1,2,3
A,1,1,1,1
B,2019-01-02 00:00:00,2019-01-02 00:00:00,2019-01-02 00:00:00,2019-01-02 00:00:00
C,1,1,1,1
D,3,3,3,3
E,Yes,Yes,Yes,Yes


In [21]:
#Sort data frame column order (by axis) with sort_index 
data_frame.sort_index(axis=1, ascending=False)

Unnamed: 0,D,C,B,A
2019-01-01,-1.878056,1.630457,0.444771,0.023244
2019-01-02,-1.840289,0.480038,0.154209,0.550677
2019-01-03,0.351172,-0.600999,1.317816,-0.983206
2019-01-04,0.596257,-1.755096,-0.116287,0.194104
2019-01-05,0.354385,-0.421191,-2.096094,0.044678
2019-01-06,-0.350438,-0.104726,-0.485956,1.200713


In [22]:
#Sort values by values in column B
data_frame.sort_values(by='B')

Unnamed: 0,A,B,C,D
2019-01-05,0.044678,-2.096094,-0.421191,0.354385
2019-01-06,1.200713,-0.485956,-0.104726,-0.350438
2019-01-04,0.194104,-0.116287,-1.755096,0.596257
2019-01-02,0.550677,0.154209,0.480038,-1.840289
2019-01-01,0.023244,0.444771,1.630457,-1.878056
2019-01-03,-0.983206,1.317816,-0.600999,0.351172


In [23]:
#Display Column A from data_frame object along with dateindex
data_frame['A']

2019-01-01    0.023244
2019-01-02    0.550677
2019-01-03   -0.983206
2019-01-04    0.194104
2019-01-05    0.044678
2019-01-06    1.200713
Freq: D, Name: A, dtype: float64

In [24]:
#Display the first three rows in data_frame2 object (slice rows)
data_frame2[0:3]

Unnamed: 0,A,B,C,D,E
0,1.0,2019-01-02,1,3,Yes
1,1.0,2019-01-02,1,3,Yes
2,1.0,2019-01-02,1,3,Yes


In [25]:
#List rows corresponding to to specific date indexes in obj data_frame (slice rows)
data_frame['20190101':'20190103']

Unnamed: 0,A,B,C,D
2019-01-01,0.023244,0.444771,1.630457,-1.878056
2019-01-02,0.550677,0.154209,0.480038,-1.840289
2019-01-03,-0.983206,1.317816,-0.600999,0.351172


In [26]:
#List dataframe values in all columns corresponding to date index 0 (the first row, date index)
data_frame.loc[dates[0]]

A    0.023244
B    0.444771
C    1.630457
D   -1.878056
Name: 2019-01-01 00:00:00, dtype: float64

In [27]:
#Display all rows in columns A and B
data_frame.loc[:,['A', 'B']]

Unnamed: 0,A,B
2019-01-01,0.023244,0.444771
2019-01-02,0.550677,0.154209
2019-01-03,-0.983206,1.317816
2019-01-04,0.194104,-0.116287
2019-01-05,0.044678,-2.096094
2019-01-06,1.200713,-0.485956


In [28]:
#Display all rows corresponding to specific dates in columns A & B
data_frame.loc['20190101': '20190104', ['A', 'B']]

Unnamed: 0,A,B
2019-01-01,0.020301,-0.224572
2019-01-02,0.171315,1.784813
2019-01-03,1.347328,-0.821988
2019-01-04,-1.363023,0.203241


In [28]:
#List values in columns A and B corresponding to the first row, date index Jan 1 2019
data_frame.loc['20190101', ['A', 'B']]

A    0.023244
B    0.444771
Name: 2019-01-01 00:00:00, dtype: float64

In [29]:
#Display the scalar value in the first row in column A
data_frame.loc[dates[0], 'A']

0.02324356257387112

In [30]:
#Display the scalar value in the first row in column A
data_frame.at[dates[0], 'A']

0.02324356257387112

In [31]:
#List values in DataFrame from row 3 from all columns
data_frame.iloc[3]

A    0.194104
B   -0.116287
C   -1.755096
D    0.596257
Name: 2019-01-04 00:00:00, dtype: float64

In [32]:
#Display values in rows 1, 2, 4 and column 0 and 2 
data_frame.iloc[[1, 2, 4], [0,2]]

Unnamed: 0,A,C
2019-01-02,0.550677,0.480038
2019-01-03,-0.983206,-0.600999
2019-01-05,0.044678,-0.421191


In [33]:
#Display rows 2 - 4 (1 - 3) and all columns
data_frame2.iloc[1:4, :]

Unnamed: 0,A,B,C,D,E
1,1.0,2019-01-02,1,3,Yes
2,1.0,2019-01-02,1,3,Yes
3,1.0,2019-01-02,1,3,Yes


In [34]:
#Display all dataframe rows, the first column and the fifth column
data_frame2.iloc[:, [0,4]]

Unnamed: 0,A,E
0,1.0,Yes
1,1.0,Yes
2,1.0,Yes
3,1.0,Yes


In [35]:
#Display the scalar value in the second row and second column
data_frame.iloc[1,1]

0.15420881798036798

In [36]:
#Display the string in dataframe2 in the third row and fifth column using .iat method
data_frame2.iat[2,4]

'Yes'

In [42]:
#Filter data_frame by values in column A greather than zero
data_frame[data_frame.A>0]

Unnamed: 0,A,B,C,D
2019-01-01,0.023244,0.444771,1.630457,-1.878056
2019-01-02,0.550677,0.154209,0.480038,-1.840289
2019-01-04,0.194104,-0.116287,-1.755096,0.596257
2019-01-05,0.044678,-2.096094,-0.421191,0.354385
2019-01-06,1.200713,-0.485956,-0.104726,-0.350438


In [38]:
#Filter data_frame by values in all columns greater than zero
data_frame[data_frame>0]

Unnamed: 0,A,B,C,D
2019-01-01,0.023244,0.444771,1.630457,
2019-01-02,0.550677,0.154209,0.480038,
2019-01-03,,1.317816,,0.351172
2019-01-04,0.194104,,,0.596257
2019-01-05,0.044678,,,0.354385
2019-01-06,1.200713,,,


In [39]:
#Filter data_frame by values in column A less than than zero
data_frame[data_frame<0]

Unnamed: 0,A,B,C,D
2019-01-01,,,,-1.878056
2019-01-02,,,,-1.840289
2019-01-03,-0.983206,,-0.600999,
2019-01-04,,-0.116287,-1.755096,
2019-01-05,,-2.096094,-0.421191,
2019-01-06,,-0.485956,-0.104726,-0.350438


In [40]:
#Copy DataFrame into object DataFrame2
data_frame2 = data_frame.copy()

In [41]:
#Create column E in dataframe2 and list values
data_frame2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']

In [43]:
#Display DataFrame2
data_frame2

Unnamed: 0,A,B,C,D,E
2019-01-01,0.020301,-0.224572,0.500634,-0.251051,one
2019-01-02,0.171315,1.784813,-0.831449,-0.596673,one
2019-01-03,1.347328,-0.821988,-0.428355,-0.610691,two
2019-01-04,-1.363023,0.203241,3.178943,-0.15664,three
2019-01-05,-0.336574,-1.776838,0.129316,-1.576318,four
2019-01-06,-0.95872,0.719363,0.098674,-0.836064,three


In [44]:
#Filter DataFrame2 by values in column E equal to two and four
data_frame2[data_frame2['E'].isin(['two', 'four'])]

Unnamed: 0,A,B,C,D,E
2019-01-03,1.347328,-0.821988,-0.428355,-0.610691,two
2019-01-05,-0.336574,-1.776838,0.129316,-1.576318,four
