In [2]:
import pandas as pd

In [3]:
# Load the stock price sample from CSV into a DataFrame. No parse_dates
# argument is passed, so the `date` column is kept as strings (object dtype).
stocks = pd.read_csv("/data/stocks.small.csv")

In [4]:
# Preview the first five rows to sanity-check the load.
stocks.head(5)

Unnamed: 0,date,open,high,low,close,volume,adjclose,symbol
0,2000-07-18,144.8125,144.828125,141.4375,143.0,50683600.0,50.155473,INTC
1,2000-07-20,32.93751,34.25001,32.8125,33.75,3288300.0,8.789734,BEN
2,2000-07-24,64.25,67.312477,64.187523,64.75,948800.0,7.689567,APH
3,2000-07-26,21.875,22.125,20.9375,20.9375,1464300.0,15.61832,SHW
4,2000-07-26,42.0,42.312481,41.625,41.875,1397600.0,9.402721,STJ


In [5]:
# Number of rows, taken as the length of the frame's index.
len(stocks.index)

1846

# Common operations on a pandas DataFrame

1. Number of rows and columns 
2. Type of each column
3. Do we have any null values?
4. Slice the dataframe based on the row and/or column index
5. Slice the dataframe based on the column names
6. Filtering
7. Grouping 
8. Sorting 
9. Merging 

In [6]:
# Confirm the object returned by read_csv is a pandas DataFrame.
type(stocks)

pandas.core.frame.DataFrame

In [7]:
# 1. Number of rows and columns, as a (rows, columns) tuple.
stocks.shape

(1846, 8)

In [8]:
# 2-3. Per-column dtype and non-null count, plus index range and memory usage.
stocks.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1846 entries, 0 to 1845
Data columns (total 8 columns):
date        1846 non-null object
open        1846 non-null float64
high        1846 non-null float64
low         1846 non-null float64
close       1846 non-null float64
volume      1846 non-null float64
adjclose    1846 non-null float64
symbol      1846 non-null object
dtypes: float64(6), object(2)
memory usage: 115.5+ KB


In [9]:
# The dtypes attribute is itself a Series (column name -> dtype).
type(stocks.dtypes)

pandas.core.series.Series

In [10]:
# Data type of each column.
stocks.dtypes

date         object
open        float64
high        float64
low         float64
close       float64
volume      float64
adjclose    float64
symbol       object
dtype: object

In [11]:
# Column name -> dtype mapping as a plain Python dict.
stocks.dtypes.to_dict()

{'date': dtype('O'),
 'open': dtype('float64'),
 'high': dtype('float64'),
 'low': dtype('float64'),
 'close': dtype('float64'),
 'volume': dtype('float64'),
 'adjclose': dtype('float64'),
 'symbol': dtype('O')}

In [12]:
# Positional row slice: rows at positions 10 through 14, every column.
stocks.iloc[10:15]

Unnamed: 0,date,open,high,low,close,volume,adjclose,symbol
10,2000-08-23,41.125,41.375,40.187519,40.25,19686400.0,4.203095,NKE
11,2000-09-12,38.0,38.0,36.9375,37.6875,1431700.0,32.285666,IPG
12,2000-09-13,56.4375,57.6875,56.4375,57.125,4194400.0,15.66107,EXC
13,2000-09-22,42.75,43.0,42.5625,42.875,155100.0,20.370235,BXP
14,2000-10-02,176.25,176.312485,155.187515,156.999985,4101900.0,52.333328,BIIB


In [13]:
# Positional slice on both axes: rows 10-14 and the first five columns
# (the end position of each slice is exclusive).
stocks.iloc[10:15, :5]

Unnamed: 0,date,open,high,low,close
10,2000-08-23,41.125,41.375,40.187519,40.25
11,2000-09-12,38.0,38.0,36.9375,37.6875
12,2000-09-13,56.4375,57.6875,56.4375,57.125
13,2000-09-22,42.75,43.0,42.5625,42.875
14,2000-10-02,176.25,176.312485,155.187515,156.999985


In [14]:
# Bottom-right corner of the frame: last five rows, last five columns.
stocks.tail(5).iloc[:, -5:]

Unnamed: 0,low,close,volume,adjclose,symbol
1841,114.089996,115.209999,2520700.0,115.209999,AET
1842,127.830002,128.220001,1576600.0,128.220001,UHS
1843,23.99,24.17,7115500.0,24.17,BSX
1844,62.490002,62.84,2740500.0,62.84,ADI
1845,27.9,28.049999,3899900.0,28.049999,CSX


In [15]:
# Select columns by name; passing a list of labels returns a DataFrame.
# Only the first ten rows are previewed instead of rendering all 1846 rows.
stocks[["symbol", "date", "open", "close"]].head(10)

Unnamed: 0,symbol,date,open,close
0,INTC,2000-07-18,144.812500,143.000000
1,BEN,2000-07-20,32.937510,33.750000
2,APH,2000-07-24,64.250000,64.750000
3,SHW,2000-07-26,21.875000,20.937500
4,STJ,2000-07-26,42.000000,41.875000
5,GGP,2000-07-31,33.937519,33.875011
6,SBUX,2000-08-07,41.375038,42.812481
7,EQT,2000-08-09,55.562481,55.000000
8,BCR,2000-08-10,49.000000,52.125000
9,BEN,2000-08-15,35.687490,34.875000


In [16]:
# Percentage change from the opening to the closing price of each row.
# NOTE: the column name matches the DataFrame.pct_change method, so the column
# must be read with brackets (stocks["pct_change"]), never attribute access.
stocks["pct_change"] = (stocks["close"] - stocks["open"]) * 100 / stocks["open"]

# Preview the first ten rows rather than rendering the whole frame.
stocks[["symbol", "date", "open", "close", "pct_change"]].head(10)

Unnamed: 0,symbol,date,open,close,pct_change
0,INTC,2000-07-18,144.812500,143.000000,-1.251618
1,BEN,2000-07-20,32.937510,33.750000,2.466762
2,APH,2000-07-24,64.250000,64.750000,0.778210
3,SHW,2000-07-26,21.875000,20.937500,-4.285714
4,STJ,2000-07-26,42.000000,41.875000,-0.297619
5,GGP,2000-07-31,33.937519,33.875011,-0.184186
6,SBUX,2000-08-07,41.375038,42.812481,3.474179
7,EQT,2000-08-09,55.562481,55.000000,-1.012340
8,BCR,2000-08-10,49.000000,52.125000,6.377551
9,BEN,2000-08-15,35.687490,34.875000,-2.276680


In [17]:
# Ten rows with the largest pct_change, showing only the identifying columns.
(
    stocks[["symbol", "date", "pct_change"]]
    .sort_values(by="pct_change", ascending=False)
    .head(10)
)

Unnamed: 0,symbol,date,pct_change
894,ALK,2008-10-13,11.616167
871,CCL,2008-07-16,10.515807
132,ETFC,2001-12-05,10.0
33,FLIR,2000-12-11,9.090274
23,ENDP,2000-11-03,8.393369
105,HUM,2001-09-27,8.181818
759,KLAC,2007-07-17,8.120406
37,XLNX,2000-12-22,7.949791
425,FMC,2004-07-28,7.777783
893,MDT,2008-10-13,7.478685


In [18]:
# Filter with a boolean mask: keep the rows whose symbol is ALK.
stocks.loc[stocks["symbol"] == "ALK"]

Unnamed: 0,date,open,high,low,close,volume,adjclose,symbol,pct_change
17,2000-10-23,20.812481,22.0,20.687519,21.812481,2096400.0,5.26093,ALK,4.804809
466,2005-01-13,30.18,30.65,29.780001,29.85,1104800.0,7.19949,ALK,-1.093439
830,2008-03-20,18.799999,19.52,18.620001,18.98,4722800.0,4.577766,ALK,0.957452
894,2008-10-13,19.799999,22.1,19.23,22.1,5260800.0,5.330276,ALK,11.616167
906,2008-11-18,25.34,25.549999,24.33,25.35,4426400.0,6.11414,ALK,0.039463
1323,2012-06-05,32.439999,32.84,32.189999,32.630001,1301000.0,15.739992,ALK,0.585703
1786,2016-03-01,74.199997,77.099998,73.959999,77.07,1364700.0,76.74588,ALK,3.867929


In [19]:
# Filter rows where symbol equals 'ALK', written as a query expression string.
stocks.query("symbol == 'ALK'")

Unnamed: 0,date,open,high,low,close,volume,adjclose,symbol,pct_change
17,2000-10-23,20.812481,22.0,20.687519,21.812481,2096400.0,5.26093,ALK,4.804809
466,2005-01-13,30.18,30.65,29.780001,29.85,1104800.0,7.19949,ALK,-1.093439
830,2008-03-20,18.799999,19.52,18.620001,18.98,4722800.0,4.577766,ALK,0.957452
894,2008-10-13,19.799999,22.1,19.23,22.1,5260800.0,5.330276,ALK,11.616167
906,2008-11-18,25.34,25.549999,24.33,25.35,4426400.0,6.11414,ALK,0.039463
1323,2012-06-05,32.439999,32.84,32.189999,32.630001,1301000.0,15.739992,ALK,0.585703
1786,2016-03-01,74.199997,77.099998,73.959999,77.07,1364700.0,76.74588,ALK,3.867929


In [21]:
# Average traded volume per symbol.
stocks.groupby("symbol")["volume"].mean()

symbol
A       4.817800e+06
AA      2.177992e+07
AAL     2.598900e+06
AAP     1.274240e+06
AAPL    1.663662e+08
ABC     1.828200e+06
ABT     1.314022e+07
ACN     3.198850e+06
ADBE    4.725700e+06
ADI     2.909117e+06
ADM     4.904967e+06
ADP     1.074600e+06
ADS     7.642000e+05
ADSK    3.393800e+06
AEE     9.350750e+05
AEP     1.866267e+06
AES     4.188586e+06
AET     5.830220e+06
AFL     1.896633e+06
AGN     1.079380e+06
AIG     3.441720e+06
AIV     8.612000e+05
AIZ     1.017925e+06
AKAM    3.719875e+06
ALB     5.175200e+05
ALK     2.896700e+06
ALL     2.904833e+06
ALLE    4.711000e+05
ALXN    1.463200e+06
AMAT    2.433693e+07
            ...     
VRTX    1.641950e+06
VTR     1.545000e+06
VZ      1.336177e+07
WAT     1.269814e+06
WBA     4.418425e+06
WDC     3.105371e+06
WEC     6.506000e+05
WFC     2.181037e+07
WFM     4.218267e+06
WHR     9.932800e+05
WM      2.534883e+06
WMB     7.113100e+06
WMT     1.216526e+07
WRK     2.397300e+06
WU      4.839500e+06
WY      1.098333e+06
WYN   

In [23]:
# Per symbol: mean volume and the number of volume observations.
stocks.groupby("symbol")["volume"].agg(["mean", "count"])

Unnamed: 0_level_0,mean,count
symbol,Unnamed: 1_level_1,Unnamed: 2_level_1
A,4.817800e+06,2
AA,2.177992e+07,5
AAL,2.598900e+06,1
AAP,1.274240e+06,5
AAPL,1.663662e+08,3
ABC,1.828200e+06,2
ABT,1.314022e+07,6
ACN,3.198850e+06,2
ADBE,4.725700e+06,2
ADI,2.909117e+06,6


In [32]:
# Number of rows per symbol, most frequent first.
stocks["symbol"].value_counts()

ZBH      11
UPS      11
K        10
FLIR      9
KR        9
PHM       9
D         9
COF       9
CMI       8
NKE       8
EXR       8
FMC       8
NUE       8
KMB       8
SIG       8
ETN       8
PPG       8
JNJ       8
WMT       8
IFF       8
HAR       7
WFC       7
BMY       7
BBBY      7
MU        7
MTB       7
EQT       7
CINF      7
NEM       7
WDC       7
         ..
VRSK      1
SEE       1
RIG       1
ADP       1
ALXN      1
CME       1
TXN       1
HBI       1
INTU      1
OI        1
DISCK     1
UA        1
TWX       1
MPC       1
KORS      1
MNK       1
ORCL      1
ROST      1
FB        1
COST      1
LVLT      1
CBG       1
NTAP      1
CMG       1
DD        1
KEY       1
XRX       1
WRK       1
RCL       1
NVDA      1
Name: symbol, Length: 471, dtype: int64