In [1]:
import pandas as pd

In [2]:
# Data types

# One Series per common pandas dtype; their dtypes are printed in the cells below.
strings = pd.Series(['a', 'b', 'c'])

categories = pd.Series(['a', 'b', 'c'], dtype='category')

ints = pd.Series([1, 2, 3])

floats = pd.Series([1.2, 3.4, 5.6])

bools = pd.Series([True, False, True])

# The literal is written day-first (DD/MM/YYYY). Passing the format explicitly
# avoids relying on pandas' inference (which warns about day-first strings on
# recent versions) and guarantees the value is read as 31 December 2018.
dates = pd.Series([pd.to_datetime('31/12/2018', format='%d/%m/%Y')])

# A DataFrame whose columns hold different dtypes (strings and floats).
df = pd.DataFrame({'s': strings, 'f': floats})

In [3]:
# Show the container type, the values and the dtype of `strings`, one per line.
print(type(strings), strings, strings.dtype, sep='\n')

<class 'pandas.core.series.Series'>
0    a
1    b
2    c
dtype: object
object


In [4]:
# Print the dtype of each remaining example Series, in definition order.
for example_series in (categories, ints, floats, bools, dates):
    print(example_series.dtype)

category
int64
float64
bool
datetime64[ns]


In [5]:
# Display the one-element datetime Series.
dates

0   2018-12-31
dtype: datetime64[ns]

In [6]:
# `dates` was built without an explicit index, so 0 is its first (and only) label.
dates[0]

Timestamp('2018-12-31 00:00:00')

In [7]:
# A DataFrame exposes `dtypes` (plural) because each column can have its own dtype.
print(type(df), df.dtypes, sep='\n')

<class 'pandas.core.frame.DataFrame'>
s     object
f    float64
dtype: object


In [8]:
# Create Series

# A scalar value is repeated for every label supplied through `index`.
single_series = pd.Series(data=5, index=list(range(3)))
single_series

0    5
1    5
2    5
dtype: int64

In [9]:
# Build a Series from a list of values with custom string labels.
list_series = pd.Series(data=[1, 2, 3], index=list('abc'))
list_series

a    1
b    2
c    3
dtype: int64

In [10]:
import string

# Print the lowercase ASCII alphabet and its length, then show it as a list of
# single-character strings.
print(string.ascii_lowercase, len(string.ascii_lowercase), sep='\n')
list(string.ascii_lowercase)

abcdefghijklmnopqrstuvwxyz
26


['a',
 'b',
 'c',
 'd',
 'e',
 'f',
 'g',
 'h',
 'i',
 'j',
 'k',
 'l',
 'm',
 'n',
 'o',
 'p',
 'q',
 'r',
 's',
 't',
 'u',
 'v',
 'w',
 'x',
 'y',
 'z']

In [11]:
# Label the numbers 1..26 with the letters a..z.
# `range` and `list(str)` are passed directly; wrapping them in identity
# comprehensions (`[i for i in ...]`) only made an extra copy.
list_series = pd.Series(range(1, 27), index=list(string.ascii_lowercase))
list_series

a     1
b     2
c     3
d     4
e     5
f     6
g     7
h     8
i     9
j    10
k    11
l    12
m    13
n    14
o    15
p    16
q    17
r    18
s    19
t    20
u    21
v    22
w    23
x    24
y    25
z    26
dtype: int64

In [12]:
# Build a Series from a mapping: keys become the index, values become the data.
dict_series = pd.Series(dict(a=1, b=2, c=3))
dict_series

a    1
b    2
c    3
dtype: int64

In [13]:
# Using zip

# `zip` pairs each letter with its 1-based position; `dict` turns the pairs
# into a mapping whose keys become the Series index. The iterables are passed
# to `zip` directly instead of being copied through identity comprehensions.
dict_series_2 = pd.Series(dict(zip(string.ascii_lowercase, range(1, 27))))

print(dict_series_2)

a     1
b     2
c     3
d     4
e     5
f     6
g     7
h     8
i     9
j    10
k    11
l    12
m    13
n    14
o    15
p    16
q    17
r    18
s    19
t    20
u    21
v    22
w    23
x    24
y    25
z    26
dtype: int64


In [14]:
# Example prices keyed by ticker symbol.
stock_data = pd.Series(
    {
        'AAPL': 200,
        'MSFT': 120,
        'AMZN': 1800,
    }
)
stock_data

AAPL     200
MSFT     120
AMZN    1800
dtype: int64

In [15]:
import numpy as np

# A NumPy array can be used as Series data; a default integer index is created.
numpy_series = pd.Series(data=np.arange(1, 4))
numpy_series

0    1
1    2
2    3
dtype: int64

In [16]:
# Series functions

# First element by position. `stock_data` is indexed by ticker strings, so a
# plain `stock_data[0]` relies on the deprecated integer-position fallback of
# `Series.__getitem__`; `.iloc` is the explicit positional accessor.
stock_data.iloc[0]

200

In [17]:
# Look up a single value by its index label.
stock_data.loc['AAPL']

200

In [18]:
# Positional slice: the end position is excluded, so this yields two rows.
stock_data.iloc[0:2]

AAPL    200
MSFT    120
dtype: int64

In [19]:
# Label slice: unlike positional slicing, the end label is included.
stock_data.loc['AAPL':'AMZN']

AAPL     200
MSFT     120
AMZN    1800
dtype: int64

In [20]:
# Fetch the same value by label and by position.
print(stock_data.get('MSFT'))
# `stock_data.get(1)` depended on the deprecated integer-position fallback for
# a string-labelled index; `.iloc` selects by position explicitly.
print(stock_data.iloc[1])

120
120


In [21]:
# Largest value, smallest value and arithmetic mean, one per line.
print(stock_data.max(), stock_data.min(), stock_data.mean(), sep='\n')

1800
120
706.6666666666666


In [22]:
# Number of elements in the Series.
stock_data.size

3

In [23]:
# Summary statistics (count, mean, std, min, quartiles, max) as a float Series.
stock_data.describe()

count       3.000000
mean      706.666667
std       947.698968
min       120.000000
25%       160.000000
50%       200.000000
75%      1000.000000
max      1800.000000
dtype: float64

In [24]:
# Overwrite the first element by position. With a string index, plain
# `stock_data[0] = ...` relies on the deprecated positional fallback (and is
# slated to be treated as the label 0 instead); `.iloc` is unambiguous.
stock_data.iloc[0] = 210
stock_data

AAPL     210
MSFT     120
AMZN    1800
dtype: int64

In [25]:
# Overwrite the value stored under an existing label.
stock_data.loc['MSFT'] = 115
stock_data

AAPL     210
MSFT     115
AMZN    1800
dtype: int64

In [26]:
# Two further tickers, to be combined with `stock_data`.
second_stock_data = pd.Series(dict(AMD=27, NVDA=190))

In [27]:
# Combine the two Series end to end. `Series.append` was removed in pandas 2.0;
# `pd.concat` is the supported replacement and keeps both indexes as-is.
# NOTE: this rebinds `stock_data`, so re-running the cell appends the extra
# tickers a second time.
stock_data = pd.concat([stock_data, second_stock_data])

In [28]:
# Display the combined Series (original tickers followed by the new ones).
stock_data

AAPL     210
MSFT     115
AMZN    1800
AMD       27
NVDA     190
dtype: int64

In [29]:
# `pop` returns the value stored under 'AMD' and removes that entry from
# `stock_data` in place, so this cell is not safe to re-run on its own.
stock_data.pop('AMD')

27

In [30]:
# Display the Series to confirm 'AMD' is gone.
stock_data

AAPL     210
MSFT     115
AMZN    1800
NVDA     190
dtype: int64

In [31]:
# Values only, as a plain Python list (the index labels are dropped).
stock_data.to_list()

[210, 115, 1800, 190]

In [32]:
# Index labels mapped to values, as a plain Python dict.
stock_data.to_dict()

{'AAPL': 210, 'MSFT': 115, 'AMZN': 1800, 'NVDA': 190}

In [33]:
# Sort by value in ascending order; this returns a new Series and leaves
# `stock_data` itself in its original order (see the next cell).
stock_data.sort_values()

MSFT     115
NVDA     190
AAPL     210
AMZN    1800
dtype: int64

In [34]:
# Display the Series again: the previous sort did not modify it.
stock_data

AAPL     210
MSFT     115
AMZN    1800
NVDA     190
dtype: int64

In [35]:
# Sort by index label (ticker) in ascending alphabetical order.
stock_data.sort_index()

AAPL     210
AMZN    1800
MSFT     115
NVDA     190
dtype: int64

In [36]:
# Sort by index label (ticker) in descending alphabetical order.
stock_data.sort_index(ascending=False)

NVDA     190
MSFT     115
AMZN    1800
AAPL     210
dtype: int64

In [37]:
# Element-wise multiplication by a scalar; the result is a new float Series.
print(stock_data.mul(1.29))

AAPL     270.90
MSFT     148.35
AMZN    2322.00
NVDA     245.10
dtype: float64


In [38]:
# Display the Series again: the multiplication above did not modify it.
stock_data

AAPL     210
MSFT     115
AMZN    1800
NVDA     190
dtype: int64

In [39]:
# Per-ticker multipliers; multiplication between two Series aligns on labels.
new_series = pd.Series(dict(AAPL=1, MSFT=2, AMZN=3, NVDA=4))
stock_data.mul(new_series)

AAPL     210
MSFT     230
AMZN    5400
NVDA     760
dtype: int64

In [40]:
# Creating dataframes

# Each dict key becomes a column; the lists supply the rows under a default
# integer index.
stock_df = pd.DataFrame(
    {
        'High': [2, 2.2, 2.4],
        'Low': [1.5, 1.7, 2.1],
        'Close': [2, 2.1, 2.2],
    }
)
stock_df

Unnamed: 0,High,Low,Close
0,2.0,1.5,2.0
1,2.2,1.7,2.1
2,2.4,2.1,2.2


In [41]:
# Two price Series whose date labels only partly overlap ('Jan 2' and 'Jan 3').
open_prices = pd.Series(
    data=[5, 5.6, 5.2],
    index=['Jan 1', 'Jan 2', 'Jan 3'],
)
close_prices = pd.Series(
    data=[5.2, 5.7, 5.4],
    index=['Jan 2', 'Jan 3', 'Jan 4'],
)

In [42]:
# Display the closing prices.
close_prices

Jan 2    5.2
Jan 3    5.7
Jan 4    5.4
dtype: float64

In [43]:
# Combining Series into a DataFrame aligns them on their labels; a label
# missing from one Series produces NaN in that column.
stock_df2 = pd.DataFrame(dict(Open=open_prices, Close=close_prices))
stock_df2

Unnamed: 0,Open,Close
Jan 1,5.0,
Jan 2,5.6,5.2
Jan 3,5.2,5.7
Jan 4,,5.4


In [44]:
# Load the AAPL price history from 'AAPL.csv' (path is relative to the working
# directory). Only the listed columns are read, 'Date' becomes the index, and
# `parse_dates=True` asks pandas to parse that index as dates.
apple_stock_data = pd.read_csv('AAPL.csv', 
                               usecols=['Date', 'Open', 'High', 'Low', 'Close'],
                               parse_dates=True, 
                               index_col='Date')
apple_stock_data

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-10-01,227.949997,229.419998,226.350006,227.259995
2018-10-02,227.250000,230.000000,226.630005,229.279999
2018-10-03,230.050003,233.470001,229.779999,232.070007
2018-10-04,230.779999,232.350006,226.729996,227.990005
2018-10-05,227.960007,228.410004,220.580002,224.289993
2018-10-08,222.210007,224.800003,220.199997,223.770004
2018-10-09,223.639999,227.270004,222.250000,226.869995
2018-10-10,225.460007,226.350006,216.050003,216.360001
2018-10-11,214.520004,219.500000,212.320007,214.449997
2018-10-12,220.419998,222.880005,216.839996,222.110001


In [45]:
# Dataframe functions

# Selecting a single column by name returns a Series indexed by date.
apple_stock_data['Close']

Date
2018-10-01    227.259995
2018-10-02    229.279999
2018-10-03    232.070007
2018-10-04    227.990005
2018-10-05    224.289993
2018-10-08    223.770004
2018-10-09    226.869995
2018-10-10    216.360001
2018-10-11    214.449997
2018-10-12    222.110001
2018-10-15    217.360001
2018-10-16    222.149994
2018-10-17    221.190002
2018-10-18    216.020004
2018-10-19    219.309998
2018-10-22    220.649994
2018-10-23    222.729996
2018-10-24    215.089996
2018-10-25    219.800003
2018-10-26    216.300003
2018-10-29    212.240005
2018-10-30    213.300003
2018-10-31    218.860001
2018-11-01    222.220001
2018-11-02    207.479996
2018-11-05    201.589996
2018-11-06    203.770004
2018-11-07    209.949997
2018-11-08    208.490005
2018-11-09    204.470001
                 ...    
2019-08-19    210.350006
2019-08-20    210.360001
2019-08-21    212.639999
2019-08-22    212.460007
2019-08-23    202.639999
2019-08-26    206.490005
2019-08-27    204.160004
2019-08-28    205.529999
2019-08-29    209.00

In [46]:
# Select one row by its index label (a date string) with `.loc`.
apple_stock_data.loc['2019-09-24']

Open     221.029999
High     222.490005
Low      217.190002
Close    217.679993
Name: 2019-09-24 00:00:00, dtype: float64

In [47]:
# Select the first row by position with `.iloc`.
apple_stock_data.iloc[0]

Open     227.949997
High     229.419998
Low      226.350006
Close    227.259995
Name: 2018-10-01 00:00:00, dtype: float64

In [48]:
# Select the first five rows by position (the end position is excluded).
apple_stock_data.iloc[0:5]

Unnamed: 0_level_0,Open,High,Low,Close
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-10-01,227.949997,229.419998,226.350006,227.259995
2018-10-02,227.25,230.0,226.630005,229.279999
2018-10-03,230.050003,233.470001,229.779999,232.070007
2018-10-04,230.779999,232.350006,226.729996,227.990005
2018-10-05,227.960007,228.410004,220.580002,224.289993


In [49]:
# Two-step lookup: first pick the 'Open' column (a Series), then index that
# Series by date label.
apple_stock_data['Open']['2018-10-01'] # retrieving using column 1st then row

227.94999700000002

In [50]:
# Row label first, then column label, in one `.loc[row, column]` call. This
# replaces the chained `.loc[row]['High']`, which built an intermediate row
# Series only to index it again.
apple_stock_data.loc['2018-10-04', 'High'] # retrieving using row 1st then column (note: use loc/iloc)

232.350006

In [51]:
# Keep the first five rows for the aggregation examples below.
first_five = apple_stock_data.head(5)

In [52]:
# Per-column summary statistics for the five selected rows.
first_five.describe()

Unnamed: 0,Open,High,Low,Close
count,5.0,5.0,5.0,5.0
mean,228.798001,230.730002,226.014002,228.178
std,1.525899,2.107333,3.343505,2.844152
min,227.25,228.410004,220.580002,224.289993
25%,227.949997,229.419998,226.350006,227.259995
50%,227.960007,230.0,226.630005,227.990005
75%,230.050003,232.350006,226.729996,229.279999
max,230.779999,233.470001,229.779999,232.070007


In [53]:
# With no axis given, `max` reduces down the rows: one maximum per column.
first_five.max()

Open     230.779999
High     233.470001
Low      229.779999
Close    232.070007
dtype: float64

In [54]:
# Same as the default: reduce along the index, giving one maximum per column.
first_five.max(axis='index')

Open     230.779999
High     233.470001
Low      229.779999
Close    232.070007
dtype: float64

In [55]:
# Reduce across the columns instead: one maximum per row (date).
first_five.max(axis='columns')

Date
2018-10-01    229.419998
2018-10-02    230.000000
2018-10-03    233.470001
2018-10-04    232.350006
2018-10-05    228.410004
dtype: float64

In [56]:
# For each column, the index label (date) at which its maximum occurs.
first_five.idxmax(axis='index')

Open    2018-10-04
High    2018-10-03
Low     2018-10-03
Close   2018-10-03
dtype: datetime64[ns]

In [57]:
# For each row (date), the name of the column holding that row's maximum.
first_five.idxmax(axis='columns')

Date
2018-10-01    High
2018-10-02    High
2018-10-03    High
2018-10-04    High
2018-10-05    High
dtype: object