In [1]:
import numpy as np
import pandas as pd

In [2]:
# Small demo frame with deliberately missing values: row 'c' is entirely
# NaN and row 'a' is missing its 'two' entry.
df1 = pd.DataFrame(
    {
        'one': [1.4, 7.1, np.nan, 0.75],
        'two': [np.nan, -4.5, np.nan, -1.3],
    },
    index=['a', 'b', 'c', 'd'],
)
df1

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [3]:
# Sum down each column (one total per column); NaN entries are skipped.
df1.sum(axis='index')

one    9.25
two   -5.80
dtype: float64

In [4]:
# Sum across each row (one total per index label); the all-NaN row sums to 0.
df1.sum(axis=1)

a    1.40
b    2.60
c    0.00
d   -0.55
dtype: float64

In [5]:
# Row-wise mean without NaN skipping: any row containing a NaN yields NaN.
df1.mean(axis='columns', skipna=False)

a      NaN
b    1.300
c      NaN
d   -0.275
dtype: float64

In [6]:
# Index label at which each column reaches its maximum.
df1.idxmax(axis=0)

one    b
two    d
dtype: object

In [7]:
# Running total down each column; NaN positions stay NaN in the result
# while the accumulation carries on past them.
df1.cumsum(axis=0)

Unnamed: 0,one,two
a,1.4,
b,8.5,-4.5
c,,
d,9.25,-5.8


In [8]:
# Index label at which each column reaches its minimum.
df1.idxmin(axis=0)

one    d
two    b
dtype: object

In [9]:
# Per-column summary statistics in one call; `count` reflects only the
# non-NaN entries (3 for 'one', 2 for 'two').
df1.describe()

Unnamed: 0,one,two
count,3.0,2.0
mean,3.083333,-2.9
std,3.493685,2.262742
min,0.75,-4.5
25%,1.075,-3.7
50%,1.4,-2.9
75%,4.25,-2.1
max,7.1,-1.3


In [10]:
# Non-numeric Series: the four-letter pattern a, a, b, c repeated four times.
obj1 = pd.Series(['a', 'a', 'b', 'c'] * 4)

In [11]:
# On non-numeric data describe() reports count / unique / top / freq
# rather than the numeric summary statistics.
obj1.describe()

count     16
unique     3
top        a
freq       8
dtype: object

In [15]:
import pandas_datareader.data as web

In [16]:
# Pin the download window so that re-running this cell fetches the same
# rows as the saved outputs in this notebook (first row 2015-09-14, last
# row 2020-09-11). With no start/end, pandas-datareader falls back to a
# default window relative to the day the cell is executed, so every
# downstream number would drift between runs.
TICKERS = ['AAPL', 'AMZN', 'MSFT', 'GOOG', 'TSLA']
START_DATE = '2015-09-14'
END_DATE = '2020-09-11'

# One frame of quotes per ticker (network call to Yahoo Finance).
all_data = {
    ticker: web.get_data_yahoo(ticker, start=START_DATE, end=END_DATE)
    for ticker in TICKERS
}

# Reshape into wide frames with one column per ticker.
price = pd.DataFrame({ticker: data['Adj Close']
                      for ticker, data in all_data.items()})
volume = pd.DataFrame({ticker: data['Volume']
                       for ticker, data in all_data.items()})

In [17]:
# Fractional change of each ticker's price from one row to the next.
returns = price.pct_change(periods=1)

In [19]:
# Peek at the five most recent rows of returns.
returns.tail(5)

Unnamed: 0_level_0,AAPL,AMZN,MSFT,GOOG,TSLA
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-09-04,0.000662,-0.021787,-0.014036,-0.030941,0.027813
2020-09-08,-0.067295,-0.043944,-0.054096,-0.036863,-0.210628
2020-09-09,0.039887,0.037707,0.042584,0.016034,0.109234
2020-09-10,-0.032646,-0.028605,-0.028018,-0.016018,0.013815
2020-09-11,-0.013129,-0.018547,-0.006525,-0.007376,0.003716


In [20]:
# Correlation between the AAPL and GOOG return series.
returns.AAPL.corr(returns.GOOG)

0.654442545106344

In [23]:
# Covariance between the AAPL and GOOG return series.
returns.AAPL.cov(returns.GOOG)

0.0002008880563268999

In [24]:
# Correlation between the AAPL and TSLA return series.
returns['AAPL'].corr(returns['TSLA'])

0.38253064328516634

In [25]:
# Full pairwise correlation matrix across all tickers.
returns.corr(method='pearson')

Unnamed: 0,AAPL,AMZN,MSFT,GOOG,TSLA
AAPL,1.0,0.572368,0.703489,0.654443,0.382531
AMZN,0.572368,1.0,0.672507,0.681221,0.374341
MSFT,0.703489,0.672507,1.0,0.78307,0.407938
GOOG,0.654443,0.681221,0.78307,1.0,0.378032
TSLA,0.382531,0.374341,0.407938,0.378032,1.0


In [26]:
# Full pairwise covariance matrix of the return series across all tickers.
returns.cov()

Unnamed: 0,AAPL,AMZN,MSFT,GOOG,TSLA
AAPL,0.000346,0.000201,0.000229,0.000201,0.000253
AMZN,0.000201,0.000357,0.000222,0.000212,0.000252
MSFT,0.000229,0.000222,0.000306,0.000226,0.000254
GOOG,0.000201,0.000212,0.000226,0.000272,0.000222
TSLA,0.000253,0.000252,0.000254,0.000222,0.001268


In [27]:
# Correlate every ticker's returns with the single TSLA return series.
returns.corrwith(returns['TSLA'])

AAPL    0.382531
AMZN    0.374341
MSFT    0.407938
GOOG    0.378032
TSLA    1.000000
dtype: float64

In [28]:
# Passing a DataFrame pairs columns by name: each ticker's returns are
# correlated with that same ticker's volume.
returns.corrwith(volume, axis=0)

AAPL   -0.078666
AMZN   -0.048266
MSFT   -0.055283
GOOG   -0.152358
TSLA    0.114310
dtype: float64

In [31]:
# Row-by-row variant: one correlation per date, computed across tickers.
returns.corrwith(volume, axis=1)

Date
2015-09-14         NaN
2015-09-15   -0.066350
2015-09-16   -0.296155
2015-09-17   -0.857773
2015-09-18    0.212609
                ...   
2020-09-04    0.490701
2020-09-08   -0.339805
2020-09-09    0.270279
2020-09-10   -0.049937
2020-09-11   -0.047315
Length: 1259, dtype: float64

In [32]:
# Short Series of repeated letters used for the unique / count / membership
# examples.
obj = pd.Series(['c', 'a', 'd', 'a', 'a', 'b', 'b', 'c', 'c'])
obj

0    c
1    a
2    d
3    a
4    a
5    b
6    b
7    c
8    c
dtype: object

In [33]:
# Distinct values of obj, in order of first appearance (not sorted).
uniques = pd.unique(obj)
uniques

array(['c', 'a', 'd', 'b'], dtype=object)

In [34]:
# Frequency of each distinct value, most frequent first.
obj.value_counts()

c    3
a    3
b    2
d    1
dtype: int64

In [35]:
# Count occurrences in a raw array without sorting by frequency.
# The top-level pd.value_counts() function is deprecated (pandas 2.1+),
# so wrap the array in a Series and call the method instead.
pd.Series(obj.values).value_counts(sort=False)

d    1
a    3
b    2
c    3
dtype: int64

In [36]:
# Boolean mask: True where the element is either 'b' or 'c'.
mask = obj.isin(['b', 'c'])
mask

0     True
1    False
2    False
3    False
4    False
5     True
6     True
7     True
8     True
dtype: bool

In [37]:
# Keep only the entries flagged True by the mask.
obj.loc[mask]

0    c
5    b
6    b
7    c
8    c
dtype: object

In [38]:
# Values to look up, and the reference list of distinct values to look
# them up in.
to_match = pd.Series(['c', 'a', 'b', 'b', 'c', 'a'])
unique_vals = pd.Series(['c', 'b', 'a'])

In [39]:
# Translate each value in to_match into its integer position within
# unique_vals (here 'c' -> 0, 'b' -> 1, 'a' -> 2).
pd.Index(unique_vals).get_indexer(to_match)

array([0, 2, 1, 1, 0, 2])

In [40]:
# Five rows of integer answers to three questions, built row by row.
data = pd.DataFrame(
    [
        [1, 2, 1],
        [3, 3, 5],
        [4, 1, 2],
        [3, 2, 4],
        [4, 3, 4],
    ],
    columns=['Q1', 'Q2', 'Q3'],
)
data

Unnamed: 0,Q1,Q2,Q3
0,1,2,1
1,3,3,5
2,4,1,2
3,3,2,4
4,4,3,4


In [43]:
# Histogram across columns: for every distinct answer value (row label),
# count how many times it occurs in each question column. Values that never
# occur in a column come back as NaN from apply(), so fill those with 0.
# pd.Series.value_counts is applied per column because the top-level
# pd.value_counts() function is deprecated (pandas 2.1+).
result = data.apply(pd.Series.value_counts).fillna(0)
result

Unnamed: 0,Q1,Q2,Q3
1,1.0,1.0,1.0
2,0.0,2.0,1.0
3,2.0,2.0,0.0
4,2.0,0.0,2.0
5,0.0,0.0,1.0
