In [3]:
import numpy as np
import pandas as pd

a = np.array([4,7,-5,3])
obj = pd.Series(a)
obj.array, obj.index

(<PandasArray>
 [4, 7, -5, 3]
 Length: 4, dtype: int32,
 RangeIndex(start=0, stop=4, step=1))

In [12]:
#define array and attach your own desired index column with their values
obj2 = pd.Series([4,7,-5,3], index=["d","b","a","c"])
obj2

#if you want to check specific value by calling the index value:
obj2["a"]
#you can set value to specific row
obj2["c"]=6
obj2

#try showing only specific rows as a list of ndexes
rows = np.array(["c","a"])
obj2[rows]

#you can use operations to show you only those values that filter
obj2[obj2 > 0]

#you can do some math too
obj2[obj2 > 0]*2

#you can do some other complex math
np.exp(obj2)

#you can check true false operation too
"a" in obj2, "w" in obj2

(True, False)

In [18]:
#passing data from dictionary into Series where key is index and value is normal value
sdata = {"Ohio": 35000, "Texas": 71000, "Oregon": 16000, "Utah": 5000}
obj3 = pd.Series(sdata)
obj3

#convert back to dictionary:
a = obj3.to_dict()
a

#you can override passing the index with dictionary keys
#it will try to match states with dictionary key. Ex:California is not in dict key and that is why it is NULL
states = ["California", "Ohio", "Oregon", "Texas"]
obj4 = pd.Series(sdata, index=states)
obj3, obj4

#is null and is not null
pd.isna(obj4), pd.notna(obj4)

#the same thing you can do it like this
obj4.isna(), obj4.notna()

(Ohio      35000
 Texas     71000
 Oregon    16000
 Utah       5000
 dtype: int64,
 California        NaN
 Ohio          35000.0
 Oregon        16000.0
 Texas         71000.0
 dtype: float64)

In [22]:
#pandas will automatically match 2 dataset with the same index. Pandas has automatic joins in index values
obj3+obj4

#you can give column name for the values in pandas
obj4.name = "population"
#you can assign index column name too
obj4.index.name = "state"
obj4

#you can additionally assign different values for the index
a = np.array([4,3,-1,7])
b = pd.Series(a)

b.index = ["Bob", "Steve", "Jeff", "Ryan"]
b

Bob      4
Steve    3
Jeff    -1
Ryan     7
dtype: int32

DATA FRAME

In [48]:
#Data Frame
data = {"state": ["Ohio", "Ohio", "Ohio", "Nevada", "Nevada", "Nevada"],
        "year": [2000, 2001, 2002, 2001, 2002, 2003],
        "pop": [1.5, 1.7, 3.6, 2.4, 2.9, 3.2]}
frame = pd.DataFrame(data)

#head() method shws first 5 rows and tail() method shows last 5 rows, or you can show just 2 rows
frame.head(), frame.head(2), frame.tail()

#if you want you can arrange the order of the columns
pd.DataFrame(data, columns = ["year", "state","pop"])

#try to add column that is not defined in the dictionary, it will create that new column but with null values
frame2 = pd.DataFrame(data, columns = ["year", "state","pop","debt"])
frame2

#show column names
frame2.columns

#you can show just one column with its data values like Series
frame2["state"]

#or you can do it like this, but be carrefull, this can conflict with 
#Python variable names in syntax and can show you error. If column name contain whitespace, any symbols
frame2.state

#you can retrieve row with .loc or .iloc attributes
frame2.loc[1], frame2.iloc[2]

#you can assign value by the column in all rows
frame2["debt"] = 16.5

#or you can arrange series of numbers
frame2["debt"] = np.arange(6.)

#you can assign from series
val = pd.Series([-1.2,-1.5,-1.7], index=[2,4,5])
frame2["debt"] = val

#you can add column that does not exist and assign the value for that col
frame2["eastern"] = frame2["state"] == "Ohio"

#do some math and add that column
frame2["totalAmt"] = frame2["pop"] * frame2["debt"]
frame2

Unnamed: 0,year,state,pop,debt,eastern,totalAmt
0,2000,Ohio,1.5,,True,
1,2001,Ohio,1.7,,True,
2,2002,Ohio,3.6,-1.2,True,-4.32
3,2001,Nevada,2.4,,False,
4,2002,Nevada,2.9,-1.5,False,-4.35
5,2003,Nevada,3.2,-1.7,False,-5.44


In [49]:
#you can delete column
del frame2["eastern"]
frame2

Unnamed: 0,year,state,pop,debt,totalAmt
0,2000,Ohio,1.5,,
1,2001,Ohio,1.7,,
2,2002,Ohio,3.6,-1.2,-4.32
3,2001,Nevada,2.4,,
4,2002,Nevada,2.9,-1.5,-4.35
5,2003,Nevada,3.2,-1.7,-5.44


In [60]:
#nested dictionary
#outer dictionary will represent columns and inner dictionary will represent rows indexes
populations = {"Ohio": {2000: 1.5, 2001: 1.7, 2002: 3.6}, 
               "Nevada": {2001: 2.4, 2002: 2.9}}
frame3 = pd.DataFrame(populations)

#but you can transpose if you want
frame3, frame3.T

#you can explicitly assign which will represent the index
a = pd.DataFrame(populations, index=[2001, 2002, 2003])

#you can slice values if you need
pdata = {"Ohio": frame3["Ohio"].iloc[:-1], 
         "Nevada": frame3["Nevada"].iloc[:2]}
a = pd.DataFrame(pdata)

#try to give the name of the index and column but it will treat them as Series
#Data Frame does not have "name" attribute
frame3.index.name = "year"
frame3.columns.name = "state"

#when you convert dataframe to numpy, it will treat the table as a matrix without the name attributes
frame3.to_numpy()
#or like this, it is the same result
np.array(frame3)

#you can retrieve dataframe with different data types to numpy
np.array(frame3),np.array(frame2)

(array([[1.5, nan],
        [1.7, 2.4],
        [3.6, 2.9]]),
 array([[2000, 'Ohio', 1.5, nan, nan],
        [2001, 'Ohio', 1.7, nan, nan],
        [2002, 'Ohio', 3.6, -1.2, -4.32],
        [2001, 'Nevada', 2.4, nan, nan],
        [2002, 'Nevada', 2.9, -1.5, -4.35],
        [2003, 'Nevada', 3.2, -1.7, -5.44]], dtype=object))

Index Objects

In [74]:
#any array or other sequence of labels you use when constructing DataFrame or Series, is internally converted to index
obj = pd.Series(np.arange(3), index=["a","b","c"])
obj.index[1:]

#define index externaly
labelind = [0,1,2]
label = pd.Index(labelind)
label
obj2 = pd.Series([1.5,-2.5,0], index=label)

#you can check if a value exist in index or column
"Ohio" in frame3.columns, 2002 in frame3.index

(True, True)

In [103]:
#indexes in pandas can contain duplicates
pd.Index(["foo", "foo", "bar", "bar"])

#show only different indexes
frame3.index.difference([2001,2003,2004,2005])

#concat() work only if number of columns are the same
df1 = pd.DataFrame(
    {
        "A": ["A0", "A1", "A2", "A3"],
        "B": ["B0", "B1", "B2", "B3"],
        "C": ["C0", "C1", "C2", "C3"],
        "D": ["D0", "D1", "D2", "D3"],
    },
    index=[0, 1, 2, 3],
)

df2 = pd.DataFrame(
    {
        "A": ["A4", "A5", "A6", "A7"],
        "B": ["B4", "B5", "B6", "B7"],
        "C": ["C4", "C5", "C6", "C7"],
        "D": ["D4", "D5", "D6", "D7"],
    },
    index=[0, 5, 2, 7],
)

df3 = pd.DataFrame(
    {
        "A": ["A8", "A9", "A10", "A11"],
        "B": ["B8", "B9", "B10", "B11"],
        "C": ["C8", "C9", "C10", "C11"],
        "D": ["D8", "D9", "D10", "D11"],
    },
    index=[8, 9, 3, 11],
)

concatFrame = [df1,df2,df3]
result = pd.concat(concatFrame)

#you can mark those 3 dataframes
result = pd.concat(concatFrame, keys=["x", "y", "z"])
result, result.loc["x"]

#you can concatenate through columns
df4 = pd.DataFrame(
    {
        "B": ["B2", "B3", "B6", "B7"],
        "D": ["D2", "D3", "D6", "D7"],
        "F": ["F2", "F3", "F6", "F7"],
    },
    index=[2, 3, 6, 7],
)
result = pd.concat([df1,df4], axis=1)
result

Unnamed: 0,A,B,C,D,B.1,D.1,F
0,A0,B0,C0,D0,,,
1,A1,B1,C1,D1,,,
2,A2,B2,C2,D2,B2,D2,F2
3,A3,B3,C3,D3,B3,D3,F3
6,,,,,B6,D6,F6
7,,,,,B7,D7,F7


REINDEXING

In [12]:
#reindexing
import pandas as pd
import numpy as np

obj = pd.Series([4.5, 7.2, -5.3, 3.6], index=["d", "b", "a", "c"])
obj
#this will rearange the index according to new index
o = obj.reindex(["a","b","c","d","e"])
obj, o

#you'll need some times to do some interpolation or filling of values when reindexing
#ffill will forward fill the empty values to handle nulls or nans
obj3 = pd.Series(["blue", "purple", "yellow"], index=[0,2,4])
o = obj3.reindex(np.arange(6), method="ffill")
obj3,o

#reindex when using DataFrame can alter the row index, columns or both
#automatic filling with data
frame = pd.DataFrame(np.arange(9).reshape((3, 3)), index=["a", "c", "d"], columns=["Ohio", "Texas", "California"])

#let's reindex
frame2 = frame.reindex(index=["a","b","c","d"])
frame, frame2

#let's reindex by column keywords
states = ["Texas", "Utah", "California"]
frame = frame.reindex(columns=states)
frame2 = frame2.reindex(columns=states)
frame, frame2

#another way to reindex is by specifying columns
#reindex can be done by index or by columns
frame.reindex(states, axis="columns")

#by using loc method, but if index value does not exist it will throw error
#use reindex if you need to change the behaviour of missing values
frame.loc[["a","d","c"], ["California", "Texas"]]

Unnamed: 0,California,Texas
a,2,1
d,8,7
c,5,4


In [19]:
#dropping rows by index
obj = pd.Series(np.arange(5.), index=["a", "b", "c", "d", "e"])

new_obj = obj.drop("c")
obj, new_obj

#drop from arrays
dels = ["d","c"]
obj.drop(dels)

#delete data from either axis 0 or 1 (index or column)
data = pd.DataFrame(np.arange(16).reshape((4, 4)), 
                    index=["Ohio", "Colorado", "Utah", "New York"],
                    columns=["one", "two", "three", "four"])

#you can frop with this command too 
#data.drop("two", axis=1) is equal with data.drop(columns=["two"]) is also equal with data.drop(["two","four"], axis="columns")
data.drop(index=["Colorado","Ohio"]), data.drop(columns=["two"])


(          one  two  three  four
 Utah        8    9     10    11
 New York   12   13     14    15,
           one  three  four
 Ohio        0      2     3
 Colorado    4      6     7
 Utah        8     10    11
 New York   12     14    15)

Indexing, Selection, and Filtering

In [25]:
#filtering and selection
obj = pd.Series(np.arange(4.), index=["a", "b", "c", "d"])

#all these ways are correct when you need to do filtering
obj["b"]
obj[1]
obj[2:4]
obj[["b","a","d"]]
obj[[1,3]]
obj[obj < 2]

#but most prefered way is by .loc operator
obj.loc[["b","a","d"]]

#but if you use index locations .loc will fail
#instead do this:
#both these filter methods will return the same result
obj.iloc[1:3], obj.iloc[[1,2]]

#you can assign values
obj.iloc[1:3] = 5
obj

a    0.0
b    5.0
c    5.0
d    3.0
dtype: float64

In [43]:
#filtering and indexing in dataframes
data = pd.DataFrame(np.arange(16).reshape((4, 4)),
                    index=["Ohio", "Colorado", "Utah", "New York"],
                    columns=["one", "two", "three", "four"])
#it will show only column "two"
data["two"]

#if you need multiple columns
cols = ["three","one"]
data[cols]

#you can do slicing only first 2 rows and all columns
data[:2]

#you can filter only values where column "three" is greater than 5
data[data["three"] > 5]

#you can return dataframe with boolean values
data < 5

#you can assign value to only those fields where value is smaller than 5
data[data < 5] = 0
data

#as in Series, you can call loc method to select indexes
data.loc["Colorado"]

#select multiple rows
data.loc[["Colorado","New York"]]

#rows and columns (using both axis)
data.loc["Colorado", ["two","three"]]

#you can achieve the same by using index positions .iloc
data.iloc[[2,1]]

#both rows and columns, both axis
data.iloc[2, [3,0,1]]
data.iloc[[2,1], [3,0,1]]

#retrieve all rows until "Utah" ("Utah" is inclusive, it is different than numpy where "Utah will exclusive")
#and show ony column "two"
data.loc[:"Utah","two"]

#do combination: all rows, all columns until 3 (inclusive don't forget) where column "three" > 5
data.iloc[:,:3][data["three"] > 5]

#you can achieve like this
data.loc[:,:"three"][data["three"] > 5]

#you can do simple filtering
data.loc[data["three"] >=2]

Unnamed: 0,one,two,three,four
Colorado,0,5,6,7
Utah,8,9,10,11
New York,12,13,14,15


In [59]:
#it will treat positioning obj[-1] will return error
ser = pd.Series(np.arange(3.))
#this will show error
#ser[-1]

#this will not show any error
ser[2]

#or
ser[:2]

#but this will not show any error
ser2 = pd.Series(np.arange(3.), index=["a", "b", "c"])
ser2[-1]

#the same  you can achieve
ser2.iloc[-1]

#you can assign value to a column
data = pd.DataFrame(np.arange(16).reshape((4, 4)),
                    index=["Ohio", "Colorado", "Utah", "New York"],
                    columns=["one", "two", "three", "four"])
data.loc[:,"one"] = 1

#or you can assign to the whole row
data.iloc[2] = 5

#you can assign by filtering
data.loc[data["four"] > 5] =3

#you can assign only to columns "three"
data1 = pd.DataFrame(np.arange(16).reshape((4, 4)),
                    index=["Ohio", "Colorado", "Utah", "New York"],
                    columns=["one", "two", "three", "four"])
data1.iloc[2] = 5
data1.loc[data1["three"] ==5] = -1

#assign only column three value
data1.loc[data1["three"] ==5, "three"] = -1
data1

Unnamed: 0,one,two,three,four
Ohio,0,1,2,3
Colorado,4,5,6,7
Utah,-1,-1,-1,-1
New York,12,13,14,15


In [5]:
#daata alignment
import pandas as pd
import numpy as np

s1 = pd.Series([7.3, -2.5, 3.4, 1.5], 
               index=["a", "c", "d", "e"])

s2 = pd.Series([-2.1, 3.6, -1.5, 4, 3.1],
                         index=["a", "c", "e", "f", "g"])

#s1+s2 will do outer join where not matched indexes will be NaN or numll
s1, s2, s1+s2

#let's see the same for DataFrames now
df1 = pd.DataFrame(np.arange(9.).reshape((3, 3)), columns=list("bcd"),
                   index=["Ohio", "Texas", "Colorado"])

df2 = pd.DataFrame(np.arange(12.).reshape((4, 3)), columns=list("bde"),
                   index=["Utah", "Ohio", "Texas", "Oregon"])

#arithetic df1+df2 will match outer join for index and outer join with cols, but unmatched cols and rows will be NaN
#column "c" and "e" are not matched on both datasets and that's why they all have NaN
df1, df2, df1+df2

(            b    c    d
 Ohio      0.0  1.0  2.0
 Texas     3.0  4.0  5.0
 Colorado  6.0  7.0  8.0,
           b     d     e
 Utah    0.0   1.0   2.0
 Ohio    3.0   4.0   5.0
 Texas   6.0   7.0   8.0
 Oregon  9.0  10.0  11.0,
             b   c     d   e
 Colorado  NaN NaN   NaN NaN
 Ohio      3.0 NaN   6.0 NaN
 Oregon    NaN NaN   NaN NaN
 Texas     9.0 NaN  12.0 NaN
 Utah      NaN NaN   NaN NaN)

In [20]:
df1 = pd.DataFrame(np.arange(12.).reshape((3, 4)), columns=list("abcd"))
df2 = pd.DataFrame(np.arange(20.).reshape((4, 5)), columns=list("abcde"))
df2.loc[1, "b"] = np.nan
df2

#You can do artihmetic operations but when you have null + value = null
df1 + df2

#but if you need something like isnull(df1,0)+isnull(df2,0)=val
df1.add(df2, fill_value=0)

#both these are equivalent nullif
1/df1, df1.rdiv(1)

#Reindexing a series
df1, df1.reindex(columns=df2.columns, fill_value=0)

(     a    b     c     d
 0  0.0  1.0   2.0   3.0
 1  4.0  5.0   6.0   7.0
 2  8.0  9.0  10.0  11.0,
      a    b     c     d  e
 0  0.0  1.0   2.0   3.0  0
 1  4.0  5.0   6.0   7.0  0
 2  8.0  9.0  10.0  11.0  0)

In [28]:
#operations between dataframe and series
frame = pd.DataFrame(np.arange(12.).reshape((4, 3)),
                     columns=list("bde"),
                     index=["Utah", "Ohio", "Texas", "Oregon"])
#you can do the same operation as numpy a = np.array([[1,2,3],[4,5,6],[5,6,7]]) => a - a[0] = [[0,0,0],[3,3,3],[4,4,4]]
series = frame.iloc[0]
#when you matching series and dataframe, only the columns are matched
frame, series, frame - series

#but if index is not found, outer join or union will happen and not matched columns will be nulls or NaN
series2 = pd.Series(np.arange(3), index=["b", "e", "f"])

frame+series2

#broadcasting over the column "d"
series3 = frame["d"]
series3, frame, frame.sub(series3, axis="index"), frame.rsub(series3, axis="index")

#broadcasting over the row "Ohio"
series3 = frame.loc["Ohio"]
series3, frame, frame.sub(series3, axis="columns"), frame.rsub(series3, axis="columns")

(b    3.0
 d    4.0
 e    5.0
 Name: Ohio, dtype: float64,
           b     d     e
 Utah    0.0   1.0   2.0
 Ohio    3.0   4.0   5.0
 Texas   6.0   7.0   8.0
 Oregon  9.0  10.0  11.0,
           b    d    e
 Utah   -3.0 -3.0 -3.0
 Ohio    0.0  0.0  0.0
 Texas   3.0  3.0  3.0
 Oregon  6.0  6.0  6.0,
           b    d    e
 Utah    3.0  3.0  3.0
 Ohio    0.0  0.0  0.0
 Texas  -3.0 -3.0 -3.0
 Oregon -6.0 -6.0 -6.0)

In [43]:
#function application and mapping
frame = pd.DataFrame(np.random.standard_normal((4, 3)),
                     columns=list("bde"),
                     index=["Utah", "Ohio", "Texas", "Oregon"])

#numpy functions working very well with pandas dataframe
frame, np.abs(frame), np.sum(frame, axis=0), np.sum(frame, axis=1)

#working with function
def f1(x):
    return x.max() - x.min()

#apply function sends frame dataset as argument f1(x=frame)
frame.apply(f1), f1(frame)

#but with apply you can do by columns axis too
frame.apply(f1, axis="columns")

#functions can return series too
def f2(x):
    return pd.Series([x.min(), x.max()], index=["min","max"])

frame.apply(f2), frame.apply(f2, axis=1)

#with 2 decimal places
def my_format(x):
    return f"{x:.2f}"

frame.applymap(my_format)

#you can map only specific column
frame["e"].map(my_format)

Utah      -0.60
Ohio      -0.67
Texas     -0.16
Oregon    -0.30
Name: e, dtype: object

SORTING AND RANKING

In [51]:
#sorting
obj = pd.Series(np.arange(4), index=["d", "a", "b", "c"])
obj.sort_index()

#when using dataframes you can sort by axis. Either rows or columns
frame = pd.DataFrame(np.arange(8).reshape((2, 4)),
                     index=["three", "one"],
                     columns=["d", "a", "b", "c"])
frame.sort_index(axis="index"), frame.sort_index(axis="columns")

#you can sort by descending order
frame.sort_index(axis="index", ascending=False), frame.sort_index(axis="columns", ascending=False)

#if you need to sort by values
obj.sort_values()

#if there are missing values NaN, then missing values will be sorted at the end
obj = pd.Series([4, np.nan, 7, np.nan, -3, 2])
obj.sort_values()

#if you need to sort at the beginning then
obj.sort_values(na_position="first")

#when you need to sort on dataframes, you can add mutiple columns
frame = pd.DataFrame({"b": [4, 7, -3, 2], "a": [0, 1, 0, 1]})
frame.sort_values("b"), frame.sort_values(["a","b"])

(   b  a
 2 -3  0
 3  2  1
 0  4  0
 1  7  1,
    b  a
 2 -3  0
 0  4  0
 3  2  1
 1  7  1)

In [55]:
#rank will get the mean for duplicat values in series
obj = pd.Series([7, -5, 7, 4, 2, 0, 4])
#-5 = 1.0, 0 = 2.0, 2 = 3.0, 4 = (4+4)/2, 7 = (7+7)/2
obj.rank()

#but you can order without mean() and just assigning the order first it gets
obj.rank(method="first")

#rank in descending order
obj.rank(ascending=False)

frame = pd.DataFrame({"b": [4.3, 7, -3, 2], 
                      "a": [0, 1, 0, 1],
                      "c": [-2, 5, 8, -2.5]})
frame.rank(axis="columns")

#rank() have different tie-braking methods ("average", "min","max","first","dense")

Unnamed: 0,b,a,c
0,3.0,2.0,1.0
1,3.0,1.0,2.0
2,1.0,2.0,3.0
3,3.0,2.0,1.0


In [56]:
#check uniqueness in index
obj = pd.Series(np.arange(5), index=["a", "a", "b", "b", "c"])
obj.index.is_unique

False

SUMMARIZING AND COMPUTING DESCRIPTIVE STATISTICS

In [71]:
df = pd.DataFrame([[1.4, np.nan], 
                   [7.1, -4.5],
                   [np.nan, np.nan], 
                   [0.75, -1.3]],
                  index=["a", "b", "c", "d"],
                  columns=["one", "two"])
#default is axis=0 o axis="index"
#nan are summed to 0
df.sum(), df.sum(axis=1), df.sum(axis="columns")

#if you want you can treat as nan not 0
df.sum(axis=0, skipna=False), df.sum(axis=1, skipna=False)

#show index of max value or index of min value
df.idxmax(), df.idxmax(axis=1)

#accumulative sum very important for statistics
df.cumsum(), df.cumsum(axis="columns")

#you can check multiple summaries from dataset
df.describe()

#when you don't have numerical data in dataset, describe shows different summaries
obj = pd.Series(["a", "a", "b", "c"] * 4)
obj.describe()

count     16
unique     3
top        a
freq       8
dtype: object

In [77]:
#correlation and covariance
price = pd.read_pickle("myCSV/yahoo_price.pkl")
volume = pd.read_pickle("myCSV/yahoo_volume.pkl")

returns = price.pct_change()
returns.tail()

#correlation of the overlapping
returns["MSFT"].corr(returns["IBM"])
returns["MSFT"].cov(returns["IBM"])

returns.corr(), returns.cov()

#compute pairwise correlations between columns or rows with another dataframe 
returns.corrwith(returns["IBM"])

AAPL    0.386817
GOOG    0.405099
IBM     1.000000
MSFT    0.499764
dtype: float64

In [87]:
#Unique values, counts and membership
obj = pd.Series(["c", "a", "d", "a", "a", "b", "b", "c", "c"])
#shows only unique values DISTINCT
obj.unique()

#count duplicate values frequencies
obj.value_counts()

#you can combine if numpy is around and in descending order
pd.value_counts(obj.to_numpy(), sort=False)

#check if value exists
filtr = obj.isin(["b","c"])

#you can use as filter
obj[filtr]

#you can call index numbers
to_match = pd.Series(["c", "a", "b", "b", "c", "a"])
unique_vals = pd.Series(["c", "b", "a"])
indices = pd.Index(unique_vals).get_indexer(to_match)
indices

#value counts
data = pd.DataFrame({"Qu1": [1, 3, 4, 3, 4],
                     "Qu2": [2, 3, 1, 2, 3],
                     "Qu3": [1, 5, 2, 4, 4]})
data["Qu1"].value_counts().sort_index()

#for all columns
data.apply(pd.value_counts).fillna(0)

#if you need value counts for multiindex combinations
data = pd.DataFrame({"a": [1, 1, 1, 2, 2], "b": [0, 0, 1, 0, 0]})
data.value_counts()

a  b
1  0    2
2  0    2
1  1    1
dtype: int64