In [3]:
import numpy as np
import pandas as pd

# Wrap a NumPy array in a Series; with no index given, pandas attaches
# a default RangeIndex (0 .. n-1).
a = np.array([4, 7, -5, 3])
obj = pd.Series(data=a)

# Show the backing array together with the automatically created index.
(obj.array, obj.index)

(<PandasArray>
 [4, 7, -5, 3]
 Length: 4, dtype: int32,
 RangeIndex(start=0, stop=4, step=1))

In [12]:
# NOTE: only the last expression of a cell is rendered; the intermediate bare
# expressions below are evaluated but not shown.

# Build a Series with explicit string labels instead of the default integer index.
obj2 = pd.Series(data=[4, 7, -5, 3], index=list("dbac"))
obj2

# Look up a single value by its index label.
obj2.loc["a"]
# Overwrite the value stored under the label "c".
obj2.loc["c"] = 6
obj2

# Select several rows at once by passing an array of index labels.
rows = np.array(["c", "a"])
obj2.loc[rows]

# A boolean condition keeps only the rows for which it is True.
obj2.loc[obj2 > 0]

# Arithmetic is applied element-wise to the selected values.
obj2.loc[obj2 > 0] * 2

# NumPy functions such as np.exp can be applied to the Series directly.
np.exp(obj2)

# Membership tests are made against the index labels.
"a" in obj2.index, "w" in obj2.index

(True, False)

In [18]:
# A dict passed to Series supplies both parts: keys become the index,
# values become the data.
sdata = dict(Ohio=35000, Texas=71000, Oregon=16000, Utah=5000)
obj3 = pd.Series(data=sdata)
obj3

# Convert the Series back into a plain dictionary.
a = obj3.to_dict()
a

# An explicit index decides which keys are used and in what order.
# "California" has no entry in sdata, so its value is NaN; "Utah" is not
# requested, so it is left out of the result.
states = ["California", "Ohio", "Oregon", "Texas"]
obj4 = pd.Series(data=sdata, index=states)
obj3, obj4

# Detect missing / present values with the top-level functions ...
pd.isna(obj4), pd.notna(obj4)

# ... or with the equivalent Series methods.
obj4.isna(), obj4.notna()

(Ohio      35000
 Texas     71000
 Oregon    16000
 Utah       5000
 dtype: int64,
 California        NaN
 Ohio          35000.0
 Oregon        16000.0
 Texas         71000.0
 dtype: float64)

In [22]:
# Arithmetic between two Series first aligns them on their index labels,
# which behaves like an automatic join on the index.
obj3 + obj4

# The values of a Series can be given a name ...
obj4.name = "population"
# ... and so can its index.
obj4.index.name = "state"
obj4

# The index of an existing Series can be replaced by plain assignment;
# the new label list must have one label per row.
a = np.array([4, 3, -1, 7])
b = pd.Series(data=a)

b.index = ["Bob", "Steve", "Jeff", "Ryan"]
b

Bob      4
Steve    3
Jeff    -1
Ryan     7
dtype: int32

DATA FRAME

In [48]:
# DataFrame: built here from a dict of equal-length lists, one key per column.
data = {
    "state": ["Ohio", "Ohio", "Ohio", "Nevada", "Nevada", "Nevada"],
    "year": [2000, 2001, 2002, 2001, 2002, 2003],
    "pop": [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}
frame = pd.DataFrame(data=data)

# head() returns the first rows and tail() the last rows (five by default);
# pass a number to ask for a different count, e.g. head(2).
frame.head(), frame.head(2), frame.tail()

# The `columns` argument fixes the column order.
pd.DataFrame(data, columns=["year", "state", "pop"])

# Asking for a column that is not in the dict ("debt") still creates it,
# filled with missing values.
frame2 = pd.DataFrame(data, columns=["year", "state", "pop", "debt"])
frame2

# List the column labels.
frame2.columns

# Selecting one column by label returns it as a Series.
frame2["state"]

# Attribute access is a shortcut for the same thing, but it only works when the
# column name is a valid Python identifier (no whitespace or symbols) and does
# not clash with an existing DataFrame attribute -- e.g. frame2.pop is the
# DataFrame method, not the "pop" column. Prefer frame2["..."].
frame2.state

# Rows are retrieved with .loc (by label) or .iloc (by integer position).
frame2.loc[1], frame2.iloc[2]

# Assigning a scalar fills the whole column with that value.
frame2["debt"] = 16.5

# Assigning an array sets one value per row (its length must match the frame).
frame2["debt"] = np.arange(6, dtype=float)

# Assigning a Series aligns on the index: rows 2, 4 and 5 receive values,
# every other row becomes NaN.
val = pd.Series(data=[-1.2, -1.5, -1.7], index=[2, 4, 5])
frame2["debt"] = val

# Assigning to a label that does not exist yet creates a new column.
frame2["eastern"] = (frame2["state"] == "Ohio")

# A new column can be computed from existing ones; NaN in "debt" propagates.
frame2["totalAmt"] = frame2["pop"] * frame2["debt"]
frame2

Unnamed: 0,year,state,pop,debt,eastern,totalAmt
0,2000,Ohio,1.5,,True,
1,2001,Ohio,1.7,,True,
2,2002,Ohio,3.6,-1.2,True,-4.32
3,2001,Nevada,2.4,,False,
4,2002,Nevada,2.9,-1.5,False,-4.35
5,2003,Nevada,3.2,-1.7,False,-5.44


In [49]:
# Remove the "eastern" column.
# drop() returns a new frame rather than mutating the one shown by the previous
# cell, and errors="ignore" keeps this cell re-runnable. The in-place spelling
# `del frame2["eastern"]` also works, but raises KeyError if the cell is
# executed a second time because the column is already gone.
frame2 = frame2.drop(columns=["eastern"], errors="ignore")
frame2

Unnamed: 0,year,state,pop,debt,totalAmt
0,2000,Ohio,1.5,,
1,2001,Ohio,1.7,,
2,2002,Ohio,3.6,-1.2,-4.32
3,2001,Nevada,2.4,,
4,2002,Nevada,2.9,-1.5,-4.35
5,2003,Nevada,3.2,-1.7,-5.44


In [60]:
# Nested dict: the outer keys become the columns and the inner keys become
# the row index.
populations = {
    "Ohio": {2000: 1.5, 2001: 1.7, 2002: 3.6},
    "Nevada": {2001: 2.4, 2002: 2.9},
}
frame3 = pd.DataFrame(data=populations)

# .T transposes the frame (rows and columns are swapped).
frame3, frame3.T

# An explicit index chooses which row labels the frame has.
a = pd.DataFrame(data=populations, index=[2001, 2002, 2003])

# The dict values may also be Series, e.g. slices of existing columns.
pdata = {
    "Ohio": frame3["Ohio"].iloc[:-1],
    "Nevada": frame3["Nevada"].iloc[:2],
}
a = pd.DataFrame(data=pdata)

# Name the row index and the column index. The names are stored on the two
# Index objects; the DataFrame itself has no "name" attribute.
frame3.index.name = "year"
frame3.columns.name = "state"

# Converting to NumPy yields a plain 2-D array: the index/column labels and
# their names are not part of the result.
frame3.to_numpy()
# np.array() gives the same result.
np.array(frame3)

# An all-float frame converts to a numeric array, whereas a frame whose columns
# have different types converts to an array with dtype=object.
frame3.to_numpy(), frame2.to_numpy()

(array([[1.5, nan],
        [1.7, 2.4],
        [3.6, 2.9]]),
 array([[2000, 'Ohio', 1.5, nan, nan],
        [2001, 'Ohio', 1.7, nan, nan],
        [2002, 'Ohio', 3.6, -1.2, -4.32],
        [2001, 'Nevada', 2.4, nan, nan],
        [2002, 'Nevada', 2.9, -1.5, -4.35],
        [2003, 'Nevada', 3.2, -1.7, -5.44]], dtype=object))

Index Objects

In [74]:
# Any array or other sequence of labels passed when constructing a Series or
# DataFrame is converted internally into an Index object.
obj = pd.Series(data=np.arange(3), index=list("abc"))
obj.index[1:]

# An Index can also be created separately and then handed to the constructor.
labelind = [0, 1, 2]
label = pd.Index(data=labelind)
label
obj2 = pd.Series(data=[1.5, -2.5, 0], index=label)

# Membership tests work against the column labels as well as the row index.
("Ohio" in frame3.columns, 2002 in frame3.index)

(True, True)

In [103]:
# Unlike a Python set, a pandas Index may contain duplicate labels.
pd.Index(data=["foo", "foo", "bar", "bar"])

# Set difference: the labels of frame3.index that are not in the given list.
frame3.index.difference([2001, 2003, 2004, 2005])

def _labelled_frame(columns, suffixes, index):
    """Build a demo DataFrame whose cells are "<column><suffix>" strings.

    columns  -- column labels, in order (any iterable of strings, e.g. "ABCD")
    suffixes -- one value per row; str(value) is appended to the column label
    index    -- row labels of the resulting frame
    """
    cells = {col: [col + str(s) for s in suffixes] for col in columns}
    return pd.DataFrame(cells, index=index)


# Three frames that share the columns A-D but have different row labels.
df1 = _labelled_frame("ABCD", range(0, 4), index=[0, 1, 2, 3])
df2 = _labelled_frame("ABCD", range(4, 8), index=[0, 5, 2, 7])
df3 = _labelled_frame("ABCD", range(8, 12), index=[8, 9, 3, 11])

# concat() stacks the frames on top of each other by default. The frames here
# happen to share their columns, but that is not a requirement: by default
# pandas keeps the union of the columns and fills the gaps with NaN.
concatFrame = [df1, df2, df3]
result = pd.concat(concatFrame)

# `keys` tags the rows of each input frame with an outer index level, so one
# original frame can be pulled back out with .loc.
result = pd.concat(concatFrame, keys=["x", "y", "z"])
result, result.loc["x"]

# With axis=1 the frames are placed side by side instead: rows are matched on
# the index, and rows present in only one frame get NaN in the other frame's
# columns. Both copies of the shared labels "B" and "D" are kept.
df4 = _labelled_frame("BDF", [2, 3, 6, 7], index=[2, 3, 6, 7])
result = pd.concat([df1, df4], axis=1)
result

Unnamed: 0,A,B,C,D,B.1,D.1,F
0,A0,B0,C0,D0,,,
1,A1,B1,C1,D1,,,
2,A2,B2,C2,D2,B2,D2,F2
3,A3,B3,C3,D3,B3,D3,F3
6,,,,,B6,D6,F6
7,,,,,B7,D7,F7
