*****Basic data structures in pandas*****

In [1]:
import pandas as pd
import numpy as np

**Object creation**

In [2]:
s=pd.Series([1,2,3,np.nan,7,8])
s

0    1.0
1    2.0
2    3.0
3    NaN
4    7.0
5    8.0
dtype: float64

In [3]:
dates=pd.date_range("20130101",periods=6)
dates

DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03', '2013-01-04',
               '2013-01-05', '2013-01-06'],
              dtype='datetime64[ns]', freq='D')

In [4]:
df=pd.DataFrame(np.random.rand(6,4), index=dates, columns=list("ABCD"))
df

Unnamed: 0,A,B,C,D
2013-01-01,0.584267,0.110783,0.139468,0.59148
2013-01-02,0.090995,0.377773,0.539741,0.117804
2013-01-03,0.269757,0.643941,0.507938,0.428964
2013-01-04,0.812012,0.401939,0.888026,0.747942
2013-01-05,0.10156,0.782753,0.612346,0.749196
2013-01-06,0.766901,0.058541,0.830892,0.528883


In [5]:
df2=pd.DataFrame(
    {
        "A":1.0,
        "B":pd.Timestamp("20130101"),
        "C": pd.Series([1,2,3,4], index=list(range(4)), dtype="float32"),
        "D": np.array([3]*4,dtype="int32"),
        "E":pd.Categorical(["test","train","test","train"]),
        "F":"foo faa"
    }
)
df2

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-01,1.0,3,test,foo faa
1,1.0,2013-01-01,2.0,3,train,foo faa
2,1.0,2013-01-01,3.0,3,test,foo faa
3,1.0,2013-01-01,4.0,3,train,foo faa


**Viewing data**

In [6]:
df2.dtypes

A          float64
B    datetime64[s]
C          float32
D            int32
E         category
F           object
dtype: object

In [7]:
# df2.info()
# df2.index
# df2.columns
df2.T.T

Unnamed: 0,A,B,C,D,E,F
0,1.0,2013-01-01 00:00:00,1.0,3,test,foo faa
1,1.0,2013-01-01 00:00:00,2.0,3,train,foo faa
2,1.0,2013-01-01 00:00:00,3.0,3,test,foo faa
3,1.0,2013-01-01 00:00:00,4.0,3,train,foo faa


In [None]:
df.head()

In [8]:
#axis 1 is reprsenting columns change
df.sort_index(axis=1, ascending=False)  
#axis 0 is representing row change
df.sort_index(axis=0, ascending=False)

Unnamed: 0,A,B,C,D
2013-01-06,0.766901,0.058541,0.830892,0.528883
2013-01-05,0.10156,0.782753,0.612346,0.749196
2013-01-04,0.812012,0.401939,0.888026,0.747942
2013-01-03,0.269757,0.643941,0.507938,0.428964
2013-01-02,0.090995,0.377773,0.539741,0.117804
2013-01-01,0.584267,0.110783,0.139468,0.59148


In [None]:
df.sort_values(by="A" and "B")

Unnamed: 0,A,B,C,D
2013-01-06,0.766901,0.058541,0.830892,0.528883
2013-01-01,0.584267,0.110783,0.139468,0.59148
2013-01-02,0.090995,0.377773,0.539741,0.117804
2013-01-04,0.812012,0.401939,0.888026,0.747942
2013-01-03,0.269757,0.643941,0.507938,0.428964
2013-01-05,0.10156,0.782753,0.612346,0.749196


**Getitem ([])**

In [13]:
df[["A","B"]]

Unnamed: 0,A,B
2013-01-01,0.584267,0.110783
2013-01-02,0.090995,0.377773
2013-01-03,0.269757,0.643941
2013-01-04,0.812012,0.401939
2013-01-05,0.10156,0.782753
2013-01-06,0.766901,0.058541


In [14]:
df[0:3]

Unnamed: 0,A,B,C,D
2013-01-01,0.584267,0.110783,0.139468,0.59148
2013-01-02,0.090995,0.377773,0.539741,0.117804
2013-01-03,0.269757,0.643941,0.507938,0.428964


**Selection by label**

In [15]:
df.iloc[0:5,0:3]

Unnamed: 0,A,B,C
2013-01-01,0.584267,0.110783,0.139468
2013-01-02,0.090995,0.377773,0.539741
2013-01-03,0.269757,0.643941,0.507938
2013-01-04,0.812012,0.401939,0.888026
2013-01-05,0.10156,0.782753,0.612346


In [16]:
df.loc[:,["A","B"]]

Unnamed: 0,A,B
2013-01-01,0.584267,0.110783
2013-01-02,0.090995,0.377773
2013-01-03,0.269757,0.643941
2013-01-04,0.812012,0.401939
2013-01-05,0.10156,0.782753
2013-01-06,0.766901,0.058541


In [18]:
df.iat[1,2]

np.float64(0.5397410528992883)

**bolean Indexing**

In [19]:
import seaborn as sns
kashti=sns.load_dataset("titanic")
kashti

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.0500,S,Third,man,True,,Southampton,no,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,0,2,male,27.0,0,0,13.0000,S,Second,man,True,,Southampton,no,True
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True
888,0,3,female,,1,2,23.4500,S,Third,woman,False,,Southampton,no,False
889,1,1,male,26.0,0,0,30.0000,C,First,man,True,C,Cherbourg,yes,True


In [22]:
kashti.sample(100)

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
556,1,1,female,48.0,1,0,39.6000,C,First,woman,False,A,Cherbourg,yes,False
119,0,3,female,2.0,4,2,31.2750,S,Third,child,False,,Southampton,no,False
376,1,3,female,22.0,0,0,7.2500,S,Third,woman,False,,Southampton,yes,True
232,0,2,male,59.0,0,0,13.5000,S,Second,man,True,,Southampton,no,True
336,0,1,male,29.0,1,0,66.6000,S,First,man,True,C,Southampton,no,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
94,0,3,male,59.0,0,0,7.2500,S,Third,man,True,,Southampton,no,True
853,1,1,female,16.0,0,1,39.4000,S,First,woman,False,D,Southampton,yes,False
596,1,2,female,,0,0,33.0000,S,Second,woman,False,,Southampton,yes,True
100,0,3,female,28.0,0,0,7.8958,S,Third,woman,False,,Southampton,no,True


In [24]:
kashti[kashti["age"]<5]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
7,0,3,male,2.0,3,1,21.075,S,Third,child,False,,Southampton,no,False
10,1,3,female,4.0,1,1,16.7,S,Third,child,False,G,Southampton,yes,False
16,0,3,male,2.0,4,1,29.125,Q,Third,child,False,,Queenstown,no,False
43,1,2,female,3.0,1,2,41.5792,C,Second,child,False,,Cherbourg,yes,False
63,0,3,male,4.0,3,2,27.9,S,Third,child,False,,Southampton,no,False
78,1,2,male,0.83,0,2,29.0,S,Second,child,False,,Southampton,yes,False
119,0,3,female,2.0,4,2,31.275,S,Third,child,False,,Southampton,no,False
164,0,3,male,1.0,4,1,39.6875,S,Third,child,False,,Southampton,no,False
171,0,3,male,4.0,4,1,29.125,Q,Third,child,False,,Queenstown,no,False
172,1,3,female,1.0,1,1,11.1333,S,Third,child,False,,Southampton,yes,False


In [33]:
kashti[kashti["alive"]=="yes"]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.9250,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
8,1,3,female,27.0,0,2,11.1333,S,Third,woman,False,,Southampton,yes,False
9,1,2,female,14.0,1,0,30.0708,C,Second,child,False,,Cherbourg,yes,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
875,1,3,female,15.0,0,0,7.2250,C,Third,child,False,,Cherbourg,yes,True
879,1,1,female,56.0,0,1,83.1583,C,First,woman,False,C,Cherbourg,yes,False
880,1,2,female,25.0,0,1,26.0000,S,Second,woman,False,,Southampton,yes,False
887,1,1,female,19.0,0,0,30.0000,S,First,woman,False,B,Southampton,yes,True


In [29]:
kashti[kashti["alone"]==False]

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.2500,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
3,1,1,female,35.0,1,0,53.1000,S,First,woman,False,C,Southampton,yes,False
7,0,3,male,2.0,3,1,21.0750,S,Third,child,False,,Southampton,no,False
8,1,3,female,27.0,0,2,11.1333,S,Third,woman,False,,Southampton,yes,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
874,1,2,female,28.0,1,0,24.0000,C,Second,woman,False,,Cherbourg,yes,False
879,1,1,female,56.0,0,1,83.1583,C,First,woman,False,C,Cherbourg,yes,False
880,1,2,female,25.0,0,1,26.0000,S,Second,woman,False,,Southampton,yes,False
885,0,3,female,39.0,0,5,29.1250,Q,Third,woman,False,,Queenstown,no,False
