In [1]:
import numpy as np
import pandas as pd

### DataFrame

### Creating a DataFrame from a dict of lists

In [2]:
# Column-oriented input: each key becomes a column and each list supplies
# that column's values, one per row.
data = dict(
    state=list('abcd'),
    year=list(range(2001, 2005)),
    data=[1, 2, 3, 4],
)
df = pd.DataFrame(data)
df

Unnamed: 0,state,year,data
0,a,2001,1
1,b,2002,2
2,c,2003,3
3,d,2004,4


### For large datasets, preview the first few rows with `head()`

In [3]:
df.head()

Unnamed: 0,state,year,data
0,a,2001,1
1,b,2002,2
2,c,2003,3
3,d,2004,4


### By default `head()` returns the first 5 rows; pass a number to get that many rows instead

In [4]:
df.head(1)

Unnamed: 0,state,year,data
0,a,2001,1


### Arranging the columns in a given order

In [5]:
# `columns=` selects which keys of `data` to use and the left-to-right order
# in which they appear.
df1 = pd.DataFrame(data, columns=['year', 'data', 'state'])
df1

Unnamed: 0,year,data,state
0,2001,1,a
1,2002,2,b
2,2003,3,c
3,2004,4,d


In [6]:
df

Unnamed: 0,state,year,data
0,a,2001,1
1,b,2002,2
2,c,2003,3
3,d,2004,4


### Requested columns that are not keys in the data are filled with NaN

In [7]:
# 'coll' is not a key in `data`, so that column is created but holds only NaN.
df2 = pd.DataFrame(data, columns=['coll', 'data', 'year'])
df2

Unnamed: 0,coll,data,year
0,,1,2001
1,,2,2002
2,,3,2003
3,,4,2004


### Retrieving a column's values

In [8]:
df['data']

0    1
1    2
2    3
3    4
Name: data, dtype: int64

In [9]:
df['state']

0    a
1    b
2    c
3    d
Name: state, dtype: object

In [10]:
df

Unnamed: 0,state,year,data
0,a,2001,1
1,b,2002,2
2,c,2003,3
3,d,2004,4


In [11]:
# Attribute-style access: returns the 'year' column, the same values df['year'] gives.
df.year

0    2001
1    2002
2    2003
3    2004
Name: year, dtype: int64

In [12]:
df

Unnamed: 0,state,year,data
0,a,2001,1
1,b,2002,2
2,c,2003,3
3,d,2004,4


### `loc` returns a row's details; you need to specify the index label

In [14]:
# 3 is an index label here (df carries the default 0..3 index), not a position.
df.loc[3]

state       d
year     2004
data        4
Name: 3, dtype: object

### Creating an extra column with a given value

In [23]:
# Assigning a scalar creates the column and repeats the value on every row.
df['temp'] = 25

In [24]:
df

Unnamed: 0,state,year,data,temp
0,a,2001,1,25
1,b,2002,2,25
2,c,2003,3,25
3,d,2004,4,25


### Assigning column values from an array (here generated with `np.arange`)

In [26]:
# Fill the new column from an array, one value per row.
# The range is sized from the frame itself so the assignment stays valid for
# any number of rows; an array whose length does not match the index raises
# ValueError.
df['debt'] = np.arange(len(df))
df

Unnamed: 0,state,year,data,temp,debt
0,a,2001,1,25,0
1,b,2002,2,25,1
2,c,2003,3,25,2
3,d,2004,4,25,3


### Deleting a column

In [27]:
df

Unnamed: 0,state,year,data,temp,debt
0,a,2001,1,25,0
1,b,2002,2,25,1
2,c,2003,3,25,2
3,d,2004,4,25,3


In [28]:
# Removes the 'temp' column from df itself (in place).
del df['temp']

In [29]:
df

Unnamed: 0,state,year,data,debt
0,a,2001,1,0
1,b,2002,2,1
2,c,2003,3,2
3,d,2004,4,3


In [30]:
df.columns

Index(['state', 'year', 'data', 'debt'], dtype='object')

### Building a DataFrame from nested dicts

In [31]:
# Outer keys are state names; each inner dict maps a year (as a string) to
# that state's value. The two states do not cover the same set of years.
data = dict(
    ktaka={'2001': 10, '2003': 30, '2005': 50},
    maha={'2001': 10, '2002': 20, '2003': 30, '2004': 40},
)

In [32]:
# Outer keys ('ktaka', 'maha') become the columns and the inner year keys
# become the row index; a year missing for a state shows up as NaN.
df = pd.DataFrame(data)
df

Unnamed: 0,ktaka,maha
2001,10.0,10.0
2002,,20.0
2003,30.0,30.0
2004,,40.0
2005,50.0,


### NaN is placed where a value is not available
#### The outer dict keys become the columns and the inner dict keys become the row index

In [33]:
df

Unnamed: 0,ktaka,maha
2001,10.0,10.0
2002,,20.0
2003,30.0,30.0
2004,,40.0
2005,50.0,


### transposing the dataset

In [34]:
df.T

Unnamed: 0,2001,2002,2003,2004,2005
ktaka,10.0,,30.0,,50.0
maha,10.0,20.0,30.0,40.0,


### Naming the column axis and the row axis

In [35]:
df

Unnamed: 0,ktaka,maha
2001,10.0,10.0
2002,,20.0
2003,30.0,30.0
2004,,40.0
2005,50.0,


In [37]:
# Names the column axis; df.columns then reports name='States'.
df.columns.name = 'States'

In [38]:
# Names the row axis; df.index then reports name='Year'.
df.index.name = 'Year'

In [39]:
df

States,ktaka,maha
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2001,10.0,10.0
2002,,20.0
2003,30.0,30.0
2004,,40.0
2005,50.0,


In [40]:
df.values

array([[10., 10.],
       [nan, 20.],
       [30., 30.],
       [nan, 40.],
       [50., nan]])

In [41]:
df.index

Index(['2001', '2002', '2003', '2004', '2005'], dtype='object', name='Year')

In [42]:
df.columns

Index(['ktaka', 'maha'], dtype='object', name='States')

In [43]:
df

States,ktaka,maha
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
2001,10.0,10.0
2002,,20.0
2003,30.0,30.0
2004,,40.0
2005,50.0,


In [44]:
'ktaka' in df.columns

True

In [45]:
# The index labels are the string keys of the nested dict, so membership is
# tested with the string '2002' rather than the integer 2002.
'2002' in df.index

True