In [1]:
import pandas as pd
import numpy as np


## Creating DataFrames

In [54]:
# Build a frame from a dict: every key becomes a column, every list its values.
df = pd.DataFrame(
    {
        'numbers': [1, 2, 3],
        'colors': ['red', 'white', 'blue'],
    }
)

In [55]:
df

Unnamed: 0,numbers,colors
0,1,red
1,2,white
2,3,blue


In [56]:
# Same data, with the column order stated explicitly through `columns`.
# The call is already inside parentheses, so no backslash continuation is needed.
df = pd.DataFrame(
    data={'numbers': [1, 2, 3], 'colors': ['red', 'white', 'blue']},
    columns=["numbers", "colors"],
)

In [57]:
df

Unnamed: 0,numbers,colors
0,1,red
1,2,white
2,3,blue


### Create a DataFrame of random numbers:

In [64]:
# 5 rows x 4 columns of floats drawn from the standard normal distribution.
df = pd.DataFrame(
    np.random.randn(5, 4),
    columns=['A', 'B', 'C', 'D'],
)

In [65]:
df

Unnamed: 0,A,B,C,D
0,0.151073,-0.327224,0.389413,0.627434
1,-1.237907,-1.566551,-0.966586,-1.571249
2,-0.006582,0.111723,0.658752,-0.962237
3,-0.058349,1.159683,1.698902,0.321119
4,0.754529,-0.195814,0.51704,0.577977


In [66]:
# 5 rows x 3 columns holding the integers 0..14, filled row by row.
df = pd.DataFrame(
    np.arange(15).reshape(5, 3),
    columns=['A', 'B', 'C'],
)

In [67]:
df

Unnamed: 0,A,B,C
0,0,1,2
1,3,4,5
2,6,7,8
3,9,10,11
4,12,13,14


### Create a sample DataFrame with datetime

In [68]:
# Five timestamps starting at 2015-02-24 00:00, spaced one minute apart.
# 'min' is used rather than the older 'T' alias: pandas 2.2+ flags 'T' as
# deprecated, while 'min' means the same thing on old and new versions alike.
rng = pd.date_range('2015-02-24', periods=5, freq='min')
# Pair every timestamp with a random float from the standard normal distribution.
df = pd.DataFrame({'Date': rng, 'Val': np.random.randn(len(rng))})

In [69]:
df

Unnamed: 0,Date,Val
0,2015-02-24 00:00:00,-1.160817
1,2015-02-24 00:01:00,-0.802845
2,2015-02-24 00:02:00,-1.025889
3,2015-02-24 00:03:00,-0.347684
4,2015-02-24 00:04:00,0.29516


In [80]:
# Five consecutive calendar days ('D' alias) starting at 2015-02-24.
# Other values accepted by `freq` are the pandas frequency aliases.
rng = pd.date_range(start='2015-02-24', periods=5, freq='D')
df = pd.DataFrame({'Date': rng})

In [81]:
df

Unnamed: 0,Date
0,2015-02-24
1,2015-02-25
2,2015-02-26
3,2015-02-27
4,2015-02-28


In [None]:
# Reference table of frequency aliases that can be passed as `freq=`.
# Stored in a triple-quoted string: the previous form relied on backslash
# continuations inside a plain "..." literal (with a stray space after one
# backslash and a blank line in the middle), which Python cannot parse.
# NOTE: pandas 2.2+ renames several of these aliases (for example T -> min,
# H -> h, S -> s, M -> ME, Q -> QE, A -> YE); check the documentation of the
# installed version before relying on the older spellings.
freq_aliases = """\
Alias     Description

B         business day frequency
C         custom business day frequency (experimental)
D         calendar day frequency
W         weekly frequency
M         month end frequency
BM        business month end frequency
CBM       custom business month end frequency
MS        month start frequency
BMS       business month start frequency
CBMS      custom business month start frequency
Q         quarter end frequency
BQ        business quarter end frequency
QS        quarter start frequency
BQS       business quarter start frequency
A         year end frequency
BA        business year end frequency
AS        year start frequency
BAS       business year start frequency
BH        business hour frequency
H         hourly frequency
T, min    minutely frequency
S         secondly frequency
L, ms     milliseconds
U, us     microseconds
N         nanoseconds"""
# print() keeps the table layout; a bare expression would show escaped "\n"s.
print(freq_aliases)

#### MultiIndexing

In [82]:
# Cartesian product of the two label lists: 4 outer x 2 inner = 8 index pairs.
idx = pd.MultiIndex.from_product(
    [
        ['bar', 'baz', 'foo', 'qux'],
        ['one', 'two'],
    ]
)

In [83]:
idx

MultiIndex([('bar', 'one'),
            ('bar', 'two'),
            ('baz', 'one'),
            ('baz', 'two'),
            ('foo', 'one'),
            ('foo', 'two'),
            ('qux', 'one'),
            ('qux', 'two')],
           )

In [93]:
# 8 x 2 table of random integers in [0, 5), using the two-level `idx` as row labels.
df = pd.DataFrame(
    np.random.randint(0, 5, size=(8, 2)),
    index=idx,
    columns=['A', 'B'],
)

In [94]:
df

Unnamed: 0,Unnamed: 1,A,B
bar,one,2,4
bar,two,1,4
baz,one,0,1
baz,two,3,0
foo,one,0,1
foo,two,4,4
qux,one,4,2
qux,two,1,2


### Creating an empty DataFrame

In [2]:
# No data, index or columns are supplied, so the resulting frame is completely empty.
df = pd.DataFrame()

In [3]:
df

### Appending a new row to a DataFrame

In [4]:
# Frame with three named columns (A, B, C) and no rows yet.
df = pd.DataFrame(columns=list("ABC"))

In [5]:
df

Unnamed: 0,A,B,C


##### Appending a row by setting a single column value

In [6]:
# Row label 0 does not exist yet, so this assignment creates the row;
# the columns that are not assigned (B and C) are left as NaN.
df.loc[0, "A"] = 1

In [7]:
df

Unnamed: 0,A,B,C
0,1,,


In [8]:
# Assigning to a cell that already exists simply overwrites its value.
df.loc[0, "A"] = 5

In [9]:
df

Unnamed: 0,A,B,C
0,5,,


#### Appending a row, given list of values:

In [10]:
# A whole new row from a list: values fill the columns in order (A=4, B=5, C=6).
df.loc[1] = [4, 5, 6]

In [11]:
df

Unnamed: 0,A,B,C
0,5,,
1,4,5.0,6.0


#### Appending a row given a dictionary:

In [12]:
# A whole new row given as a dict of column name -> value.
df.loc[2] = {"A": 3, "B": 6, "C": 7}

In [13]:
df

Unnamed: 0,A,B,C
0,5,,
1,4,5.0,6.0
2,3,6.0,7.0


#### Adding a column

In [22]:
# New column D holding 3, 6, 9 -- one value for each of the three existing rows.
df["D"] = list(range(3, 12, 3))

In [23]:
df

Unnamed: 0,A,B,C,D
0,5,,,3
1,4,5.0,6.0,6
2,3,6.0,7.0,9


In [31]:
# Replace the missing value (NaN) at row 0, column B with a real number.
df.loc[0, "B"] = 45

In [29]:
df

Unnamed: 0,A,B,C,D
0,5,45,,3
1,4,5,6.0,6
2,3,6,7.0,9


In [95]:
# Small categorical sample: five flowers, each with a colour and a size label.
df = pd.DataFrame(
    {
        "color": ['red', 'blue', 'red', 'blue', "red"],
        "name": ["rose", "violet", "tulip", "harebell", "lily"],
        "size": ["big", "big", "small", "small", "small"],
    }
)

In [96]:
df

Unnamed: 0,color,name,size
0,red,rose,big
1,blue,violet,big
2,red,tulip,small
3,blue,harebell,small
4,red,lily,small


In [38]:
# For this string column, describe() reports the number of entries (count),
# the number of distinct values (unique), the most frequent value (top)
# and how many times that value occurs (freq).
df["size"].describe()

count         5
unique        2
top       small
freq          3
Name: size, dtype: object

In [41]:
df[df["name"] == "tulip"]  # single condition: the comparison yields a boolean mask that selects the matching rows

Unnamed: 0,color,name,size
2,red,tulip,small


In [43]:
df[ (df["name"] == "tulip") | (df["name"] == "harebell")]  # OR uses `|`; each comparison must be parenthesised because `|` binds tighter than `==`

Unnamed: 0,color,name,size
2,red,tulip,small
3,blue,harebell,small


In [47]:
df[ (df["name"] == "tulip") & (df["size"] == "small")] # AND uses `&`; the parentheses around each comparison are required here as well

Unnamed: 0,color,name,size
2,red,tulip,small


### List DataFrame column names

In [98]:
list(df)  # iterating a DataFrame yields its column labels

['color', 'name', 'size']

In [99]:
[c for c in df]  # column labels collected with an explicit comprehension over the frame

['color', 'name', 'size']

In [101]:
df.columns.tolist()  # the columns Index converted to a plain Python list

['color', 'name', 'size']

In [102]:
df.columns  # the Index object itself (labels plus dtype), not a list

Index(['color', 'name', 'size'], dtype='object')