In [1]:
import pandas as pd
import numpy as np

# Creating a DataFrame from scratch

### Using Dictionary

In [2]:
# Build a small frame from a dict mapping column name -> values,
# with the rows labelled 'a', 'b', 'c' instead of the default 0..n-1.
df = pd.DataFrame(
    data = {'id': [100, 101, 102], 'color': ['red', 'blue', 'red']},
    columns = ['id', 'color'],
    index = list('abc'),
)
df

Unnamed: 0,id,color
a,100,red
b,101,blue
c,102,red


### Using List of Lists

In [3]:
# Each inner list is one row; column names are supplied separately.
# No index is given, so rows get the default integer labels.
df1 = pd.DataFrame(
    data = [
        [100, 'red'],
        [101, 'blue'],
        [102, 'red'],
    ],
    columns = ['id', 'color'],
)
df1

Unnamed: 0,id,color
0,100,red
1,101,blue
2,102,red


### Using Numpy Arrays

In [4]:
import numpy as np

In [5]:
# Seed NumPy's global RNG so this cell produces the same matrix on every run
# (Restart & Run All, or re-running just this cell).
np.random.seed(42)

# 4 rows x 2 columns of uniform samples drawn from [0, 1)
arr = np.random.rand(4, 2)
arr

array([[ 0.58656317,  0.09798625],
       [ 0.56463191,  0.94278332],
       [ 0.65879229,  0.61278397],
       [ 0.97149423,  0.34419614]])

In [6]:
# Wrap the 2-D array in a frame, naming its two columns 'x' and 'y'
pd.DataFrame(data = arr, columns = list('xy'))

Unnamed: 0,x,y
0,0.586563,0.097986
1,0.564632,0.942783
2,0.658792,0.612784
3,0.971494,0.344196


### Generating Large Data Set

In [7]:
# Seed the global RNG so the generated scores are reproducible across runs.
np.random.seed(42)

# One row per student id (100..104); scores are random integers in [60, 100]
# (randint's upper bound of 101 is exclusive). The student id becomes the index.
df2 = pd.DataFrame({
    'student': np.arange(100, 105),
    'test': np.random.randint(60, 101, size = 5),
}).set_index('student')
df2

Unnamed: 0_level_0,test
student,Unnamed: 1_level_1
100,82
101,100
102,89
103,66
104,93


### Attaching a Series to a DataFrame

In [8]:
# A named Series whose index labels ('c', 'b') are a subset of df's row labels,
# listed in a different order.
s = pd.Series(
    data = ['round', 'square'],
    index = ['c', 'b'],
    name = 'shapes',
)
s

c     round
b    square
Name: shapes, dtype: object

In [9]:
# Re-display df (built earlier) for reference before it is combined with the Series s
df

Unnamed: 0,id,color
a,100,red
b,101,blue
c,102,red


In [10]:
# Put the Series next to df as an extra column (axis 1 == 'columns').
# Rows are matched on index label, so row 'a' (absent from s) gets a missing value.
df3 = pd.concat([df, s], axis = 1)
df3

Unnamed: 0,id,color,shapes
a,100,red,
b,101,blue,square
c,102,red,round


# Creating a new Column

In [11]:
# Element-wise string concatenation of two columns; a row with a missing
# value in either column yields a missing value in the new column.
df3['mew'] = df3['color'] + ', ' + df3['shapes']
df3

Unnamed: 0,id,color,shapes,mew
a,100,red,,
b,101,blue,square,"blue, square"
c,102,red,round,"red, round"


# Deleting a Column

In [12]:
# Remove the 'mew' column. drop() returns a new frame, so rebind df3 to the
# result instead of mutating the existing object with inplace=True.
# Note: `del df3['mew']` works too.
df3 = df3.drop(['mew'], axis = 1)
df3

Unnamed: 0,id,color,shapes
a,100,red,
b,101,blue,square
c,102,red,round


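`axis = 0` (or `axis = 'index'`) refers to the row labels, which run from top to bottom; `drop` with this axis removes rows.  
`axis = 1` (or `axis = 'columns'`) refers to the column labels, which run from left to right; `drop` with this axis removes columns.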

# Deleting a Row

In [13]:
# Remove the row labelled 'a' (axis 0 == rows). drop() returns a new frame,
# so rebind df3 to the result instead of mutating it with inplace=True.
df3 = df3.drop(['a'], axis = 0)
df3

Unnamed: 0,id,color,shapes
b,101,blue,square
c,102,red,round


# List Comprehension on a Series

In [14]:
# Iterating a Series yields its values. The loop variable is deliberately not
# called `s`: that name is already bound to the 'shapes' Series created earlier,
# and on Python 2 a list-comprehension variable leaks and would overwrite it.
[shape for shape in df3.shapes]

['square', 'round']

In [15]:
# Walk the index and the two columns in lockstep to format one string per row
['{} - {}, {}'.format(label, color, shape)
 for label, color, shape in zip(df3.index, df3['color'], df3['shapes'])]

['b - blue, square', 'c - red, round']

# Getting rid of non-numeric Columns

In [16]:
# Keep only the numeric columns ('number' is the string alias for np.number)
# and preview the first rows of the result.
df.select_dtypes(include = ['number']).head()

Unnamed: 0,id
a,100
b,101
c,102
