# Create a DataFrame

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

## pd.DataFrame.from_dict
* pd.DataFrame.from_dict(data, orient='columns', dtype=None)
    - orient : {'columns', 'index'}

In [8]:
data = {'col1':[1,2,3,4],'col2':['a','b','c','d']}
df_x = pd.DataFrame.from_dict(data)
df_x

Unnamed: 0,col1,col2
0,1,a
1,2,b
2,3,c
3,4,d


In [15]:
data = {'row1':[1,2,3,4],'row2':['a','b','c','d']}
df_x = pd.DataFrame.from_dict(data,orient='index')
df_x

Unnamed: 0,0,1,2,3
row1,1,2,3,4
row2,a,b,c,d


## pd.read_csv

### header: None, int, 'infer', int list

In [31]:
df_x = pd.read_csv('dataframe_csv_load.csv',header=None)
print('load_csv with header=None')
df_x

load_csv with header=None


Unnamed: 0,0,1,2,3
0,id,age,income,chile_num
1,id_0001,22,100000,2
2,id_0002,25,150000,0
3,id_0003,53,220000,1
4,id_0004,62,205000,5
5,id_0005,39,120000,0
6,id_0006,41,80000,0
7,id_0007,26,110000,2
8,id_0008,55,310000,3


In [32]:
df_x = pd.read_csv('dataframe_csv_load.csv',header=0)
print('load_csv with header=0')
df_x

load_csv with header=0


Unnamed: 0,id,age,income,chile_num
0,id_0001,22,100000,2
1,id_0002,25,150000,0
2,id_0003,53,220000,1
3,id_0004,62,205000,5
4,id_0005,39,120000,0
5,id_0006,41,80000,0
6,id_0007,26,110000,2
7,id_0008,55,310000,3


In [34]:
df_x = pd.read_csv('dataframe_csv_load.csv',header='infer')
print('load_csv with header=infer')
df_x

load_csv with header=infer


Unnamed: 0,id,age,income,chile_num
0,id_0001,22,100000,2
1,id_0002,25,150000,0
2,id_0003,53,220000,1
3,id_0004,62,205000,5
4,id_0005,39,120000,0
5,id_0006,41,80000,0
6,id_0007,26,110000,2
7,id_0008,55,310000,3


In [35]:
df_x = pd.read_csv('dataframe_csv_load.csv',header=[0,3,5])
print('load_csv with header=[0,3,5]')
df_x

load_csv with header=[0,3,5]


Unnamed: 0_level_0,id,age,income,chile_num
Unnamed: 0_level_1,id_0003,53,220000,1
Unnamed: 0_level_2,id_0005,39,120000,0
0,id_0006,41,80000,0
1,id_0007,26,110000,2
2,id_0008,55,310000,3


### squeeze: If only one columns is contained in the file, and the 'squeeze' flag is raised, return a Series 

In [41]:
df_x = pd.read_csv('series_csv_load.csv')
print('load one-columned csv')
print("return type is "+str(type(df_x)))
df_x

load one-columned csv
return type is <class 'pandas.core.frame.DataFrame'>


Unnamed: 0,tag value
b,1
a,0
c,2
d,-1


In [42]:
df_x = pd.read_csv('series_csv_load.csv',squeeze=True)
print('load one-columned csv with squeeze is raised')
print("return type is "+str(type(df_x)))
df_x

load one-columned csv with squeeze is raised
return type is <class 'pandas.core.series.Series'>


b    1
a    0
c    2
d   -1
Name: tag value, dtype: int64

### index_col: the column used as indices (key)

In [70]:
df_x = pd.read_csv('dataframe_csv_load.csv')
print('load_csv, without index_col')
df_x

load_csv, without index_col


Unnamed: 0,id,age,income,child_num
0,id_0001,22,100000,2
1,id_0002,25,150000,0
2,id_0003,53,220000,1
3,id_0004,62,205000,5
4,id_0005,39,120000,0
5,id_0006,41,80000,0
6,id_0007,26,110000,2
7,id_0008,55,310000,3


In [71]:
df_x = pd.read_csv('dataframe_csv_load.csv',index_col=['id'])
print('load_csv, index_col = id')
df_x

load_csv, index_col = id


Unnamed: 0_level_0,age,income,child_num
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
id_0001,22,100000,2
id_0002,25,150000,0
id_0003,53,220000,1
id_0004,62,205000,5
id_0005,39,120000,0
id_0006,41,80000,0
id_0007,26,110000,2
id_0008,55,310000,3


### usecols: list, used columns

In [66]:
df_x = pd.read_csv('dataframe_csv_load.csv',usecols=['id','income','child_num'],index_col=['id'])
print('load_csv, with 3 used columns one of which is the index column')
df_x

load_csv, with 3 used columns one of which is the index column


Unnamed: 0_level_0,income,child_num
id,Unnamed: 1_level_1,Unnamed: 2_level_1
id_0001,100000,2
id_0002,150000,0
id_0003,220000,1
id_0004,205000,5
id_0005,120000,0
id_0006,80000,0
id_0007,110000,2
id_0008,310000,3


## pd.read_excel

In [69]:
df_x = pd.read_excel('dataframe_excel_load.xlsx',usecols=['id','income','child_num'],index_col=['id'])
print('load_excel, with 3 used columns one of which is the index column')
df_x

TypeError: list indices must be integers or slices, not str

## pd.read_table

## pd.read_fwf

## pd.read_cliboard

## from_items

## from_records