# Introduction to Pandas

## How to open data files using Pandas

### How to open CSV Files using Pandas

In [1]:
## import Pandas
import pandas as pd

In [8]:
## load the CSV file into a dataframe (no extra parser options)
df = pd.read_csv(
    './data/the_office/the_office_lines.csv',
)
## preview the first 5 rows of the dataframe
df.head(n=5)

Unnamed: 0,Index,Character,Line,Season,Episode_Number
0,0,Michael,All right Jim. Your quarterlies look very goo...,1,1
1,1,Jim,"Oh, I told you. I couldn’t close it. So…",1,1
2,2,Michael,So you’ve come to the master for guidance? Is...,1,1
3,3,Jim,"Actually, you called me in here, but yeah.",1,1
4,4,Michael,"All right. Well, let me show you how it’s don...",1,1


In [9]:
## load the CSV file, stating the text encoding explicitly
df = pd.read_csv(
    './data/the_office/the_office_lines.csv',
    encoding='utf-8',
)
## preview the first 5 rows of the dataframe
df.head(n=5)


Unnamed: 0,Index,Character,Line,Season,Episode_Number
0,0,Michael,All right Jim. Your quarterlies look very goo...,1,1
1,1,Jim,"Oh, I told you. I couldn’t close it. So…",1,1
2,2,Michael,So you’ve come to the master for guidance? Is...,1,1
3,3,Jim,"Actually, you called me in here, but yeah.",1,1
4,4,Michael,"All right. Well, let me show you how it’s don...",1,1


In [10]:
## load the CSV file, taking the column names from line 1 (zero-based)
## instead of line 0: the file's own header line is skipped and the
## first data row is used as the column names
df = pd.read_csv(
    './data/the_office/the_office_lines.csv',
    header=1,
)
## preview the first 5 rows of the dataframe
df.head(n=5)

Unnamed: 0,0,Michael,All right Jim. Your quarterlies look very good. How are things at the library?,1,1.1
0,1,Jim,"Oh, I told you. I couldn’t close it. So…",1,1
1,2,Michael,So you’ve come to the master for guidance? Is...,1,1
2,3,Jim,"Actually, you called me in here, but yeah.",1,1
3,4,Michael,"All right. Well, let me show you how it’s don...",1,1
4,5,Michael,"[on the phone] Yes, I’d like to speak to your...",1,1


In [14]:
## load the CSV file with our own column names;
## header=0 marks the file's existing header line so that `names`
## replaces it rather than it being read as a data row
df = pd.read_csv(
    './data/the_office/the_office_lines.csv',
    names=["col1", "col2", "col3", "col4", "col5"],
    header=0,
)
## preview the first 5 rows of the dataframe
df.head(n=5)

Unnamed: 0,col1,col2,col3,col4,col5
0,0,Michael,All right Jim. Your quarterlies look very goo...,1,1
1,1,Jim,"Oh, I told you. I couldn’t close it. So…",1,1
2,2,Michael,So you’ve come to the master for guidance? Is...,1,1
3,3,Jim,"Actually, you called me in here, but yeah.",1,1
4,4,Michael,"All right. Well, let me show you how it’s don...",1,1


In [15]:
## load the CSV file, using its first column (position 0) as the row index
df = pd.read_csv(
    './data/the_office/the_office_lines.csv',
    index_col=0,
)
## preview the first 5 rows of the dataframe
df.head(n=5)

Unnamed: 0_level_0,Character,Line,Season,Episode_Number
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Michael,All right Jim. Your quarterlies look very goo...,1,1
1,Jim,"Oh, I told you. I couldn’t close it. So…",1,1
2,Michael,So you’ve come to the master for guidance? Is...,1,1
3,Jim,"Actually, you called me in here, but yeah.",1,1
4,Michael,"All right. Well, let me show you how it’s don...",1,1


### How to open TSV Files

In [17]:
## load the TSV file by passing a tab character as the `delimiter`
df = pd.read_csv(
    './data/the_office/the_office_lines.tsv',
    delimiter='\t',
)
## preview the first 5 rows of the dataframe
df.head(n=5)

Unnamed: 0,Index,Character,Line,Season,Episode_Number
0,0,Michael,All right Jim. Your quarterlies look very goo...,1,1
1,1,Jim,"Oh, I told you. I couldn’t close it. So…",1,1
2,2,Michael,So you’ve come to the master for guidance? Is...,1,1
3,3,Jim,"Actually, you called me in here, but yeah.",1,1
4,4,Michael,"All right. Well, let me show you how it’s don...",1,1


In [18]:
## load the TSV file by passing a tab character as the `sep` parameter
df = pd.read_csv(
    './data/the_office/the_office_lines.tsv',
    sep='\t',
)
## preview the first 5 rows of the dataframe
df.head(n=5)

Unnamed: 0,Index,Character,Line,Season,Episode_Number
0,0,Michael,All right Jim. Your quarterlies look very goo...,1,1
1,1,Jim,"Oh, I told you. I couldn’t close it. So…",1,1
2,2,Michael,So you’ve come to the master for guidance? Is...,1,1
3,3,Jim,"Actually, you called me in here, but yeah.",1,1
4,4,Michael,"All right. Well, let me show you how it’s don...",1,1


### Reading JSON file

In [22]:
## load the JSON file into a dataframe
df = pd.read_json(
    './data/the_office/the_office_lines.json',
)
## preview the first 5 rows of the dataframe
df.head(n=5)

Unnamed: 0,Index,Character,Line,Season,Episode_Number
0,0,Michael,All right Jim. Your quarterlies look very goo...,1,1
1,1,Jim,"Oh, I told you. I couldn’t close it. So…",1,1
2,2,Michael,So you’ve come to the master for guidance? Is...,1,1
3,3,Jim,"Actually, you called me in here, but yeah.",1,1
4,4,Michael,"All right. Well, let me show you how it’s don...",1,1


### Reading Excel file

In [23]:
## load the Excel workbook into a dataframe
df = pd.read_excel(
    './data/the_office/the_office_lines.xlsx',
)
## preview the first 5 rows of the dataframe
df.head(n=5)

Unnamed: 0,Index,Character,Line,Season,Episode_Number
0,0,Michael,All right Jim. Your quarterlies look very goo...,1,1
1,1,Jim,"Oh, I told you. I couldn’t close it. So…",1,1
2,2,Michael,So you’ve come to the master for guidance? Is...,1,1
3,3,Jim,"Actually, you called me in here, but yeah.",1,1
4,4,Michael,"All right. Well, let me show you how it’s don...",1,1
