# Importing Data into Pandas 

## Basic Data Importing Techniques 

In [None]:
# conventional way to import pandas
import pandas as pd 

### Read CSV

In [None]:
# Load the diabetes dataset from a CSV file (relative path) into the variable `diabetes`
diabetes = pd.read_csv("../data/diabetes.csv")

In [None]:
# Check the type of the object returned by read_csv
type(diabetes)

In [None]:
# Examine first few rows 
diabetes.head() 

### Read Excel Sheet

In [None]:
# Load the lung-capacity dataset from a legacy Excel (.xls) workbook
lungcap = pd.read_excel("../data/LungCapData.xls")

In [None]:
type(lungcap)

In [None]:
# examine first few rows 
lungcap.head() 

### From URL

In [None]:
# Read a pulse-rate dataset directly from a URL and store the result in a DataFrame.
# read_table is used instead of read_csv because its default separator is a tab.
pulse = pd.read_table('http://media.news.health.ufl.edu/misc/bolt/Intro/SPSS/OriginalData/pulse.txt')

In [None]:
# examine the first 5 rows 
pulse.head()

### Modify Dataset

In [None]:
# First attempt: read the file with default settings, so its first line is used as the column header
hepatitis = pd.read_csv('../data/hepatitis.data')

In [None]:
hepatitis.head() 

In [None]:
# Column labels for the hepatitis data, supplied explicitly through `names`
col_names = [
    "Class", "AGE", "SEX", "STEROID", "ANTIVIRALS",
    "FATIGUE", "MALAISE", "ANOREXIA", "LIVER BIG", "LIVER FIRM",
    "SPLEEN PALPABLE", "SPIDERS", "ASCITES", "VARICES", "BILIRUBIN",
    "ALK PHOSPHATE", "SGOT", "ALBUMIN", "PROTIME", "HISTOLOGY",
]
# Re-read the same file, this time labelling the columns with the list above
hepatitis = pd.read_csv("../data/hepatitis.data", names=col_names)

In [None]:
hepatitis.head() 

### Read Biological Data (.txt)

In [None]:
# Read a tab-delimited text file; header=None means no line is used as column names
chrom = pd.read_csv("../data/Encode_HMM_data.txt", delimiter= "\t", header=None)

In [None]:
# Examine first few rows 
chrom.head()

In [None]:
# The unlabeled columns are hard to read, so reload the file with descriptive column names
cols_name = ['chrom', 'start', 'stop', 'type']
chrom = pd.read_csv("../data/Encode_HMM_data.txt", delimiter="\t", header=None, names = cols_name)

In [None]:
# now examine first few rows 
chrom.head()

### Read Tab-Separated Data (.tsv)

In [None]:
# A .tsv file is read with read_csv by setting the separator to a tab
pokemon = pd.read_csv("../data/pokemon.tsv", sep="\t")

In [None]:
pokemon.head() 

## Advanced Data Importing Techniques

### Importing and Manipulating CSV Files with pd.read_csv()

In [None]:
import pandas as pd

In [None]:
# Baseline: read the COVID-19 CSV with default settings
df = pd.read_csv("../data/covid19.csv")
# examine first few rows 
df.head() 

In [None]:
# Use the "Country/Region" column as the row index while reading
df = pd.read_csv("../data/covid19.csv", index_col= "Country/Region")
df.head() 

In [None]:
# Read without a header row: header=None does not skip the first line,
# it reads that line as data and gives the columns integer labels
df = pd.read_csv("../data/covid19.csv", header=None)
df.head() 

In [None]:
# Custom column names: header=0 marks the first line as the header row,
# and `names` replaces the labels found there
df = pd.read_csv(
    "../data/covid19.csv",
    header=0,
    names=[
        "SL",
        "ObservationDate",
        "State",
        "Country",
        "Last Update",
        "Confirmed",
        "Deaths",
        "Recovered",
    ],
)
df.head()

In [None]:
# Load only the selected columns
df = pd.read_csv("../data/covid19.csv", usecols = ["Country/Region", "Confirmed", "Deaths", "Recovered"])
df.head() 

In [None]:
# Set the index and load only selected columns; the index column must also be listed in usecols
df = pd.read_csv("../data/covid19.csv", index_col="Country/Region",
                 usecols=["Country/Region", "Confirmed", "Deaths", "Recovered"])
df.head() 

In [None]:
# exploring columns 
df.columns

In [None]:
# Rename the three data columns in place (the index column is not part of df.columns)
df.columns = ["Confirmed Cases", "Deaths Cases", "Recovered Cases"]

In [None]:
df.columns

In [None]:
# Rename the index label to "Country"
df.index.name = "Country"

In [None]:
df.head()

### Importing messy CSV Files

In [None]:
import pandas as pd

In [None]:
titanic = pd.read_csv("../data/titanic_raw.csv")

In [None]:
titanic.head() 

In [None]:
# Column labels to apply when the raw Titanic file is re-read
col_names = [
    "Survived",
    "Class",
    "Gender",
    "Age",
    "SipSp",
    "ParCh",
    "Fare",
    "Emb",
    "Deck",
]

In [None]:
# Apply the custom column names; header=None means every line, including the first, is read as data
titanic = pd.read_csv("../data/titanic_raw.csv", header = None, names = col_names)
titanic.head() 

In [None]:
# Skip the first 3 lines of the file before parsing
titanic = pd.read_csv("../data/titanic_raw.csv", skiprows= 3, header = None, names = col_names)
titanic.head() 

In [None]:
titanic.tail(2)

In [None]:
# Drop the 3 leading lines and the 2 trailing lines of the raw file.
# skipfooter is not supported by the default C parser (pandas emits a
# ParserWarning and falls back), so request the Python engine explicitly.
titanic = pd.read_csv(
    "../data/titanic_raw.csv",
    skiprows=3,
    skipfooter=2,
    header=None,
    names=col_names,
    engine="python",
)
titanic.head()

In [None]:
titanic.tail() 

In [None]:
# Save the cleaned data to a CSV file in the working directory, without writing the row index
titanic.to_csv("titanic_clean.csv", index=False)

In [None]:
pd.read_csv("titanic_clean.csv")

### Importing and Manipulating Excel Files with pd.read_excel()

In [None]:
import pandas as pd

In [None]:
df = pd.read_excel("../data/LungCapData.xls")

In [None]:
df.head() 

In [None]:
# Replace the header row (row 0) with custom column names and use the first column as the index
df = pd.read_excel("../data/LungCapData.xls", index_col = 0, header = 0, 
                   names=['LungCap', 'Age', 'Height', 'Smoke', 'Gender', 'Caesarean'])
df.head() 

In [None]:
# usecols as an Excel letter range: read only columns A through D
df = pd.read_excel("../data/LungCapData.xls", index_col=0,  header = 0, usecols = "A:D")
df.head() 

In [None]:
# Read only Excel columns C through E
df = pd.read_excel("../data/LungCapData.xls", index_col = 0, header = 0, usecols = "C:E")
df.head() 

In [None]:
# Combine a single column and a range: column A plus columns C through E
df = pd.read_excel("../data/LungCapData.xls", index_col = 0, header = 0, usecols = "A, C:E")
df.head() 

In [None]:
# Read everything up to and including column C.
# The documented string forms for usecols are explicit letters/ranges such as
# "A:E" or "A,C,E:F"; an open-ended ":C" is not one of them, so spell out the start.
df = pd.read_excel("../data/LungCapData.xls", index_col=0, header=0, usecols="A:C")
df.head()

In [None]:
# Read from column C through the last column.
# usecols needs an explicit end column; an open-ended "C:" is not a documented form.
# NOTE(review): "F" assumes the sheet has six columns (LungCap ... Caesarean) - adjust if it differs.
df = pd.read_excel("../data/LungCapData.xls", index_col=0, header=0, usecols="C:F")
df.head()

In [None]:
# usecols as a list of zero-based column positions: columns 0, 3 and 4
df = pd.read_excel("../data/LungCapData.xls", index_col = 0, header = 0, usecols = [0,3,4])
df.head() 

In [None]:
# Read the first three columns (positions 0 through 2).
# A bare integer for usecols (formerly "parse up to this column") was removed in
# pandas 1.0 and now raises ValueError, so pass the positions as a list.
df = pd.read_excel("../data/LungCapData.xls", index_col=0, header=0, usecols=[0, 1, 2])
df.head()

In [None]:
# usecols as a list of header names: read only the "Gender" and "Smoke" columns
df = pd.read_excel("../data/LungCapData.xls", index_col = 0, header = 0, usecols = ["Gender", "Smoke"])
df.head() 

### Customizing and Handling Multiple Excel Sheets import with pd.read_excel()

In [None]:
import pandas as pd

In [None]:
# With no sheet_name given, read_excel reads only the first sheet of the workbook
pd.read_excel("../data/covid19_multiple_sheets.xls")

In [None]:
# An integer sheet_name selects a sheet by zero-based position: 1 is the second sheet
pd.read_excel("../data/covid19_multiple_sheets.xls", sheet_name = 1)

In [None]:
# Select the sheet by name and skip the rows at zero-based positions 0 and 1
pd.read_excel("../data/covid19_multiple_sheets.xls", sheet_name = "day1", skiprows= [0,1])

In [None]:
# Sheet "day2": skip the first 2 rows and keep only Excel columns A through C
pd.read_excel("../data/covid19_multiple_sheets.xls", sheet_name = "day2", skiprows= 2, usecols= "A:C")

In [None]:
# Sheet "day1": skip the first 2 rows, keep Excel columns A through D, and store the result in df
df = pd.read_excel("../data/covid19_multiple_sheets.xls", sheet_name = "day1", skiprows= 2, usecols= "A:D")

In [None]:
df.head()

In [None]:
df.tail()

In [None]:
df.info()

In [None]:
# Export the DataFrame as CSV; index=False leaves the row index out of the file
df.to_csv("../data/covid19_multiple_sheets_export.csv", index= False)

In [None]:
# Export the DataFrame as an Excel workbook.
# Written as .xlsx because the xlwt-based .xls writer was removed in pandas 2.0,
# so exporting to the legacy .xls format fails on current pandas versions.
df.to_excel("../data/covid19_multiple_sheets_export.xlsx")

In [None]:
pd.read_csv("../data/covid19_multiple_sheets_export.csv")

### Importing Data from the Web with pd.read_html()

In [None]:
import pandas as pd

In [None]:
url = "https://en.wikipedia.org/wiki/1976_Summer_Olympics_medal_table"

In [None]:
# read_html returns a list of DataFrames, one per HTML table it parses from the page
df = pd.read_html(url)

In [None]:
type(df)

In [None]:
# Take the first table (index 0) from the list returned by read_html
df = pd.read_html(url)[0]
df.head() 

In [None]:
wik_1976 = pd.read_html(url)[0]

In [None]:
wik_1976.head()

In [None]:
wik_1976.tail()

In [None]:
url2 ="https://en.wikipedia.org/wiki/1996_Summer_Olympics_medal_table"

In [None]:
# Display the second table (index 1) parsed from the 1996 page
pd.read_html(url2)[1]

In [None]:
# Keep the second table (index 1) from the 1996 page; note this downloads and parses the page again
wik_1996 = pd.read_html(url2)[1]

In [None]:
wik_1996.head()

In [None]:
wik_1996.info()

In [None]:
# Save both scraped tables as CSV files in the working directory, without the row index
wik_1976.to_csv("wik_1976.csv", index= False)
wik_1996.to_csv("wik_1996.csv", index= False)