# Tutorial-1: Importing/exporting data from/to various sources using Pandas library
### A- Import pandas library

In [2]:
import pandas as pd

### B- If your data is on your computer in the same folder as your code:

In [None]:
# CSV file stored alongside this notebook (bare file name, no directory part)
df = pd.read_csv('YOUR_DATA.csv') # the '.csv' extension is part of the file name -- don't omit it

# Excel file stored alongside this notebook
# NOTE: this reassigns `df`, replacing the CSV result above; keep only the line you need
df = pd.read_excel('YOUR_DATA.xlsx') 

### C- If your file is in a different directory, write the full path:

In [None]:
# The r'...' prefix makes this a raw string, so the backslash in the Windows-style
# path is kept literally instead of being interpreted as an escape sequence.
df = pd.read_csv(r'Path where the YOUR_DATA file is stored\YOUR_DATA.csv')

### D- Importing data directly off websites

In [None]:
# Read a dataset straight from a web address instead of a local file
url = 'Dataset URL'  # placeholder: replace with the URL of the dataset
df = pd.read_csv(url)  # for csv files 

# for excel files
# NOTE: this reassigns `df`, replacing the CSV result above; keep only the line that matches your file type
df = pd.read_excel(url)

### E- Your file is space-separated with no column names, and you'd like to add column headers

In [None]:
# Space-separated data with no header row: the column names are supplied explicitly via `names`.
# `url` is not assigned in this cell; it relies on a previously executed cell having set it.
# NOTE(review): delimiter=' ' names a single space as the separator -- if fields can be
# separated by runs of spaces, confirm whether a whitespace regex (sep=r'\s+') is needed instead.
df = pd.read_csv(url, delimiter= ' ', names=['A','B','C','D'])

### F- If your data is in a zipped folder:

In [None]:
from io import BytesIO
from zipfile import ZipFile
import requests

url = 'URL'  # placeholder: replace with the URL of the zip archive

# Download the archive into memory. The timeout stops the cell from hanging forever on a
# stalled connection, and raise_for_status() fails fast on an HTTP error instead of letting
# an error page reach ZipFile and surface as a confusing "not a zip file" error.
content = requests.get(url, timeout=30)
content.raise_for_status()

# The `with` block closes the in-memory archive once the dataframe has been built.
with ZipFile(BytesIO(content.content)) as zf:
    for item in zf.namelist():
        print("File in zip: "+  item)

    # Find the first csv file in the zip. Matching on the extension (case-insensitively)
    # avoids picking up names that merely contain ".csv", e.g. "data.csv.bak".
    csv_names = [s for s in zf.namelist() if s.lower().endswith(".csv")]
    if not csv_names:
        raise ValueError("No .csv file found in the zip archive: " + url)
    match = csv_names[0]

    # skiprows=[0] drops the first line of the file (described by the original author as a
    # leading text line), so the column names are taken from the line that follows it.
    with zf.open(match) as csv_file:
        df = pd.read_csv(csv_file, low_memory=False, skiprows=[0])

### G- Other useful commands for importing data:

In [None]:
# Quick reference only: `filename`, `query`, `connection_object` and `json_string` are
# placeholders that are not defined in the cells shown here -- substitute your own values.
pd.read_table(filename) # From a delimited text file (like TSV)
pd.read_sql(query, connection_object) # Reads from a SQL table/database
pd.read_json(json_string) # Reads from a JSON formatted string, URL or file.

### Converting a dictionary to a dataframe

In [41]:
# Creating a dataframe from a dictionary of scalar values.
# The dictionary is named `record` rather than `dict` so that the built-in `dict` type
# is not shadowed for every cell that runs afterwards in the same kernel.
record = {'A':45,'B':28,'C':31}
# Every value is a scalar, so an explicit index is passed to tell pandas how many rows to
# build; wrapping the dictionary in a list is equivalent: df = pd.DataFrame([record])
df = pd.DataFrame(record, index=[0])
df

Unnamed: 0,A,B,C
0,45,28,31


In [4]:
# A more complicated example--With several values in each column.
# Each key becomes a column and each list holds that column's values (one entry per row).
# The dictionary is named `columns_data` rather than `dict` so that the built-in `dict`
# type is not shadowed for later cells.
columns_data = {'Column1':[23,34,56,65,78],
                'Column2':[12,31,35,67,89],
                'Column3':[14,43,95,24,76]}

# `columns` states explicitly which keys to use and in what order they appear.
df = pd.DataFrame(columns_data, columns=['Column1','Column2','Column3'])
df

Unnamed: 0,Column1,Column2,Column3
0,23,12,14
1,34,31,43
2,56,35,95
3,65,67,24
4,78,89,76


### Converting the contents of a table copied to the clipboard into a dataframe

In [46]:
# Copy a table to the system clipboard first, then run this cell.
# The result depends on whatever is on the clipboard when the cell executes, so re-running
# the notebook later will not necessarily reproduce the output shown below.
df = pd.read_clipboard()  # builds the dataframe from the copied table
df

Unnamed: 0,Season,Ordered,Filming,First aired,Last aired,Novel(s) adapted,Ref(s)
0,Season 1,"March 2, 2010",Second half of 2010,"April 17, 2011","June 19, 2011",A Game of Thrones,[71]
1,Season 2,"April 19, 2011",Second half of 2011,"April 1, 2012","June 3, 2012",A Clash of Kings,[72]
2,Season 3,"April 10, 2012",July – November 2012,"March 31, 2013","June 9, 2013",A Storm of Swords,[73]
3,Season 4,"April 2, 2013",July – November 2013,"April 6, 2014","June 15, 2014",A Storm of Swords,[74]
4,Season 5,"April 8, 2014",July – December 2014,"April 12, 2015","June 14, 2015","A Feast for Crows, A Dance with Dragons and or...",[75]
5,Season 6,"April 8, 2014",July – December 2015,"April 24, 2016","June 26, 2016",Outline from The Winds of Winter and original ...,[76]
6,Season 7,"April 21, 2016",August 2016 – February 2017,"July 16, 2017","August 27, 2017",Outline from A Dream of Spring and original co...,[77]
7,Season 8,"July 30, 2016",October 2017 – July 2018,"April 14, 2019","May 19, 2019",Outline from A Dream of Spring and original co...,[78]


### Converting the contents of an HTML table to a dataframe

In [50]:
# pd.read_html parses an HTML URL, string or file and extracts its tables into a *list*
# of dataframes.
url = 'http://www.fdic.gov/bank/individual/failed/banklist.html'
tables = pd.read_html(url)
# Bind the first table to `df` so that `df` is a DataFrame rather than a list; the
# DataFrame writer methods used in the "Writing data to files" section (df.to_csv, ...)
# do not exist on a list.
df = tables[0]
df

Unnamed: 0,Bank Name,City,ST,CERT,Acquiring Institution,Closing Date
0,The First State Bank,Barboursville,WV,14361,"MVB Bank, Inc.","April 3, 2020"
1,Ericson State Bank,Ericson,NE,18265,Farmers and Merchants Bank,"February 14, 2020"
2,City National Bank of New Jersey,Newark,NJ,21111,Industrial Bank,"November 1, 2019"
3,Resolute Bank,Maumee,OH,58317,Buckeye State Bank,"October 25, 2019"
4,Louisa Community Bank,Louisa,KY,58112,Kentucky Farmers Bank Corporation,"October 25, 2019"
...,...,...,...,...,...,...
556,"Superior Bank, FSB",Hinsdale,IL,32646,"Superior Federal, FSB","July 27, 2001"
557,Malta National Bank,Malta,OH,6629,North Valley Bank,"May 3, 2001"
558,First Alliance Bank & Trust Co.,Manchester,NH,34264,Southern New Hampshire Bank & Trust,"February 2, 2001"
559,National State Bank of Metropolis,Metropolis,IL,3815,Banterra Bank of Marion,"December 14, 2000"


# Writing data to files:

In [None]:
# Quick reference only: `filename`, `table_name` and `connection_object` are placeholders
# that are not defined in the cells shown here -- substitute your own values.
# NOTE: the same `filename` is passed to every file writer below; use a distinct name
# (with the matching extension) for each format you actually want to keep.
df.to_csv(filename) # Writes to a CSV file
df.to_excel(filename) # Writes to an Excel file
df.to_sql(table_name, connection_object) # Writes to a SQL table
df.to_json(filename) # Writes to a file in JSON format
df.to_html(filename) # Saves as an HTML table
df.to_clipboard() # Writes to the clipboard