### Importing Data with Python

1. Read a text file

In [None]:
# Path of the text file to read
filename = 'file.txt'

# Open the file for reading ('r' means read)
file = open(filename, mode='r')

try:
    # Read the whole file into a single string
    text = file.read()
finally:
    # Close the file even if read() raises, so the handle is never leaked
    file.close()

Using the `with` statement

In [None]:
# The `with` context manager closes the file automatically when the
# block exits, so no explicit close() call is needed.
with open('file.txt', mode='r') as file:
    contents = file.read()
    print(contents)

Using readline()

In [None]:
# readline() returns a single line per call and advances the file position,
# so three consecutive calls print the first three lines of the file.
with open('file.txt') as file:
    for line_number in range(3):
        print(file.readline())

# By contrast, readlines() is used to read all lines from a file and
# return them as a list of strings.

Flat files

In [None]:
# Flat files: text files containing records (table data)

# NumPy supplies loadtxt for reading delimited text into an array
import numpy as np

# Parse the comma-separated file
filename = 'file.txt'
data = np.loadtxt(fname=filename, delimiter=',')

# Display the loaded array as the cell output
data

In [None]:
# Flat files: text files containing records (table data)

# NumPy supplies loadtxt for reading delimited text into an array
import numpy as np

# Parse the comma-separated file, ignoring its first row
filename = 'file.txt'
data = np.loadtxt(
    fname=filename,
    delimiter=',',
    skiprows=1,  # number of leading rows to skip (here: 1)
)

# Display the loaded array as the cell output
data

In [None]:
# Text files containing records (table data)

# Import package
import numpy as np

# Load the file, keeping only selected columns.
# usecols is a list of zero-based column indices to read, not a
# [start, end] range: [0, 2] reads the 1st and 3rd columns only.
filename = 'file.txt'
data = np.loadtxt(filename, delimiter=',', usecols=[0, 2])

# show the data
data

In [None]:
# Customize data import: dtype=str makes loadtxt return every field as a string.
# Relies on `np` and `filename` defined by the preceding cells.
data = np.loadtxt(fname=filename, dtype=str)

Pickled files

In [None]:
# pickle serializes and deserializes Python objects
import pickle

# Pickle files are binary, hence the 'rb' (read binary) mode.
# Caution: only unpickle files that come from a trusted source.
with open('file.pkl', mode='rb') as file:
    data = pickle.load(file)

# Show the restored object
print(data)

Importing Excel spreadsheets

In [None]:
# Import library
import pandas as pd

# Open the Excel workbook; using ExcelFile as a context manager closes the
# underlying file handle once the sheets have been read.
file = 'file.xlsx'
with pd.ExcelFile(file) as data:
    # List the names of the sheets in the workbook
    print(data.sheet_names)

    # parse() loads a single sheet into a DataFrame
    df1 = data.parse('sheet-name')  # select the sheet by name, as a string
    df2 = data.parse(0)  # select the sheet by zero-based index, as an integer

Importing SAS/Stata files

In [None]:
# SAS (Statistical Analysis System) datasets are read with the sas7bdat package

import pandas as pd
from sas7bdat import SAS7BDAT

# Open the dataset in a context manager and convert it to a DataFrame
sas_path = 'data.sas7bdat'
with SAS7BDAT(sas_path) as file:
    df_sas = file.to_data_frame()

In [None]:
# Stata ("Statistics" + "Data") files can be read directly by pandas

import pandas as pd

# Load the .dta file into a DataFrame
stata_path = 'file.dta'
data = pd.read_stata(stata_path)

In [None]:
# Import sas7bdat package
from sas7bdat import SAS7BDAT

# Import pyplot explicitly: no other cell in this notebook defines `plt`,
# so the plotting calls below would raise NameError on a fresh kernel.
import matplotlib.pyplot as plt

# Save file to a DataFrame: df_sas
with SAS7BDAT('sales.sas7bdat') as file:
    df_sas = file.to_data_frame()

# Print head of DataFrame
print(df_sas.head())

# Plot a histogram of the 'P' column (double brackets keep it a DataFrame)
df_sas[['P']].hist()
plt.ylabel('count')
plt.show()

Importing HDF5 files

In [None]:
# HDF5: Hierarchical Data Format, version 5

# h5py is the package used here to open HDF5 files
import h5py

# Open the file in read-only mode ('r')
filename = 'file.hdf5'
data = h5py.File(name=filename, mode='r')

# Display the file object as the cell output
data

Importing MATLAB files

In [None]:
# scipy.io provides the reader for MATLAB .mat files
import scipy.io

# Path of the MATLAB file to load
filename = 'file.mat'

# loadmat returns a dictionary keyed by the MATLAB variable names
mat = scipy.io.loadmat(filename)

2. Write a text file

In [None]:
# Path of the text file to write
filename = 'file.txt'

# Mode 'w' opens the file for writing, creating it if needed and
# truncating any existing content. The context manager closes it on exit.
with open(filename, mode='w') as file:
    pass  # nothing is written here; call file.write(...) to add content

### Creating a database engine

In [None]:
# Using SQLite & SQLAlchemy

# Import the engine factory and the schema inspection helper
from sqlalchemy import create_engine, inspect

# Create an engine pointing at the SQLite database file
engine = create_engine('sqlite:///Northwind.sqlite')

# Getting table names. Engine.table_names() was deprecated in SQLAlchemy 1.4
# and removed in 2.0; the inspector works on both old and new versions.
table_names = inspect(engine).get_table_names()
print(table_names)

Querying relational databases in Python

In [None]:
# SQL query in Python

# Import package and libraries
from sqlalchemy import create_engine, text
import pandas as pd

# Create the engine and open a connection
engine = create_engine('sqlite:///Northwind.sqlite')
con = engine.connect()

try:
    # Query the database. The SQL string is wrapped in text() because
    # SQLAlchemy 2.0 no longer accepts a plain string in execute().
    rs = con.execute(text("SELECT * FROM Orders"))

    # Fetch the rows and build the DataFrame with its column names in one
    # step; this also works when the query returns no rows.
    df = pd.DataFrame(rs.fetchall(), columns=list(rs.keys()))
finally:
    # Close the connection even if the query fails
    con.close()

In [None]:
# Import package and libraries
from sqlalchemy import create_engine
import pandas as pd

# Create the engine only. No connection is opened here: the following cell
# opens its own with `with engine.connect() as con:`, so a connection
# created in this cell would never be used or closed.
engine = create_engine('sqlite:///Northwind.sqlite')

In [None]:
# Using a context manager: the connection is closed automatically on exit.
# Relies on `engine` and `pd` from the preceding cell.
from sqlalchemy import text

with engine.connect() as con:
    # text() is required for raw SQL strings in SQLAlchemy 2.0
    rs = con.execute(text("SELECT * FROM Orders"))
    # fetchmany(size=5) retrieves at most 5 rows; passing columns= here
    # also works when no rows come back
    df = pd.DataFrame(rs.fetchmany(size=5), columns=list(rs.keys()))

Query Data in Pandas

In [None]:
from sqlalchemy import create_engine, text
import pandas as pd

# Manual approach: execute the query, fetch the rows, name the columns.
# Relies on `engine` created in an earlier cell.
with engine.connect() as con:
    # text() is required for raw SQL strings in SQLAlchemy 2.0
    rs = con.execute(text("SELECT * FROM Orders"))
    df = pd.DataFrame(rs.fetchall(), columns=list(rs.keys()))

# pandas equivalent: a single call runs the query and builds the same DataFrame
df = pd.read_sql_query("SELECT * FROM Orders", engine)


Exploiting table relationships

In [None]:
from sqlalchemy import create_engine
import pandas as pd

# Same database file as the other cells; the name is spelled with identical
# case so that case-sensitive file systems resolve to the same file.
engine = create_engine('sqlite:///Northwind.sqlite')

# Join each order to its customer through the shared CustomerID column.
# No comma may precede FROM, and the joined table is named Customers.
query = (
    "SELECT OrderID, CompanyName "
    "FROM Orders INNER JOIN Customers "
    "ON Orders.CustomerID = Customers.CustomerID"
)
df = pd.read_sql_query(query, engine)

print(df.head())

In [None]:
# Relies on `engine` and `pd` from earlier cells.
# NOTE(review): Album and Artist do not look like Northwind tables; presumably
# `engine` should point at a different database for this cell. Confirm.
from sqlalchemy import text

# Open engine in context manager
# Perform query and save results to DataFrame: df
with engine.connect() as con:
    # text() is required for raw SQL strings in SQLAlchemy 2.0
    rs = con.execute(text("SELECT Title, Name FROM Album INNER JOIN Artist on Album.ArtistID = Artist.ArtistID"))
    # Passing columns= here also works when the query returns no rows
    df = pd.DataFrame(rs.fetchall(), columns=list(rs.keys()))
# Print head of DataFrame df
print(df.head())