In [1]:
##Clear the environment
rm(list=ls())

##Turn off scientific notations for numbers
options(scipen = 999)  

##Set locale
Sys.setlocale("LC_ALL", "English") 

##Set seed for reproducibility
set.seed(2345)

# Turn off warnings
options(warn = -1)

getstats <- function(cm){
  # Sensititvity a.k.a TPR
  tpr <-cm[2,2]/(cm[2,2]+cm[2,1])
  fpr <-cm[1,2]/(cm[1,2]+cm[1,1])
  
  # Specificity a.k.a. TNR
  tnr <- cm[1,1]/(cm[1,1]+cm[1,2])
  fnr <- cm[2,1]/(cm[2,1]+cm[2,2])
  
  # Calculate accuracy
  acc <-(cm[2,2]+cm[1,1])/sum(cm)
  err <-(cm[1,2]+cm[2,1])/sum(cm)
  
  #Precision - Positive Predictive Value
  ppv <- cm[2,2]/(cm[2,2]+cm[1,2])
  
  # Negative Predictive Value
  npv <- cm[1,1]/(cm[1,1]+cm[2,1])
  
  rbind(TruePos_Sensitivity=tpr, FalsePos=fpr, TrueNeg_Specificty=tnr, FalseNeg=fnr, PositivePredictiveValue=ppv, NegativePredictiveValue=npv, Accuracy = acc, Error = err)
}

# clean the data names and data
# Use: df<-cleanit(df)
cleanit <-function(df){
  names(df) <-tolower(names(df))
  names(df) <- gsub("\\(","",names(df))
  names(df) <- gsub("\\)","",names(df))
  names(df) <- gsub("\\.","",names(df))
  names(df) <- gsub("_","",names(df))
  names(df) <- gsub("-","",names(df))
  names(df) <- gsub(",","",names(df))
  return(df)
}

##SQLITE 3 

In [1]:
library(sqldf)

: package 'sqldf' was built under R version 3.2.2Loading required package: gsubfn
Loading required package: proto
Loading required package: RSQLite
Loading required package: DBI


Connect to a database on S drive

In [46]:
dbname<-'S:/databases/chinook.db'
db <- dbConnect(SQLite(), dbname=dbname)

# List 

The tables in the database

In [47]:
dbListTables(db)              

In [50]:
#Alternative Method
sqldf('SELECT * FROM sqlite_master', connection=db)

Unnamed: 0,type,name,tbl_name,rootpage,sql
1,table,Album,Album,19,"CREATE TABLE [Album] (  [AlbumId] INTEGER NOT NULL,  [Title] NVARCHAR(160) NOT NULL,  [ArtistId] INTEGER NOT NULL,  CONSTRAINT [PK_Album] PRIMARY KEY ([AlbumId]),  FOREIGN KEY ([ArtistId]) REFERENCES [Artist] ([ArtistId]) ON DELETE NO ACTION ON UPDATE NO ACTION )"
2,table,Artist,Artist,241,"CREATE TABLE [Artist] (  [ArtistId] INTEGER NOT NULL,  [Name] NVARCHAR(120),  CONSTRAINT [PK_Artist] PRIMARY KEY ([ArtistId]) )"
3,table,Customer,Customer,262,"CREATE TABLE [Customer] (  [CustomerId] INTEGER NOT NULL,  [FirstName] NVARCHAR(40) NOT NULL,  [LastName] NVARCHAR(20) NOT NULL,  [Company] NVARCHAR(80),  [Address] NVARCHAR(70),  [City] NVARCHAR(40),  [State] NVARCHAR(40),  [Country] NVARCHAR(40),  [PostalCode] NVARCHAR(10),  [Phone] NVARCHAR(24),  [Fax] NVARCHAR(24),  [Email] NVARCHAR(60) NOT NULL,  [SupportRepId] INTEGER,  CONSTRAINT [PK_Customer] PRIMARY KEY ([CustomerId]),  FOREIGN KEY ([SupportRepId]) REFERENCES [Employee] ([EmployeeId]) ON DELETE NO ACTION ON UPDATE NO ACTION )"
4,table,Employee,Employee,265,"CREATE TABLE [Employee] (  [EmployeeId] INTEGER NOT NULL,  [LastName] NVARCHAR(20) NOT NULL,  [FirstName] NVARCHAR(20) NOT NULL,  [Title] NVARCHAR(30),  [ReportsTo] INTEGER,  [BirthDate] DATETIME,  [HireDate] DATETIME,  [Address] NVARCHAR(70),  [City] NVARCHAR(40),  [State] NVARCHAR(40),  [Country] NVARCHAR(40),  [PostalCode] NVARCHAR(10),  [Phone] NVARCHAR(24),  [Fax] NVARCHAR(24),  [Email] NVARCHAR(60),  CONSTRAINT [PK_Employee] PRIMARY KEY ([EmployeeId]),  FOREIGN KEY ([ReportsTo]) REFERENCES [Employee] ([EmployeeId]) ON DELETE NO ACTION ON UPDATE NO ACTION )"
5,table,Genre,Genre,269,"CREATE TABLE [Genre] (  [GenreId] INTEGER NOT NULL,  [Name] NVARCHAR(120),  CONSTRAINT [PK_Genre] PRIMARY KEY ([GenreId]) )"
6,table,Invoice,Invoice,270,"CREATE TABLE [Invoice] (  [InvoiceId] INTEGER NOT NULL,  [CustomerId] INTEGER NOT NULL,  [InvoiceDate] DATETIME NOT NULL,  [BillingAddress] NVARCHAR(70),  [BillingCity] NVARCHAR(40),  [BillingState] NVARCHAR(40),  [BillingCountry] NVARCHAR(40),  [BillingPostalCode] NVARCHAR(10),  [Total] NUMERIC(10,2) NOT NULL,  CONSTRAINT [PK_Invoice] PRIMARY KEY ([InvoiceId]),  FOREIGN KEY ([CustomerId]) REFERENCES [Customer] ([CustomerId]) ON DELETE NO ACTION ON UPDATE NO ACTION )"
7,table,InvoiceLine,InvoiceLine,272,"CREATE TABLE [InvoiceLine] (  [InvoiceLineId] INTEGER NOT NULL,  [InvoiceId] INTEGER NOT NULL,  [TrackId] INTEGER NOT NULL,  [UnitPrice] NUMERIC(10,2) NOT NULL,  [Quantity] INTEGER NOT NULL,  CONSTRAINT [PK_InvoiceLine] PRIMARY KEY ([InvoiceLineId]),  FOREIGN KEY ([InvoiceId]) REFERENCES [Invoice] ([InvoiceId]) ON DELETE NO ACTION ON UPDATE NO ACTION,  FOREIGN KEY ([TrackId]) REFERENCES [Track] ([TrackId]) ON DELETE NO ACTION ON UPDATE NO ACTION )"
8,table,MediaType,MediaType,275,"CREATE TABLE [MediaType] (  [MediaTypeId] INTEGER NOT NULL,  [Name] NVARCHAR(120),  CONSTRAINT [PK_MediaType] PRIMARY KEY ([MediaTypeId]) )"
9,table,Playlist,Playlist,277,"CREATE TABLE [Playlist] (  [PlaylistId] INTEGER NOT NULL,  [Name] NVARCHAR(120),  CONSTRAINT [PK_Playlist] PRIMARY KEY ([PlaylistId]) )"
10,table,PlaylistTrack,PlaylistTrack,278,"CREATE TABLE [PlaylistTrack] (  [PlaylistId] INTEGER NOT NULL,  [TrackId] INTEGER NOT NULL,  CONSTRAINT [PK_PlaylistTrack] PRIMARY KEY ([PlaylistId], [TrackId]),  FOREIGN KEY ([PlaylistId]) REFERENCES [Playlist] ([PlaylistId]) ON DELETE NO ACTION ON UPDATE NO ACTION,  FOREIGN KEY ([TrackId]) REFERENCES [Track] ([TrackId]) ON DELETE NO ACTION ON UPDATE NO ACTION )"


The columns in a table

In [6]:
dbListFields(db, 'Genre')   

In [52]:
#Alternative Method
sqldf('pragma table_info(Genre)', connection=db) 

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
1,0,GenreId,INTEGER,1,,1
2,1,Name,NVARCHAR(120),0,,0


The data in a table

In [8]:
dbReadTable(db, 'Genre')    

Unnamed: 0,GenreId,Name
1,1,Rock
2,2,Jazz
3,3,Metal
4,4,Alternative & Punk
5,5,Rock And Roll
6,6,Blues
7,7,Latin
8,8,Reggae
9,9,Pop
10,10,Soundtrack


In [53]:
 #Alternative
sqldf('SELECT * FROM Genre',connection=db)  

Unnamed: 0,GenreId,Name
1,1,Rock
2,2,Jazz
3,3,Metal
4,4,Alternative & Punk
5,5,Rock And Roll
6,6,Blues
7,7,Latin
8,8,Reggae
9,9,Pop
10,10,Soundtrack


#Create Table from CSV File

In [10]:
nba<-read.csv('D:/Data/NBA_train.csv',header=TRUE)
dbWriteTable(conn = db, name = 'NBA', value = nba, row.names = FALSE)
sqldf('SELECT * FROM NBA', dbname=dbname) 
dbRemoveTable(db, 'NBA')   
rm(nba)

Unnamed: 0,SeasonEnd,Team,Playoffs,W,PTS,oppPTS,FG,FGA,X2P,X2PA,X3P,X3PA,FT,FTA,ORB,DRB,AST,STL,BLK,TOV
1,1980,Atlanta Hawks,1,50,8573,8334,3261,7027,3248,6952,13,75,2038,2645,1369,2406,1913,782,539,1495
2,1980,Boston Celtics,1,61,9303,8664,3617,7387,3455,6965,162,422,1907,2449,1227,2457,2198,809,308,1539
3,1980,Chicago Bulls,0,30,8813,9035,3362,6943,3292,6668,70,275,2019,2592,1115,2465,2152,704,392,1684
4,1980,Cleveland Cavaliers,0,37,9360,9332,3811,8041,3775,7854,36,187,1702,2205,1307,2381,2108,764,342,1370
5,1980,Denver Nuggets,0,30,8878,9240,3462,7470,3379,7215,83,255,1871,2539,1311,2524,2079,746,404,1533
6,1980,Detroit Pistons,0,16,8933,9609,3643,7596,3586,7377,57,219,1590,2149,1226,2415,1950,783,562,1742
7,1980,Golden State Warriors,0,24,8493,8853,3527,7318,3500,7197,27,121,1412,1914,1155,2437,2028,779,339,1492
8,1980,Houston Rockets,1,41,9084,9070,3599,7496,3495,7117,104,379,1782,2326,1394,2217,2149,782,373,1565
9,1980,Indiana Pacers,0,37,9119,9176,3639,7689,3551,7375,88,314,1753,2333,1398,2326,2148,900,530,1517
10,1980,Kansas City Kings,1,47,8860,8603,3582,7489,3557,7375,25,114,1671,2250,1187,2429,2123,863,356,1439


In [11]:
# Check to see if you need eol='\r' or eol='\n'
read.csv.sql('D:/Data/NBA_train.csv',  eol = '\r', sql = 'CREATE TABLE NBA AS SELECT * FROM file', row.names=FALSE, dbname = dbname)
sqldf('SELECT * FROM NBA', dbname=dbname) 
dbRemoveTable(db, 'NBA')   

NULL

Unnamed: 0,SeasonEnd,Team,Playoffs,W,PTS,oppPTS,FG,FGA,X2P,X2PA,X3P,X3PA,FT,FTA,ORB,DRB,AST,STL,BLK,TOV
1,1980,Atlanta Hawks,1,50,8573,8334,3261,7027,3248,6952,13,75,2038,2645,1369,2406,1913,782,539,1495
2,1980,Boston Celtics,1,61,9303,8664,3617,7387,3455,6965,162,422,1907,2449,1227,2457,2198,809,308,1539
3,1980,Chicago Bulls,0,30,8813,9035,3362,6943,3292,6668,70,275,2019,2592,1115,2465,2152,704,392,1684
4,1980,Cleveland Cavaliers,0,37,9360,9332,3811,8041,3775,7854,36,187,1702,2205,1307,2381,2108,764,342,1370
5,1980,Denver Nuggets,0,30,8878,9240,3462,7470,3379,7215,83,255,1871,2539,1311,2524,2079,746,404,1533
6,1980,Detroit Pistons,0,16,8933,9609,3643,7596,3586,7377,57,219,1590,2149,1226,2415,1950,783,562,1742
7,1980,Golden State Warriors,0,24,8493,8853,3527,7318,3500,7197,27,121,1412,1914,1155,2437,2028,779,339,1492
8,1980,Houston Rockets,1,41,9084,9070,3599,7496,3495,7117,104,379,1782,2326,1394,2217,2149,782,373,1565
9,1980,Indiana Pacers,0,37,9119,9176,3639,7689,3551,7375,88,314,1753,2333,1398,2326,2148,900,530,1517
10,1980,Kansas City Kings,1,47,8860,8603,3582,7489,3557,7375,25,114,1671,2250,1187,2429,2123,863,356,1439


# Close connection

In [54]:
dbDisconnect(db)           

##MySQL

In [55]:
library(RMySQL)
db<-dbConnect(MySQL(),username="pi",password="raspberry",host="192.168.1.74",unix.sock="/tmp/mysql.sock",dbname="employees")

In [56]:
dbListTables(db)
dbListFields(db, 'dept_manager')   
dbReadTable(db, 'dept_manager')    

Unnamed: 0,emp_no,dept_no,from_date,to_date
1,110022,d001,1985-01-01,1991-10-01
2,110039,d001,1991-10-01,9999-01-01
3,110085,d002,1985-01-01,1989-12-17
4,110114,d002,1989-12-17,9999-01-01
5,110183,d003,1985-01-01,1992-03-21
6,110228,d003,1992-03-21,9999-01-01
7,110303,d004,1985-01-01,1988-09-09
8,110344,d004,1988-09-09,1992-08-02
9,110386,d004,1992-08-02,1996-08-30
10,110420,d004,1996-08-30,9999-01-01


In [57]:
sqldf('SELECT * FROM dept_manager',connection=db)

Unnamed: 0,emp_no,dept_no,from_date,to_date
1,110022,d001,1985-01-01,1991-10-01
2,110039,d001,1991-10-01,9999-01-01
3,110085,d002,1985-01-01,1989-12-17
4,110114,d002,1989-12-17,9999-01-01
5,110183,d003,1985-01-01,1992-03-21
6,110228,d003,1992-03-21,9999-01-01
7,110303,d004,1985-01-01,1988-09-09
8,110344,d004,1988-09-09,1992-08-02
9,110386,d004,1992-08-02,1996-08-30
10,110420,d004,1996-08-30,9999-01-01


In [104]:
dbDisconnect(db) 
#options(sqldf.driver = "RMySQL")

##PostGreSQL

In [20]:
# Establish connection to PoststgreSQL using RPostgreSQL
library(RPostgreSQL)
db<- dbConnect(PostgreSQL(), dbname="chinook",host="raspberrypi01",port=5432,user="postgres",password="")

In [23]:
dbListTables(db)
dbListFields(db, 'Genre')   
dbReadTable(db, 'Genre')    

Unnamed: 0,GenreId,Name
1,1,Rock
2,2,Jazz
3,3,Metal
4,4,Alternative & Punk
5,5,Rock And Roll
6,6,Blues
7,7,Latin
8,8,Reggae
9,9,Pop
10,10,Soundtrack


ERROR: Error in .local(drv, ...): Failed to connect to database: Error: Can't connect to MySQL server on 'localhost' (0)



ERROR: Error in !dbPreExists: invalid argument type


In [31]:
sqldf('SELECT * FROM Genre',drv="RPostgreSQL")

In postgresqlQuickSQL(conn, statement, ...): Could not create executeSELECT * FROM Genre

NULL

In [45]:
library(RPostgreSQL)

## loads the PostgreSQL driver
drv <- dbDriver('PostgreSQL')

## Open a connection
con <- dbConnect(drv, dbname="chinook", host='192.168.1.91',user='postgres',password="")

## Submits a statement
rs <- dbSendQuery(con, 'select * from playlist')

## fetch all elements from the result set
fetch(rs,n=-1)

## Submit and execute the query
dbGetQuery(con, 'select * from playlist')

## Closes the connection
dbDisconnect(con)

## Frees all the resources on the driver
dbUnloadDriver(drv)

ERROR: Error in postgresqlExecStatement(conn, statement, ...): RS-DBI driver: (could not Retrieve the result : ERROR:  relation "playlist" does not exist
LINE 1: select * from playlist
                      ^
)


ERROR: Error in fetch(rs, n = -1): error in evaluating the argument 'res' in selecting a method for function 'fetch': Error: object 'rs' not found



In postgresqlQuickSQL(conn, statement, ...): Could not create executeselect * from playlist

NULL

ERROR: Error in postgresqlCloseDriver(drv, ...): RS-DBI driver: (There are opened connections -- close them first)
