In [5]:
%load_ext sql
%config SqlMagic.autocommit=False # for engines that do not support autommit

from sqlalchemy import create_engine
from sqlalchemy.ext.declarative import declarative_base
import csv
import pandas as pd

# had issues with tables persisting using in memory 
db_connection_string = 'sqlite:///temp_db.db'

Base = declarative_base()
engine = create_engine( db_connection_string )
Base.metadata.create_all(engine)

engine

Engine(sqlite:///temp_db.db)

In [6]:
# load data into tables as is 
df = pd.read_csv('./data/titanic/train.csv')
df.to_sql(con=engine, name='train', if_exists='replace')

df1 = pd.read_csv('./data/titanic/gender_submission.csv')
df1.to_sql(con=engine, name='gender_submission', if_exists='replace')

df2 = pd.read_csv('./data/titanic/test.csv')
df2.to_sql(con=engine, name='test', if_exists='replace')


In [7]:
"After this cell its all SQL"

'After this cell its all SQL'

In [8]:
%sql sqlite:///temp_db.db


'Connected: @temp_db.db'

In [9]:
%%sql

select * from train
LIMIT 2;

select * from test
LIMIT 2;

select * from gender_submission
LIMIT 2;

 * sqlite:///temp_db.db
Done.
Done.
Done.


index,PassengerId,Survived
0,892,0
1,893,1


In [10]:
%%sql

-- create a CTE to make it easier in subsequent cells to reference this table

WITH passengers AS 
( 
 
    select PassengerId, PClass, Name, Sex, Age SibSp, Parch, Ticket, Fare, Cabin, Embarked, Survived
    from
    -- first join the test data to the labels
    (select * 
    from test
    JOIN gender_submission as gs
    ON test.PassengerId = gs.PassengerId)
    -- then add to the original train data for a complete set
    UNION
    select PassengerId, PClass, Name, Sex, Age SibSp, Parch, Ticket, Fare, Cabin, Embarked, Survived
    from train
) 
select * from passengers
LIMIT 3


 * sqlite:///temp_db.db
Done.


PassengerId,PClass,Name,Sex,SibSp,Parch,Ticket,Fare,Cabin,Embarked,Survived
1,3,"Braund, Mr. Owen Harris",male,22.0,0,A/5 21171,7.25,,S,0
2,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38.0,0,PC 17599,71.2833,C85,C,1
3,3,"Heikkinen, Miss. Laina",female,26.0,0,STON/O2. 3101282,7.925,,S,1


In [11]:
%%sql 

select * from passengers
LIMIT 3

 * sqlite:///temp_db.db
Done.


index,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Thayer)",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S


In [23]:
%%sql 

select Survived, Pclass,  COUNT(Pclass)
FROM passengers
GROUP BY Survived,Pclass


 * sqlite:///temp_db.db
Done.


Survived,Pclass,COUNT(Pclass)
0,1,80
0,2,97
0,3,372
1,1,136
1,2,87
1,3,119


In [24]:
%%sql 

select * , AVG(Age) as avg_age
FROM passengers


 * sqlite:///temp_db.db
Done.


index,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,avg_age
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,29.69911764705882
